diff options
995 files changed, 25632 insertions, 11424 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdb22006f713..ee0569a040c6 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4945,8 +4945,6 @@ sa1100ir [NET] See drivers/net/irda/sa1100_ir.c. - sbni= [NET] Granch SBNI12 leased line adapter - sched_verbose [KNL] Enables verbose scheduler debug messages. schedstats= [KNL,X86] Enable or disable scheduled statistics. diff --git a/Documentation/arm64/tagged-address-abi.rst b/Documentation/arm64/tagged-address-abi.rst index 459e6b66ff68..0c9120ec58ae 100644 --- a/Documentation/arm64/tagged-address-abi.rst +++ b/Documentation/arm64/tagged-address-abi.rst @@ -45,14 +45,24 @@ how the user addresses are used by the kernel: 1. User addresses not accessed by the kernel but used for address space management (e.g. ``mprotect()``, ``madvise()``). The use of valid - tagged pointers in this context is allowed with the exception of - ``brk()``, ``mmap()`` and the ``new_address`` argument to - ``mremap()`` as these have the potential to alias with existing - user addresses. - - NOTE: This behaviour changed in v5.6 and so some earlier kernels may - incorrectly accept valid tagged pointers for the ``brk()``, - ``mmap()`` and ``mremap()`` system calls. + tagged pointers in this context is allowed with these exceptions: + + - ``brk()``, ``mmap()`` and the ``new_address`` argument to + ``mremap()`` as these have the potential to alias with existing + user addresses. + + NOTE: This behaviour changed in v5.6 and so some earlier kernels may + incorrectly accept valid tagged pointers for the ``brk()``, + ``mmap()`` and ``mremap()`` system calls. + + - The ``range.start``, ``start`` and ``dst`` arguments to the + ``UFFDIO_*`` ``ioctl()``s used on a file descriptor obtained from + ``userfaultfd()``, as fault addresses subsequently obtained by reading + the file descriptor will be untagged, which may otherwise confuse + tag-unaware programs. + + NOTE: This behaviour changed in v5.14 and so some earlier kernels may + incorrectly accept valid tagged pointers for this system call. 2. User addresses accessed by the kernel (e.g. ``write()``). This ABI relaxation is disabled by default and the application thread needs to diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml index a7b5807c5543..fb547e26c676 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -104,6 +104,12 @@ properties: maximum: 32 maxItems: 1 + power-domains: + description: + Power domain provider node and an args specifier containing + the can device id value. + maxItems: 1 + can-transceiver: $ref: can-transceiver.yaml# diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 4853ab7017bd..ed88ba4b94df 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -87,18 +87,16 @@ properties: - const: ipa-setup-ready interconnects: - minItems: 2 items: - - description: Path leading to system memory - - description: Path between the AP and IPA config space - - description: Path leading to internal memory + - description: Interconnect path between IPA and main memory + - description: Interconnect path between IPA and internal memory + - description: Interconnect path between IPA and the AP subsystem interconnect-names: - minItems: 2 items: - const: memory - - const: config - const: imem + - const: config qcom,smem-states: $ref: /schemas/types.yaml#/definitions/phandle-array @@ -209,11 +207,11 @@ examples: interconnects = <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_EBI1>, - <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>, - <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>; + <&rsc_hlos MASTER_IPA &rsc_hlos SLAVE_IMEM>, + <&rsc_hlos MASTER_APPSS_PROC &rsc_hlos SLAVE_IPA_CFG>; interconnect-names = "memory", - "config", - "imem"; + "imem", + "config"; qcom,smem-states = <&ipa_smp2p_out 0>, <&ipa_smp2p_out 1>; diff --git a/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml b/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml index ee936d1aa724..c2930d65728e 100644 --- a/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml +++ b/Documentation/devicetree/bindings/sound/renesas,rsnd.yaml @@ -114,7 +114,7 @@ properties: ports: $ref: /schemas/graph.yaml#/properties/ports - properties: + patternProperties: port(@[0-9a-f]+)?: $ref: audio-graph-port.yaml# unevaluatedProperties: false diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index 62f2aab8eaec..31cfd7d674a6 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -501,6 +501,18 @@ fail_over_mac This option was added in bonding version 3.2.0. The "follow" policy was added in bonding version 3.3.0. +lacp_active + Option specifying whether to send LACPDU frames periodically. + + off or 0 + LACPDU frames acts as "speak when spoken to". + + on or 1 + LACPDU frames are sent along the configured links + periodically. See lacp_rate for more details. + + The default is on. + lacp_rate Option specifying the rate in which we'll ask our link partner diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst index 3e2221f4abe4..5f13905b12e0 100644 --- a/Documentation/networking/filter.rst +++ b/Documentation/networking/filter.rst @@ -320,13 +320,6 @@ Examples for low-level BPF: ret #-1 drop: ret #0 -**(Accelerated) VLAN w/ id 10**:: - - ld vlan_tci - jneq #10, drop - ret #-1 - drop: ret #0 - **icmp random packet sampling, 1 in 4**:: ldh [12] @@ -358,6 +351,22 @@ Examples for low-level BPF: bad: ret #0 /* SECCOMP_RET_KILL_THREAD */ good: ret #0x7fff0000 /* SECCOMP_RET_ALLOW */ +Examples for low-level BPF extension: + +**Packet for interface index 13**:: + + ld ifidx + jneq #13, drop + ret #-1 + drop: ret #0 + +**(Accelerated) VLAN w/ id 10**:: + + ld vlan_tci + jneq #10, drop + ret #-1 + drop: ret #0 + The above example code can be placed into a file (here called "foo"), and then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf and cls_bpf understands and can directly be loaded with. Example with above diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 91b2cf712801..e26532f49760 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -228,6 +228,23 @@ before posting to the mailing list. The patchwork build bot instance gets overloaded very easily and netdev@vger really doesn't need more traffic if we can help it. +netdevsim is great, can I extend it for my out-of-tree tests? +------------------------------------------------------------- + +No, `netdevsim` is a test vehicle solely for upstream tests. +(Please add your tests under tools/testing/selftests/.) + +We also give no guarantees that `netdevsim` won't change in the future +in a way which would break what would normally be considered uAPI. + +Is netdevsim considered a "user" of an API? +------------------------------------------- + +Linux kernel has a long standing rule that no API should be added unless +it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are +strongly encouraged when adding new APIs, but `netdevsim` in itself +is **not** considered a use case/user. + Any other tips to help ensure my net/net-next patch gets OK'd? -------------------------------------------------------------- Attention to detail. Re-read your own work as if you were the diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst index 9c918f7cb0e8..1ee2141e8ef1 100644 --- a/Documentation/networking/operstates.rst +++ b/Documentation/networking/operstates.rst @@ -73,7 +73,9 @@ IF_OPER_LOWERLAYERDOWN (3): state (f.e. VLAN). IF_OPER_TESTING (4): - Unused in current kernel. + Interface is in testing mode, for example executing driver self-tests + or media (cable) test. It can't be used for normal traffic until tests + complete. IF_OPER_DORMANT (5): Interface is L1 up, but waiting for an external event, f.e. for a @@ -111,7 +113,7 @@ it as lower layer. Note that for certain kind of soft-devices, which are not managing any real hardware, it is possible to set this bit from userspace. One -should use TVL IFLA_CARRIER to do so. +should use TLV IFLA_CARRIER to do so. netif_carrier_ok() can be used to query that bit. diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index b71e09f745c3..f99be8062bc8 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -191,7 +191,7 @@ Documentation written by Tom Zanussi with the event, in nanoseconds. May be modified by .usecs to have timestamps interpreted as microseconds. - cpu int the cpu on which the event occurred. + common_cpu int the cpu on which the event occurred. ====================== ==== ======================================= Extended error information diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index c7b165ca70b6..dae68e68ca23 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -855,7 +855,7 @@ in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for specific cpus. The irq field is interpreted like this:: -  bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 | + bits: | 31 ... 28 | 27 ... 24 | 23 ... 16 | 15 ... 0 | field: | vcpu2_index | irq_type | vcpu_index | irq_id | The irq_type field has the following values: @@ -2149,10 +2149,10 @@ prior to calling the KVM_RUN ioctl. Errors: ====== ============================================================ -  ENOENT   no such register -  EINVAL   invalid register ID, or no such register or used with VMs in + ENOENT no such register + EINVAL invalid register ID, or no such register or used with VMs in protected virtualization mode on s390 -  EPERM    (arm64) register access not allowed before vcpu finalization + EPERM (arm64) register access not allowed before vcpu finalization ====== ============================================================ (These error codes are indicative only: do not rely on a specific error @@ -2590,10 +2590,10 @@ following id bit patterns:: Errors include: ======== ============================================================ -  ENOENT   no such register -  EINVAL   invalid register ID, or no such register or used with VMs in + ENOENT no such register + EINVAL invalid register ID, or no such register or used with VMs in protected virtualization mode on s390 -  EPERM    (arm64) register access not allowed before vcpu finalization + EPERM (arm64) register access not allowed before vcpu finalization ======== ============================================================ (These error codes are indicative only: do not rely on a specific error @@ -3112,13 +3112,13 @@ current state. "addr" is ignored. Errors: ====== ================================================================= -  EINVAL    the target is unknown, or the combination of features is invalid. -  ENOENT    a features bit specified is unknown. + EINVAL the target is unknown, or the combination of features is invalid. + ENOENT a features bit specified is unknown. ====== ================================================================= This tells KVM what type of CPU to present to the guest, and what -optional features it should have.  This will cause a reset of the cpu -registers to their initial values.  If this is not called, KVM_RUN will +optional features it should have. This will cause a reset of the cpu +registers to their initial values. If this is not called, KVM_RUN will return ENOEXEC for that vcpu. The initial values are defined as: @@ -3239,8 +3239,8 @@ VCPU matching underlying host. Errors: ===== ============================================================== -  E2BIG     the reg index list is too big to fit in the array specified by -             the user (the number required will be written into n). + E2BIG the reg index list is too big to fit in the array specified by + the user (the number required will be written into n). ===== ============================================================== :: @@ -3288,7 +3288,7 @@ specific device. ARM/arm64 divides the id field into two parts, a device id and an address type id specific to the individual device:: -  bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | + bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 | field: | 0x00000000 | device id | addr type id | ARM/arm64 currently only require this when using the in-kernel GIC @@ -7049,7 +7049,7 @@ In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to trap and emulate MSRs that are outside of the scope of KVM as well as limit the attack surface on KVM's MSR emulation code. -8.28 KVM_CAP_ENFORCE_PV_CPUID +8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID ----------------------------- Architectures: x86 diff --git a/MAINTAINERS b/MAINTAINERS index 73beb91891ee..41fcfdb24a81 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -445,7 +445,7 @@ F: drivers/platform/x86/wmi.c F: include/uapi/linux/wmi.h ACRN HYPERVISOR SERVICE MODULE -M: Shuo Liu <shuo.a.liu@intel.com> +M: Fei Li <fei1.li@intel.com> L: acrn-dev@lists.projectacrn.org (subscribers-only) S: Supported W: https://projectacrn.org @@ -7859,9 +7859,9 @@ S: Maintained F: drivers/input/touchscreen/goodix.c GOOGLE ETHERNET DRIVERS -M: Catherine Sullivan <csully@google.com> -R: Sagi Shahar <sagis@google.com> -R: Jon Olson <jonolson@google.com> +M: Jeroen de Borst <jeroendb@google.com> +R: Catherine Sullivan <csully@google.com> +R: David Awogbemila <awogbemila@google.com> L: netdev@vger.kernel.org S: Supported F: Documentation/networking/device_drivers/ethernet/google/gve.rst @@ -11347,6 +11347,12 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/phy/mxl-gpy.c +MCAB MICROCHIP CAN BUS ANALYZER TOOL DRIVER +R: Yasushi SHOJI <yashi@spacecubics.com> +L: linux-can@vger.kernel.org +S: Maintained +F: drivers/net/can/usb/mcba_usb.c + MCAN MMIO DEVICE DRIVER M: Chandrasekar Ramakrishnan <rcsekar@samsung.com> L: linux-can@vger.kernel.org @@ -15488,6 +15494,8 @@ M: Pan, Xinhui <Xinhui.Pan@amd.com> L: amd-gfx@lists.freedesktop.org S: Supported T: git https://gitlab.freedesktop.org/agd5f/linux.git +B: https://gitlab.freedesktop.org/drm/amd/-/issues +C: irc://irc.oftc.net/radeon F: drivers/gpu/drm/amd/ F: drivers/gpu/drm/radeon/ F: include/uapi/drm/amdgpu_drm.h @@ -19143,7 +19151,7 @@ M: Mauro Carvalho Chehab <mchehab@kernel.org> L: linux-usb@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/phy/hisilicon,hi3670-usb3.yaml -F: drivers/phy/hisilicon/phy-kirin970-usb3.c +F: drivers/phy/hisilicon/phy-hi3670-usb3.c USB ISP116X DRIVER M: Olav Kongas <ok@artecdesign.ee> @@ -19821,6 +19829,14 @@ L: netdev@vger.kernel.org S: Supported F: drivers/ptp/ptp_vmw.c +VMWARE VMCI DRIVER +M: Jorgen Hansen <jhansen@vmware.com> +M: Vishnu Dasa <vdasa@vmware.com> +L: linux-kernel@vger.kernel.org +L: pv-drivers@vmware.com (private) +S: Maintained +F: drivers/misc/vmw_vmci/ + VMWARE VMMOUSE SUBDRIVER M: "VMware Graphics" <linux-graphics-maintainer@vmware.com> M: "VMware, Inc." <pv-drivers@vmware.com> @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 14 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc4 NAME = Opossums on Parade # *DOCUMENTATION* @@ -546,7 +546,6 @@ export RCS_TAR_IGNORE := --exclude SCCS --exclude BitKeeper --exclude .svn \ PHONY += scripts_basic scripts_basic: $(Q)$(MAKE) $(build)=scripts/basic - $(Q)rm -f .tmp_quiet_recordmcount PHONY += outputmakefile ifdef building_out_of_srctree diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 77d3280dc678..6c50877841df 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -14,7 +14,6 @@ config ALPHA select PCI_SYSCALL if PCI select HAVE_AOUT select HAVE_ASM_MODVERSIONS - select HAVE_IDE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS select NEED_DMA_MAP_STATE @@ -532,7 +531,7 @@ config SMP will run faster if you say N here. See also the SMP-HOWTO available at - <http://www.tldp.org/docs.html#howto>. + <https://www.tldp.org/docs.html#howto>. If you don't know what to do here, say N. diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c index 00266e6e1b71..b4faba2432d5 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -23,7 +23,7 @@ #include "ksize.h" extern unsigned long switch_to_osf_pal(unsigned long nr, - struct pcb_struct * pcb_va, struct pcb_struct * pcb_pa, + struct pcb_struct *pcb_va, struct pcb_struct *pcb_pa, unsigned long *vptb); extern void move_stack(unsigned long new_stack); diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 43af71835adf..90a2b341e9c0 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -200,7 +200,7 @@ extern char _end; START_ADDR KSEG address of the entry point of kernel code. ZERO_PGE KSEG address of page full of zeroes, but - upon entry to kerne cvan be expected + upon entry to kernel, it can be expected to hold the parameter list and possible INTRD information. diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c index d65192202703..325d4dd4f904 100644 --- a/arch/alpha/boot/misc.c +++ b/arch/alpha/boot/misc.c @@ -30,7 +30,7 @@ extern long srm_printk(const char *, ...) __attribute__ ((format (printf, 1, 2))); /* - * gzip delarations + * gzip declarations */ #define OF(args) args #define STATIC static diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index dd2dd9f0861f..7f1ca30b115b 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -70,3 +70,4 @@ CONFIG_DEBUG_INFO=y CONFIG_ALPHA_LEGACY_START_ADDRESS=y CONFIG_MATHEMU=y CONFIG_CRYPTO_HMAC=y +CONFIG_DEVTMPFS=y diff --git a/arch/alpha/include/asm/compiler.h b/arch/alpha/include/asm/compiler.h index 5159ba259d65..ae645959018a 100644 --- a/arch/alpha/include/asm/compiler.h +++ b/arch/alpha/include/asm/compiler.h @@ -4,15 +4,4 @@ #include <uapi/asm/compiler.h> -/* Some idiots over in <linux/compiler.h> thought inline should imply - always_inline. This breaks stuff. We'll include this file whenever - we run into such problems. */ - -#include <linux/compiler.h> -#undef inline -#undef __inline__ -#undef __inline -#undef __always_inline -#define __always_inline inline __attribute__((always_inline)) - #endif /* __ALPHA_COMPILER_H */ diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h index 11c688c1d7ec..f21babaeed85 100644 --- a/arch/alpha/include/asm/syscall.h +++ b/arch/alpha/include/asm/syscall.h @@ -9,4 +9,10 @@ static inline int syscall_get_arch(struct task_struct *task) return AUDIT_ARCH_ALPHA; } +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r0; +} + #endif /* _ASM_ALPHA_SYSCALL_H */ diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 6b3daba60987..1dd9baf4a6c2 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -129,6 +129,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d5367a1c6300..d31167e3269c 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -834,7 +834,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, return -EFAULT; state = ¤t_thread_info()->ieee_state; - /* Update softare trap enable bits. */ + /* Update software trap enable bits. */ *state = (*state & ~IEEE_SW_MASK) | (swcr & IEEE_SW_MASK); /* Update the real fpcr. */ @@ -854,7 +854,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, state = ¤t_thread_info()->ieee_state; exc &= IEEE_STATUS_MASK; - /* Update softare trap enable bits. */ + /* Update software trap enable bits. */ swcr = (*state & IEEE_SW_MASK) | exc; *state |= exc; diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index e7a59d927d78..efcf7321701b 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -574,7 +574,7 @@ static void alpha_pmu_start(struct perf_event *event, int flags) * Check that CPU performance counters are supported. * - currently support EV67 and later CPUs. * - actually some later revisions of the EV6 have the same PMC model as the - * EV67 but we don't do suffiently deep CPU detection to detect them. + * EV67 but we don't do sufficiently deep CPU detection to detect them. * Bad luck to the very few people who might have one, I guess. */ static int supported_cpu(void) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index ef0c08ed0481..a5123ea426ce 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -256,7 +256,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, childstack->r26 = (unsigned long) ret_from_kernel_thread; childstack->r9 = usp; /* function */ childstack->r10 = kthread_arg; - childregs->hae = alpha_mv.hae_cache, + childregs->hae = alpha_mv.hae_cache; childti->pcb.usp = 0; return 0; } diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 7d56c217b235..b4fbbba30aa2 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -319,18 +319,19 @@ setup_memory(void *kernel_end) i, cluster->usage, cluster->start_pfn, cluster->start_pfn + cluster->numpages); - /* Bit 0 is console/PALcode reserved. Bit 1 is - non-volatile memory -- we might want to mark - this for later. */ - if (cluster->usage & 3) - continue; - end = cluster->start_pfn + cluster->numpages; if (end > max_low_pfn) max_low_pfn = end; memblock_add(PFN_PHYS(cluster->start_pfn), cluster->numpages << PAGE_SHIFT); + + /* Bit 0 is console/PALcode reserved. Bit 1 is + non-volatile memory -- we might want to mark + this for later. */ + if (cluster->usage & 3) + memblock_reserve(PFN_PHYS(cluster->start_pfn), + cluster->numpages << PAGE_SHIFT); } /* diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 4b2575f936d4..cb64e4797d2a 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -582,7 +582,7 @@ void smp_send_stop(void) { cpumask_t to_whom; - cpumask_copy(&to_whom, cpu_possible_mask); + cpumask_copy(&to_whom, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &to_whom); #ifdef DEBUG_IPI_MSG if (hard_smp_processor_id() != boot_cpu_id) diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 53adf43dcd44..96fd6ff3fe81 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -212,7 +212,7 @@ nautilus_init_pci(void) /* Use default IO. */ pci_add_resource(&bridge->windows, &ioport_resource); - /* Irongate PCI memory aperture, calculate requred size before + /* Irongate PCI memory aperture, calculate required size before setting it up. */ pci_add_resource(&bridge->windows, &irongate_mem); diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 921d4b6e4d95..5398f982bdd1 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -730,7 +730,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, long error; /* Check the UAC bits to decide what the user wants us to do - with the unaliged access. */ + with the unaligned access. */ if (!(current_thread_info()->status & TS_UAC_NOPRINT)) { if (__ratelimit(&ratelimit)) { diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c index d568cd9a3e43..f7cef66af88d 100644 --- a/arch/alpha/math-emu/math.c +++ b/arch/alpha/math-emu/math.c @@ -65,7 +65,7 @@ static long (*save_emul) (unsigned long pc); long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long); long do_alpha_fp_emul(unsigned long); -int init_module(void) +static int alpha_fp_emul_init_module(void) { save_emul_imprecise = alpha_fp_emul_imprecise; save_emul = alpha_fp_emul; @@ -73,12 +73,14 @@ int init_module(void) alpha_fp_emul = do_alpha_fp_emul; return 0; } +module_init(alpha_fp_emul_init_module); -void cleanup_module(void) +static void alpha_fp_emul_cleanup_module(void) { alpha_fp_emul_imprecise = save_emul_imprecise; alpha_fp_emul = save_emul; } +module_exit(alpha_fp_emul_cleanup_module); #undef alpha_fp_emul_imprecise #define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise @@ -401,3 +403,5 @@ alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask) egress: return si_code; } + +EXPORT_SYMBOL(__udiv_qrnnd); diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 82f908fa5676..2fb7012c3246 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -95,7 +95,6 @@ config ARM select HAVE_FUNCTION_TRACER if !XIP_KERNEL select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) - select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -361,7 +360,6 @@ config ARCH_FOOTBRIDGE bool "FootBridge" select CPU_SA110 select FOOTBRIDGE - select HAVE_IDE select NEED_MACH_IO_H if !MMU select NEED_MACH_MEMORY_H help @@ -430,7 +428,6 @@ config ARCH_PXA select GENERIC_IRQ_MULTI_HANDLER select GPIO_PXA select GPIOLIB - select HAVE_IDE select IRQ_DOMAIN select PLAT_PXA select SPARSE_IRQ @@ -446,7 +443,6 @@ config ARCH_RPC select ARM_HAS_SG_CHAIN select CPU_SA110 select FIQ - select HAVE_IDE select HAVE_PATA_PLATFORM select ISA_DMA_API select LEGACY_TIMER_TICK @@ -469,7 +465,6 @@ config ARCH_SA1100 select CPU_SA1100 select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select HAVE_IDE select IRQ_DOMAIN select ISA select NEED_MACH_MEMORY_H @@ -505,7 +500,6 @@ config ARCH_OMAP1 select GENERIC_IRQ_CHIP select GENERIC_IRQ_MULTI_HANDLER select GPIOLIB - select HAVE_IDE select HAVE_LEGACY_CLK select IRQ_DOMAIN select NEED_MACH_IO_H if PCCARD diff --git a/arch/arm/mach-davinci/Kconfig b/arch/arm/mach-davinci/Kconfig index de11030748d0..1d3aef84287d 100644 --- a/arch/arm/mach-davinci/Kconfig +++ b/arch/arm/mach-davinci/Kconfig @@ -9,7 +9,6 @@ menuconfig ARCH_DAVINCI select PM_GENERIC_DOMAINS_OF if PM && OF select REGMAP_MMIO select RESET_CONTROLLER - select HAVE_IDE select PINCTRL_SINGLE if ARCH_DAVINCI diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index d23970bd638d..f70fb9c4b0cb 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -49,6 +49,7 @@ static int __init parse_tag_acorn(const struct tag *tag) fallthrough; /* ??? */ case 256: vram_size += PAGE_SIZE * 256; + break; default: break; } diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 897634d0a67c..a951276f0547 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -1602,6 +1602,9 @@ exit: rn = arm_bpf_get_reg32(src_lo, tmp2[1], ctx); emit_ldx_r(dst, rn, off, ctx, BPF_SIZE(code)); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index ca38d0d6c3c4..f4eaab3ecf03 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -579,7 +579,7 @@ }; flexcan1: can@308c0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308c0000 0x10000>; interrupts = <GIC_SPI 142 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, @@ -594,7 +594,7 @@ }; flexcan2: can@308d0000 { - compatible = "fsl,imx8mp-flexcan", "fsl,imx6q-flexcan"; + compatible = "fsl,imx8mp-flexcan"; reg = <0x308d0000 0x10000>; interrupts = <GIC_SPI 144 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clk IMX8MP_CLK_IPG_ROOT>, diff --git a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi index 068692350e00..51e17094d7b1 100644 --- a/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi +++ b/arch/arm64/boot/dts/qcom/apq8096-db820c.dtsi @@ -1063,7 +1063,7 @@ status = "okay"; extcon = <&usb2_id>; - usb@7600000 { + dwc3@7600000 { extcon = <&usb2_id>; dr_mode = "otg"; maximum-speed = "high-speed"; @@ -1074,7 +1074,7 @@ status = "okay"; extcon = <&usb3_id>; - usb@6a00000 { + dwc3@6a00000 { extcon = <&usb3_id>; dr_mode = "otg"; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 95d6cb8cd4c0..f39bc10cc5bd 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -443,7 +443,7 @@ resets = <&gcc GCC_USB0_BCR>; status = "disabled"; - dwc_0: usb@8a00000 { + dwc_0: dwc3@8a00000 { compatible = "snps,dwc3"; reg = <0x8a00000 0xcd00>; interrupts = <GIC_SPI 140 IRQ_TYPE_LEVEL_HIGH>; @@ -484,7 +484,7 @@ resets = <&gcc GCC_USB1_BCR>; status = "disabled"; - dwc_1: usb@8c00000 { + dwc_1: dwc3@8c00000 { compatible = "snps,dwc3"; reg = <0x8c00000 0xcd00>; interrupts = <GIC_SPI 99 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 0e1bc4669d7e..78c55ca10ba9 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -2566,7 +2566,7 @@ power-domains = <&gcc USB30_GDSC>; status = "disabled"; - usb@6a00000 { + dwc3@6a00000 { compatible = "snps,dwc3"; reg = <0x06a00000 0xcc00>; interrupts = <0 131 IRQ_TYPE_LEVEL_HIGH>; @@ -2873,7 +2873,7 @@ qcom,select-utmi-as-pipe-clk; status = "disabled"; - usb@7600000 { + dwc3@7600000 { compatible = "snps,dwc3"; reg = <0x07600000 0xcc00>; interrupts = <0 138 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index 6f294f9c0cdf..e9d3ce29937c 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -1964,7 +1964,7 @@ resets = <&gcc GCC_USB_30_BCR>; - usb3_dwc3: usb@a800000 { + usb3_dwc3: dwc3@a800000 { compatible = "snps,dwc3"; reg = <0x0a800000 0xcd00>; interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi index f8a55307b855..a80c578484ba 100644 --- a/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404-evb.dtsi @@ -337,7 +337,7 @@ &usb3 { status = "okay"; - usb@7580000 { + dwc3@7580000 { dr_mode = "host"; }; }; diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi index 9c4be020d568..339790ba585d 100644 --- a/arch/arm64/boot/dts/qcom/qcs404.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi @@ -544,7 +544,7 @@ assigned-clock-rates = <19200000>, <200000000>; status = "disabled"; - usb@7580000 { + dwc3@7580000 { compatible = "snps,dwc3"; reg = <0x07580000 0xcd00>; interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>; @@ -573,7 +573,7 @@ assigned-clock-rates = <19200000>, <133333333>; status = "disabled"; - usb@78c0000 { + dwc3@78c0000 { compatible = "snps,dwc3"; reg = <0x078c0000 0xcc00>; interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 7af551a1fd90..a9a052f8c63c 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -110,11 +110,6 @@ no-map; }; - ipa_fw_mem: memory@8b700000 { - reg = <0 0x8b700000 0 0x10000>; - no-map; - }; - rmtfs_mem: memory@94600000 { compatible = "qcom,rmtfs-mem"; reg = <0x0 0x94600000 0x0 0x200000>; @@ -2761,7 +2756,7 @@ <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xe000>; interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index 5eb2b58ea23b..a8c274ad74c4 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -7,7 +7,6 @@ #include <dt-bindings/clock/qcom,gcc-sc7280.h> #include <dt-bindings/clock/qcom,rpmh.h> -#include <dt-bindings/interconnect/qcom,sc7280.h> #include <dt-bindings/interrupt-controller/arm-gic.h> #include <dt-bindings/mailbox/qcom-ipcc.h> #include <dt-bindings/power/qcom-aoss-qmp.h> @@ -64,11 +63,6 @@ no-map; reg = <0x0 0x80b00000 0x0 0x100000>; }; - - ipa_fw_mem: memory@8b700000 { - reg = <0 0x8b700000 0 0x10000>; - no-map; - }; }; cpus { @@ -514,43 +508,6 @@ qcom,bcm-voters = <&apps_bcm_voter>; }; - ipa: ipa@1e40000 { - compatible = "qcom,sc7280-ipa"; - - iommus = <&apps_smmu 0x480 0x0>, - <&apps_smmu 0x482 0x0>; - reg = <0 0x1e40000 0 0x8000>, - <0 0x1e50000 0 0x4ad0>, - <0 0x1e04000 0 0x23000>; - reg-names = "ipa-reg", - "ipa-shared", - "gsi"; - - interrupts-extended = <&intc 0 654 IRQ_TYPE_EDGE_RISING>, - <&intc 0 432 IRQ_TYPE_LEVEL_HIGH>, - <&ipa_smp2p_in 0 IRQ_TYPE_EDGE_RISING>, - <&ipa_smp2p_in 1 IRQ_TYPE_EDGE_RISING>; - interrupt-names = "ipa", - "gsi", - "ipa-clock-query", - "ipa-setup-ready"; - - clocks = <&rpmhcc RPMH_IPA_CLK>; - clock-names = "core"; - - interconnects = <&aggre2_noc MASTER_IPA 0 &mc_virt SLAVE_EBI1 0>, - <&gem_noc MASTER_APPSS_PROC 0 &cnoc2 SLAVE_IPA_CFG 0>; - interconnect-names = "memory", - "config"; - - qcom,smem-states = <&ipa_smp2p_out 0>, - <&ipa_smp2p_out 1>; - qcom,smem-state-names = "ipa-clock-enabled-valid", - "ipa-clock-enabled"; - - status = "disabled"; - }; - tcsr_mutex: hwlock@1f40000 { compatible = "qcom,tcsr-mutex", "syscon"; reg = <0 0x01f40000 0 0x40000>; diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 1796ae8372be..0a86fe71a66d 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -3781,7 +3781,7 @@ <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_0 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xcd00>; interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>; @@ -3829,7 +3829,7 @@ <&gladiator_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_USB3_1 0>; interconnect-names = "usb-ddr", "apps-usb"; - usb_2_dwc3: usb@a800000 { + usb_2_dwc3: dwc3@a800000 { compatible = "snps,dwc3"; reg = <0 0x0a800000 0 0xcd00>; interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 612dda0fef43..eef9d79157e9 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -2344,7 +2344,7 @@ resets = <&gcc GCC_USB30_PRIM_BCR>; - usb_1_dwc3: usb@a600000 { + usb_1_dwc3: dwc3@a600000 { compatible = "snps,dwc3"; reg = <0 0x0a600000 0 0xcd00>; interrupts = <GIC_SPI 133 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 3155c9e778f0..0625bf2353c2 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -947,7 +947,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_shift = get_vma_page_shift(vma, hva); } - shared = (vma->vm_flags & VM_PFNMAP); + shared = (vma->vm_flags & VM_SHARED); switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index dccf98a37283..41c23f474ea6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -823,6 +823,19 @@ emit_cond_jmp: return ret; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + /* + * Nothing required here. + * + * In case of arm64, we rely on the firmware mitigation of + * Speculative Store Bypass as controlled via the ssbd kernel + * parameter. Whenever the mitigation is enabled, it works + * for all of the kernel code with no need to provide any + * additional instructions. + */ + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/h8300/Kconfig.cpu b/arch/h8300/Kconfig.cpu index b5e14d513e62..c30baa0499fc 100644 --- a/arch/h8300/Kconfig.cpu +++ b/arch/h8300/Kconfig.cpu @@ -44,7 +44,6 @@ config H8300_H8MAX bool "H8MAX" select H83069 select RAMKERNEL - select HAVE_IDE help H8MAX Evaluation Board Support More Information. (Japanese Only) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index cf425c2c63af..4993c7ac7ff6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -25,7 +25,6 @@ config IA64 select HAVE_ASM_MODVERSIONS select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_EXIT_THREAD - select HAVE_IDE select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 96989ad46f66..d632a1d576f9 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,7 +23,6 @@ config M68K select HAVE_DEBUG_BUGVERBOSE select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED select HAVE_FUTEX_CMPXCHG if MMU && FUTEX - select HAVE_IDE select HAVE_MOD_ARCH_SPECIFIC select HAVE_UID16 select MMU_GATHER_NO_RANGE if MMU diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index d964c1f27399..6a07a6817885 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -33,6 +33,7 @@ config MAC depends on MMU select MMU_MOTOROLA if MMU select HAVE_ARCH_NVRAM_OPS + select HAVE_PATA_PLATFORM select LEGACY_TIMER_TICK help This option enables support for the Apple Macintosh series of diff --git a/arch/m68k/coldfire/m525x.c b/arch/m68k/coldfire/m525x.c index 2c4d2ca2f20d..485375112e28 100644 --- a/arch/m68k/coldfire/m525x.c +++ b/arch/m68k/coldfire/m525x.c @@ -26,7 +26,7 @@ DEFINE_CLK(pll, "pll.0", MCF_CLK); DEFINE_CLK(sys, "sys.0", MCF_BUSCLK); static struct clk_lookup m525x_clk_lookup[] = { - CLKDEV_INIT(NULL, "pll.0", &pll), + CLKDEV_INIT(NULL, "pll.0", &clk_pll), CLKDEV_INIT(NULL, "sys.0", &clk_sys), CLKDEV_INIT("mcftmr.0", NULL, &clk_sys), CLKDEV_INIT("mcftmr.1", NULL, &clk_sys), diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index cee6087cd686..6dfb27d531dd 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -71,7 +71,6 @@ config MIPS select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index cdf404a831b2..1eaf6a1ca561 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -140,6 +140,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index 939dd06764bc..3a73e9375712 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -1355,6 +1355,9 @@ jeq_common: } break; + case BPF_ST | BPF_NOSPEC: /* speculation barrier */ + break; + case BPF_ST | BPF_B | BPF_MEM: case BPF_ST | BPF_H | BPF_MEM: case BPF_ST | BPF_W | BPF_MEM: diff --git a/arch/nds32/mm/mmap.c b/arch/nds32/mm/mmap.c index c206b31ce07a..1bdf5e7d1b43 100644 --- a/arch/nds32/mm/mmap.c +++ b/arch/nds32/mm/mmap.c @@ -59,7 +59,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vma->vm_start)) + (!vma || addr + len <= vm_start_gap(vma))) return addr; } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index bde9907bc5b2..4f8c1fbf8f2f 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -3,7 +3,6 @@ config PARISC def_bool y select ARCH_32BIT_OFF_T if !64BIT select ARCH_MIGHT_HAVE_PC_PARPORT - select HAVE_IDE select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_SYSCALL_TRACEPOINTS diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 5b5351cdcb33..8baaad52d799 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -121,6 +121,8 @@ #define SO_NETNS_COOKIE 0x4045 +#define SO_BUF_LOCK 0x4046 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index d01e3401581d..663766fbf505 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -220,7 +220,6 @@ config PPC select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index 2813e3f98db6..3c5baaa6f1e7 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -27,6 +27,13 @@ KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both + +# Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true +# by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is +# compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code +# generation is minimal, it will just use r29 instead. +ccflags-y += $(call cc-option, -ffixed-r30) + asflags-y := -D__VDSO64__ -s targets += vdso64.lds diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1d1fcc290fca..085fb8ecbf68 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2697,8 +2697,10 @@ static int kvmppc_core_vcpu_create_hv(struct kvm_vcpu *vcpu) HFSCR_DSCR | HFSCR_VECVSX | HFSCR_FP | HFSCR_PREFIX; if (cpu_has_feature(CPU_FTR_HVMODE)) { vcpu->arch.hfscr &= mfspr(SPRN_HFSCR); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST)) vcpu->arch.hfscr |= HFSCR_TM; +#endif } if (cpu_has_feature(CPU_FTR_TM_COMP)) vcpu->arch.hfscr |= HFSCR_TM; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 8543ad538b0c..898f942eb198 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -302,6 +302,9 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.l1_ptcr == 0) return H_NOT_AVAILABLE; + if (MSR_TM_TRANSACTIONAL(vcpu->arch.shregs.msr)) + return H_BAD_MODE; + /* copy parameters in */ hv_ptr = kvmppc_get_gpr(vcpu, 4); regs_ptr = kvmppc_get_gpr(vcpu, 5); @@ -322,6 +325,23 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (l2_hv.vcpu_token >= NR_CPUS) return H_PARAMETER; + /* + * L1 must have set up a suspended state to enter the L2 in a + * transactional state, and only in that case. These have to be + * filtered out here to prevent causing a TM Bad Thing in the + * host HRFID. We could synthesize a TM Bad Thing back to the L1 + * here but there doesn't seem like much point. + */ + if (MSR_TM_SUSPENDED(vcpu->arch.shregs.msr)) { + if (!MSR_TM_ACTIVE(l2_regs.msr)) + return H_BAD_MODE; + } else { + if (l2_regs.msr & MSR_TS_MASK) + return H_BAD_MODE; + if (WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_TS_MASK)) + return H_BAD_MODE; + } + /* translate lpid */ l2 = kvmhv_get_nested(vcpu->kvm, l2_hv.lpid, true); if (!l2) diff --git a/arch/powerpc/kvm/book3s_hv_p9_entry.c b/arch/powerpc/kvm/book3s_hv_p9_entry.c index 83f592eadcd2..961b3d70483c 100644 --- a/arch/powerpc/kvm/book3s_hv_p9_entry.c +++ b/arch/powerpc/kvm/book3s_hv_p9_entry.c @@ -317,6 +317,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc */ mtspr(SPRN_HDEC, hdec); +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +tm_return_to_guest: +#endif mtspr(SPRN_DAR, vcpu->arch.shregs.dar); mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr); mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0); @@ -415,11 +418,23 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * is in real suspend mode and is trying to transition to * transactional mode. */ - if (local_paca->kvm_hstate.fake_suspend && + if (!local_paca->kvm_hstate.fake_suspend && (vcpu->arch.shregs.msr & MSR_TS_S)) { if (kvmhv_p9_tm_emulation_early(vcpu)) { - /* Prevent it being handled again. */ - trap = 0; + /* + * Go straight back into the guest with the + * new NIP/MSR as set by TM emulation. + */ + mtspr(SPRN_HSRR0, vcpu->arch.regs.nip); + mtspr(SPRN_HSRR1, vcpu->arch.shregs.msr); + + /* + * tm_return_to_guest re-loads SRR0/1, DAR, + * DSISR after RI is cleared, in case they had + * been clobbered by a MCE. + */ + __mtmsrd(0, 1); /* clear RI */ + goto tm_return_to_guest; } } #endif @@ -499,6 +514,10 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc * If we are in real mode, only switch MMU on after the MMU is * switched to host, to avoid the P9_RADIX_PREFETCH_BUG. */ + if (IS_ENABLED(CONFIG_PPC_TRANSACTIONAL_MEM) && + vcpu->arch.shregs.msr & MSR_TS_MASK) + msr |= MSR_TS_S; + __mtmsrd(msr, 0); end_timing(vcpu); diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index c5e677508d3b..0f847f1e5ddd 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -242,6 +242,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; + if (be32_to_cpu(args.nargs) >= ARRAY_SIZE(args.args)) { + /* + * Don't overflow our args array: ensure there is room for + * at least rets[0] (even if the call specifies 0 nret). + * + * Each handler must then check for the correct nargs and nret + * values, but they may always return failure in rets[0]. + */ + rc = -EINVAL; + goto fail; + } args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->arch.rtas_token_lock); @@ -269,9 +280,17 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) fail: /* * We only get here if the guest has called RTAS with a bogus - * args pointer. That means we can't get to the args, and so we - * can't fail the RTAS call. So fail right out to userspace, - * which should kill the guest. + * args pointer or nargs/nret values that would overflow the + * array. That means we can't get to the args, and so we can't + * fail the RTAS call. So fail right out to userspace, which + * should kill the guest. + * + * SLOF should actually pass the hcall return value from the + * rtas handler call in r3, so enter_rtas could be modified to + * return a failure indication in r3 and we could return such + * errors to the guest rather than failing to host userspace. + * However old guests that don't test for failure could then + * continue silently after errors, so for now we won't do this. */ return rc; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index be33b5321a76..b4e6f70b97b9 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -2048,9 +2048,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, { struct kvm_enable_cap cap; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&cap, argp, sizeof(cap))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); vcpu_put(vcpu); break; @@ -2074,9 +2074,9 @@ long kvm_arch_vcpu_ioctl(struct file *filp, case KVM_DIRTY_TLB: { struct kvm_dirty_tlb dirty; r = -EFAULT; - vcpu_load(vcpu); if (copy_from_user(&dirty, argp, sizeof(dirty))) goto out; + vcpu_load(vcpu); r = kvm_vcpu_ioctl_dirty_tlb(vcpu, &dirty); vcpu_put(vcpu); break; diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 34bb1583fc0c..beb12cbc8c29 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -738,6 +738,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * break; /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + + /* * BPF_ST(X) */ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index de8595880fee..b87a63dba9c8 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -628,6 +628,12 @@ emit_clear: break; /* + * BPF_ST NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + + /* * BPF_ST(X) */ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ diff --git a/arch/powerpc/platforms/pasemi/idle.c b/arch/powerpc/platforms/pasemi/idle.c index 9b88e3cded7d..534b0317fc15 100644 --- a/arch/powerpc/platforms/pasemi/idle.c +++ b/arch/powerpc/platforms/pasemi/idle.c @@ -42,6 +42,7 @@ static int pasemi_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 631a0d57b6cd..6b0886668465 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -77,7 +77,7 @@ #include "../../../../drivers/pci/pci.h" DEFINE_STATIC_KEY_FALSE(shared_processor); -EXPORT_SYMBOL_GPL(shared_processor); +EXPORT_SYMBOL(shared_processor); int CMO_PrPSP = -1; int CMO_SecPSP = -1; diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 6d98cd999680..7b3483ba2e84 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -27,10 +27,10 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) -/* Load initrd at enough distance from DRAM start */ +/* Load initrd anywhere in system RAM */ static inline unsigned long efi_get_max_initrd_addr(unsigned long image_addr) { - return image_addr + SZ_256M; + return ULONG_MAX; } #define alloc_screen_info(x...) (&screen_info) diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index ff467b98c3e3..ac7593607fa6 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -132,8 +132,12 @@ unsigned long get_wchan(struct task_struct *task) { unsigned long pc = 0; - if (likely(task && task != current && !task_is_running(task))) + if (likely(task && task != current && !task_is_running(task))) { + if (!try_get_task_stack(task)) + return 0; walk_stackframe(task, NULL, save_wchan, &pc); + put_task_stack(task); + } return pc; } diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index bceb0629e440..63bc691cff91 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -30,23 +30,23 @@ ENTRY(__asm_copy_from_user) * t0 - end of uncopied dst */ add t0, a0, a2 - bgtu a0, t0, 5f /* * Use byte copy only if too small. + * SZREG holds 4 for RV32 and 8 for RV64 */ - li a3, 8*SZREG /* size must be larger than size in word_copy */ + li a3, 9*SZREG /* size must be larger than size in word_copy */ bltu a2, a3, .Lbyte_copy_tail /* - * Copy first bytes until dst is align to word boundary. + * Copy first bytes until dst is aligned to word boundary. * a0 - start of dst * t1 - start of aligned dst */ addi t1, a0, SZREG-1 andi t1, t1, ~(SZREG-1) /* dst is already aligned, skip */ - beq a0, t1, .Lskip_first_bytes + beq a0, t1, .Lskip_align_dst 1: /* a5 - one byte for copying data */ fixup lb a5, 0(a1), 10f @@ -55,7 +55,7 @@ ENTRY(__asm_copy_from_user) addi a0, a0, 1 /* dst */ bltu a0, t1, 1b /* t1 - start of aligned dst */ -.Lskip_first_bytes: +.Lskip_align_dst: /* * Now dst is aligned. * Use shift-copy if src is misaligned. @@ -72,10 +72,9 @@ ENTRY(__asm_copy_from_user) * * a0 - start of aligned dst * a1 - start of aligned src - * a3 - a1 & mask:(SZREG-1) * t0 - end of aligned dst */ - addi t0, t0, -(8*SZREG-1) /* not to over run */ + addi t0, t0, -(8*SZREG) /* not to over run */ 2: fixup REG_L a4, 0(a1), 10f fixup REG_L a5, SZREG(a1), 10f @@ -97,7 +96,7 @@ ENTRY(__asm_copy_from_user) addi a1, a1, 8*SZREG bltu a0, t0, 2b - addi t0, t0, 8*SZREG-1 /* revert to original value */ + addi t0, t0, 8*SZREG /* revert to original value */ j .Lbyte_copy_tail .Lshift_copy: @@ -107,7 +106,7 @@ ENTRY(__asm_copy_from_user) * For misaligned copy we still perform aligned word copy, but * we need to use the value fetched from the previous iteration and * do some shifts. - * This is safe because reading less than a word size. + * This is safe because reading is less than a word size. * * a0 - start of aligned dst * a1 - start of src @@ -117,7 +116,7 @@ ENTRY(__asm_copy_from_user) */ /* calculating aligned word boundary for dst */ andi t1, t0, ~(SZREG-1) - /* Converting unaligned src to aligned arc */ + /* Converting unaligned src to aligned src */ andi a1, a1, ~(SZREG-1) /* @@ -125,11 +124,11 @@ ENTRY(__asm_copy_from_user) * t3 - prev shift * t4 - current shift */ - slli t3, a3, LGREG + slli t3, a3, 3 /* converting bytes in a3 to bits */ li a5, SZREG*8 sub t4, a5, t3 - /* Load the first word to combine with seceond word */ + /* Load the first word to combine with second word */ fixup REG_L a5, 0(a1), 10f 3: @@ -161,7 +160,7 @@ ENTRY(__asm_copy_from_user) * a1 - start of remaining src * t0 - end of remaining dst */ - bgeu a0, t0, 5f + bgeu a0, t0, .Lout_copy_user /* check if end of copy */ 4: fixup lb a5, 0(a1), 10f addi a1, a1, 1 /* src */ @@ -169,7 +168,7 @@ ENTRY(__asm_copy_from_user) addi a0, a0, 1 /* dst */ bltu a0, t0, 4b /* t0 - end of dst */ -5: +.Lout_copy_user: /* Disable access to user memory */ csrc CSR_STATUS, t6 li a0, 0 diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 269fc648ef3d..a14bf3910eec 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -127,10 +127,17 @@ void __init mem_init(void) } /* - * The default maximal physical memory size is -PAGE_OFFSET, - * limit the memory size via mem. + * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel, + * whereas for 64-bit kernel, the end of the virtual address space is occupied + * by the modules/BPF/kernel mappings which reduces the available size of the + * linear mapping. + * Limit the memory size via mem. */ +#ifdef CONFIG_64BIT +static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G; +#else static phys_addr_t memory_limit = -PAGE_OFFSET; +#endif static int __init early_mem(char *p) { @@ -152,7 +159,7 @@ static void __init setup_bootmem(void) { phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_start = __pa_symbol(&_start); - phys_addr_t max_mapped_addr = __pa(~(ulong)0); + phys_addr_t __maybe_unused max_mapped_addr; phys_addr_t dram_end; #ifdef CONFIG_XIP_KERNEL @@ -175,14 +182,21 @@ static void __init setup_bootmem(void) memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); dram_end = memblock_end_of_DRAM(); + +#ifndef CONFIG_64BIT /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE * macro. Make sure that last 4k bytes are not usable by memblock - * if end of dram is equal to maximum addressable memory. + * if end of dram is equal to maximum addressable memory. For 64-bit + * kernel, this problem can't happen here as the end of the virtual + * address space is occupied by the kernel mapping then this check must + * be done in create_kernel_page_table. */ + max_mapped_addr = __pa(~(ulong)0); if (max_mapped_addr == (dram_end - 1)) memblock_set_current_limit(max_mapped_addr - 4096); +#endif min_low_pfn = PFN_UP(memblock_start_of_DRAM()); max_low_pfn = max_pfn = PFN_DOWN(dram_end); @@ -570,6 +584,14 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); BUG_ON((kernel_map.phys_addr % map_size) != 0); +#ifdef CONFIG_64BIT + /* + * The last 4K bytes of the addressable memory can not be mapped because + * of IS_ERR_VALUE macro. + */ + BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); +#endif + pt_ops.alloc_pte = alloc_pte_early; pt_ops.get_pte_virt = get_pte_virt_early; #ifndef __PAGETABLE_PMD_FOLDED @@ -709,6 +731,8 @@ static void __init setup_vm_final(void) if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); + if (end >= __pa(PAGE_OFFSET) + memory_limit) + end = __pa(PAGE_OFFSET) + memory_limit; map_size = best_map_size(start, end - start); for (pa = start; pa < end; pa += map_size) { diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c index 81de865f4c7c..e6497424cbf6 100644 --- a/arch/riscv/net/bpf_jit_comp32.c +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -1251,6 +1251,10 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, return -1; break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_W: diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 87e3bf5b9086..3af4131c22c7 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -939,6 +939,10 @@ out_be: emit_ld(rd, 0, RV_REG_T1, ctx); break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: emit_imm(RV_REG_T1, imm, ctx); diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index 660c799d875d..e30d3fdbbc78 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -11,6 +11,7 @@ UBSAN_SANITIZE := n KASAN_SANITIZE := n obj-y := $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o +obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o obj-all := $(obj-y) piggy.o syms.o targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 targets += vmlinux.bin.xz vmlinux.bin.lzma vmlinux.bin.lzo vmlinux.bin.lz4 diff --git a/arch/s390/boot/compressed/clz_ctz.c b/arch/s390/boot/compressed/clz_ctz.c new file mode 100644 index 000000000000..c3ebf248596b --- /dev/null +++ b/arch/s390/boot/compressed/clz_ctz.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "../../../../lib/clz_ctz.c" diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 7de253f766e8..b88184019af9 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -335,7 +335,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index b671642967ba..1667a3cdcf0a 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -325,7 +325,7 @@ CONFIG_L2TP_DEBUGFS=m CONFIG_L2TP_V3=y CONFIG_L2TP_IP=m CONFIG_L2TP_ETH=m -CONFIG_BRIDGE=m +CONFIG_BRIDGE=y CONFIG_BRIDGE_MRP=y CONFIG_VLAN_8021Q=m CONFIG_VLAN_8021Q_GVRP=y diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 9b4473f76e56..161a9e12bfb8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -445,15 +445,15 @@ struct kvm_vcpu_stat { u64 instruction_sigp_init_cpu_reset; u64 instruction_sigp_cpu_reset; u64 instruction_sigp_unknown; - u64 diagnose_10; - u64 diagnose_44; - u64 diagnose_9c; - u64 diagnose_9c_ignored; - u64 diagnose_9c_forward; - u64 diagnose_258; - u64 diagnose_308; - u64 diagnose_500; - u64 diagnose_other; + u64 instruction_diagnose_10; + u64 instruction_diagnose_44; + u64 instruction_diagnose_9c; + u64 diag_9c_ignored; + u64 diag_9c_forward; + u64 instruction_diagnose_258; + u64 instruction_diagnose_308; + u64 instruction_diagnose_500; + u64 instruction_diagnose_other; u64 pfault_sync; }; diff --git a/arch/s390/kernel/vdso32/vdso32.lds.S b/arch/s390/kernel/vdso32/vdso32.lds.S index bff50b6acd6d..edf5ff1debe1 100644 --- a/arch/s390/kernel/vdso32/vdso32.lds.S +++ b/arch/s390/kernel/vdso32/vdso32.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index d4fb336d747b..4461ea151e49 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -51,6 +51,7 @@ SECTIONS .rela.dyn ALIGN(8) : { *(.rela.dyn) } .got ALIGN(8) : { *(.got .toc) } + .got.plt ALIGN(8) : { *(.got.plt) } _end = .; PROVIDE(end = .); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 02c146f9e5cd..807fa9da1e72 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -24,7 +24,7 @@ static int diag_release_pages(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + PAGE_SIZE; - vcpu->stat.diagnose_10++; + vcpu->stat.instruction_diagnose_10++; if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end || start < 2 * PAGE_SIZE) @@ -74,7 +74,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "diag page reference parameter block at 0x%llx", vcpu->run->s.regs.gprs[rx]); - vcpu->stat.diagnose_258++; + vcpu->stat.instruction_diagnose_258++; if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); @@ -145,7 +145,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) static int __diag_time_slice_end(struct kvm_vcpu *vcpu) { VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); - vcpu->stat.diagnose_44++; + vcpu->stat.instruction_diagnose_44++; kvm_vcpu_on_spin(vcpu, true); return 0; } @@ -169,7 +169,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) int tid; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; - vcpu->stat.diagnose_9c++; + vcpu->stat.instruction_diagnose_9c++; /* yield to self */ if (tid == vcpu->vcpu_id) @@ -192,7 +192,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: yield forwarded", tid); - vcpu->stat.diagnose_9c_forward++; + vcpu->stat.diag_9c_forward++; return 0; } @@ -203,7 +203,7 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) return 0; no_yield: VCPU_EVENT(vcpu, 5, "diag time slice end directed to %d: ignored", tid); - vcpu->stat.diagnose_9c_ignored++; + vcpu->stat.diag_9c_ignored++; return 0; } @@ -213,7 +213,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) unsigned long subcode = vcpu->run->s.regs.gprs[reg] & 0xffff; VCPU_EVENT(vcpu, 3, "diag ipl functions, subcode %lx", subcode); - vcpu->stat.diagnose_308++; + vcpu->stat.instruction_diagnose_308++; switch (subcode) { case 3: vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; @@ -245,7 +245,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) { int ret; - vcpu->stat.diagnose_500++; + vcpu->stat.instruction_diagnose_500++; /* No virtio-ccw notification? Get out quickly. */ if (!vcpu->kvm->arch.css_support || (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) @@ -299,7 +299,7 @@ int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) case 0x500: return __diag_virtio_hypercall(vcpu); default: - vcpu->stat.diagnose_other++; + vcpu->stat.instruction_diagnose_other++; return -EOPNOTSUPP; } } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b655a7d82bf0..4527ac7b5961 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -163,15 +163,15 @@ const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = { STATS_DESC_COUNTER(VCPU, instruction_sigp_init_cpu_reset), STATS_DESC_COUNTER(VCPU, instruction_sigp_cpu_reset), STATS_DESC_COUNTER(VCPU, instruction_sigp_unknown), - STATS_DESC_COUNTER(VCPU, diagnose_10), - STATS_DESC_COUNTER(VCPU, diagnose_44), - STATS_DESC_COUNTER(VCPU, diagnose_9c), - STATS_DESC_COUNTER(VCPU, diagnose_9c_ignored), - STATS_DESC_COUNTER(VCPU, diagnose_9c_forward), - STATS_DESC_COUNTER(VCPU, diagnose_258), - STATS_DESC_COUNTER(VCPU, diagnose_308), - STATS_DESC_COUNTER(VCPU, diagnose_500), - STATS_DESC_COUNTER(VCPU, diagnose_other), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_10), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_44), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_9c), + STATS_DESC_COUNTER(VCPU, diag_9c_ignored), + STATS_DESC_COUNTER(VCPU, diag_9c_forward), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_258), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_308), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_500), + STATS_DESC_COUNTER(VCPU, instruction_diagnose_other), STATS_DESC_COUNTER(VCPU, pfault_sync) }; static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) == diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 2ae419f5115a..88419263a89a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1154,6 +1154,11 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, } break; /* + * BPF_NOSPEC (speculation barrier) + */ + case BPF_ST | BPF_NOSPEC: + break; + /* * BPF_ST(X) */ case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */ diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 45a0549421cd..b683b69a4556 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -39,7 +39,6 @@ config SUPERH select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_FTRACE_MCOUNT_RECORD select HAVE_HW_BREAKPOINT - select HAVE_IDE if HAS_IOPORT_MAP select HAVE_IOREMAP_PROT if MMU && !X2TLB select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c5fa7932b550..f0c0f955e169 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -19,7 +19,6 @@ config SPARC select OF select OF_PROMTREE select HAVE_ASM_MODVERSIONS - select HAVE_IDE select HAVE_ARCH_KGDB if !SMP || SPARC64 select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_SECCOMP if SPARC64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 92675dc380fa..e80ee8641ac3 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -122,6 +122,8 @@ #define SO_NETNS_COOKIE 0x0050 +#define SO_BUF_LOCK 0x0051 + #if !defined(__KERNEL__) diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 4b8d3c65d266..9a2f20cbd48b 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1287,6 +1287,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) return 1; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; /* ST: *(size *)(dst + off) = imm */ case BPF_ST | BPF_MEM | BPF_W: case BPF_ST | BPF_MEM | BPF_H: diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 49270655e827..88fb922c23a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -202,7 +202,6 @@ config X86 select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT - select HAVE_IDE select HAVE_IOREMAP_PROT select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64 select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 674906fad43b..68f091ba8443 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -79,9 +79,10 @@ __jump_label_patch(struct jump_entry *entry, enum jump_label_type type) return (struct jump_label_patch){.code = code, .size = size}; } -static inline void __jump_label_transform(struct jump_entry *entry, - enum jump_label_type type, - int init) +static __always_inline void +__jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + int init) { const struct jump_label_patch jlp = __jump_label_patch(entry, type); diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index b07592ca92f0..0b38f944c6b6 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -2016,6 +2016,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; return kvm_skip_emulated_instruction(vcpu); @@ -2139,6 +2140,7 @@ static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code) int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct kvm_hv_hcall hc; u64 ret = HV_STATUS_SUCCESS; @@ -2173,17 +2175,25 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff; hc.rep = !!(hc.rep_cnt || hc.rep_idx); - if (hc.fast && is_xmm_fast_hypercall(&hc)) - kvm_hv_hypercall_read_xmm(&hc); - trace_kvm_hv_hypercall(hc.code, hc.fast, hc.rep_cnt, hc.rep_idx, hc.ingpa, hc.outgpa); - if (unlikely(!hv_check_hypercall_access(to_hv_vcpu(vcpu), hc.code))) { + if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) { ret = HV_STATUS_ACCESS_DENIED; goto hypercall_complete; } + if (hc.fast && is_xmm_fast_hypercall(&hc)) { + if (unlikely(hv_vcpu->enforce_cpuid && + !(hv_vcpu->cpuid_cache.features_edx & + HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } + + kvm_hv_hypercall_read_xmm(&hc); + } + switch (hc.code) { case HVCALL_NOTIFY_LONG_SPIN_WAIT: if (unlikely(hc.rep)) { diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 698969e18fe3..ff005fe738a4 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 660401700075..11e4065e1617 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -43,13 +43,13 @@ struct kvm_vcpu; struct dest_map { /* vcpu bitmap where IRQ has been sent */ - DECLARE_BITMAP(map, KVM_MAX_VCPU_ID); + DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ - u8 vectors[KVM_MAX_VCPU_ID]; + u8 vectors[KVM_MAX_VCPU_ID + 1]; }; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 66f7f5bc3482..c4f4fa23320e 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1644,7 +1644,7 @@ static int is_empty_shadow_page(u64 *spt) * aggregate version in order to make the slab shrinker * faster */ -static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, unsigned long nr) +static inline void kvm_mod_used_mmu_pages(struct kvm *kvm, long nr) { kvm->arch.n_used_mmu_pages += nr; percpu_counter_add(&kvm_total_used_mmu_pages, nr); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 1d01da64c333..a8ad78a2faa1 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -646,7 +646,7 @@ out: void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb; + struct vmcb *vmcb = svm->vmcb01.ptr; bool activated = kvm_vcpu_apicv_active(vcpu); if (!enable_apicv) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3bd09c50c98b..61738ff8ef33 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -515,7 +515,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id, * avic_physical_id. */ - WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK); + WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); /* Copied from vmcb01. msrpm_base can be overwritten later. */ svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl; @@ -702,8 +702,8 @@ out: } /* Copy state save area fields which are handled by VMRUN */ -void svm_copy_vmrun_state(struct vmcb_save_area *from_save, - struct vmcb_save_area *to_save) +void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save) { to_save->es = from_save->es; to_save->cs = from_save->cs; @@ -722,7 +722,7 @@ void svm_copy_vmrun_state(struct vmcb_save_area *from_save, to_save->cpl = 0; } -void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb) +void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) { to_vmcb->save.fs = from_vmcb->save.fs; to_vmcb->save.gs = from_vmcb->save.gs; @@ -1385,7 +1385,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - svm_copy_vmrun_state(save, &svm->vmcb01.ptr->save); + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); nested_load_control_from_vmcb12(svm, ctl); svm_switch_vmcb(svm, &svm->nested.vmcb02); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6710d9ee2e4b..7fbce342eec4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -64,6 +64,7 @@ static DEFINE_MUTEX(sev_bitmap_lock); unsigned int max_sev_asid; static unsigned int min_sev_asid; static unsigned long sev_me_mask; +static unsigned int nr_asids; static unsigned long *sev_asid_bitmap; static unsigned long *sev_reclaim_asid_bitmap; @@ -78,11 +79,11 @@ struct enc_region { /* Called with the sev_bitmap_lock held, or on shutdown */ static int sev_flush_asids(int min_asid, int max_asid) { - int ret, pos, error = 0; + int ret, asid, error = 0; /* Check if there are any ASIDs to reclaim before performing a flush */ - pos = find_next_bit(sev_reclaim_asid_bitmap, max_asid, min_asid); - if (pos >= max_asid) + asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); + if (asid > max_asid) return -EBUSY; /* @@ -115,15 +116,15 @@ static bool __sev_recycle_asids(int min_asid, int max_asid) /* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */ bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap, - max_sev_asid); - bitmap_zero(sev_reclaim_asid_bitmap, max_sev_asid); + nr_asids); + bitmap_zero(sev_reclaim_asid_bitmap, nr_asids); return true; } static int sev_asid_new(struct kvm_sev_info *sev) { - int pos, min_asid, max_asid, ret; + int asid, min_asid, max_asid, ret; bool retry = true; enum misc_res_type type; @@ -143,11 +144,11 @@ static int sev_asid_new(struct kvm_sev_info *sev) * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. */ - min_asid = sev->es_active ? 0 : min_sev_asid - 1; + min_asid = sev->es_active ? 1 : min_sev_asid; max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: - pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid); - if (pos >= max_asid) { + asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); + if (asid > max_asid) { if (retry && __sev_recycle_asids(min_asid, max_asid)) { retry = false; goto again; @@ -157,11 +158,11 @@ again: goto e_uncharge; } - __set_bit(pos, sev_asid_bitmap); + __set_bit(asid, sev_asid_bitmap); mutex_unlock(&sev_bitmap_lock); - return pos + 1; + return asid; e_uncharge: misc_cg_uncharge(type, sev->misc_cg, 1); put_misc_cg(sev->misc_cg); @@ -179,17 +180,16 @@ static int sev_get_asid(struct kvm *kvm) static void sev_asid_free(struct kvm_sev_info *sev) { struct svm_cpu_data *sd; - int cpu, pos; + int cpu; enum misc_res_type type; mutex_lock(&sev_bitmap_lock); - pos = sev->asid - 1; - __set_bit(pos, sev_reclaim_asid_bitmap); + __set_bit(sev->asid, sev_reclaim_asid_bitmap); for_each_possible_cpu(cpu) { sd = per_cpu(svm_data, cpu); - sd->sev_vmcbs[pos] = NULL; + sd->sev_vmcbs[sev->asid] = NULL; } mutex_unlock(&sev_bitmap_lock); @@ -1857,12 +1857,17 @@ void __init sev_hardware_setup(void) min_sev_asid = edx; sev_me_mask = 1UL << (ebx & 0x3f); - /* Initialize SEV ASID bitmaps */ - sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + /* + * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap, + * even though it's never used, so that the bitmap is indexed by the + * actual ASID. + */ + nr_asids = max_sev_asid + 1; + sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_asid_bitmap) goto out; - sev_reclaim_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL); + sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL); if (!sev_reclaim_asid_bitmap) { bitmap_free(sev_asid_bitmap); sev_asid_bitmap = NULL; @@ -1907,7 +1912,7 @@ void sev_hardware_teardown(void) return; /* No need to take sev_bitmap_lock, all VMs have been destroyed. */ - sev_flush_asids(0, max_sev_asid); + sev_flush_asids(1, max_sev_asid); bitmap_free(sev_asid_bitmap); bitmap_free(sev_reclaim_asid_bitmap); @@ -1921,7 +1926,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) if (!sev_enabled) return 0; - sd->sev_vmcbs = kcalloc(max_sev_asid + 1, sizeof(void *), GFP_KERNEL); + sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL); if (!sd->sev_vmcbs) return -ENOMEM; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 664d20f0689c..e8ccab50ebf6 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1406,8 +1406,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) goto error_free_vmsa_page; } - svm_vcpu_init_msrpm(vcpu, svm->msrpm); - svm->vmcb01.ptr = page_address(vmcb01_page); svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT); @@ -1419,6 +1417,8 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm_switch_vmcb(svm, &svm->vmcb01); init_vmcb(vcpu); + svm_vcpu_init_msrpm(vcpu, svm->msrpm); + svm_init_osvw(vcpu); vcpu->arch.microcode_version = 0x01000065; @@ -1568,8 +1568,11 @@ static void svm_set_vintr(struct vcpu_svm *svm) { struct vmcb_control_area *control; - /* The following fields are ignored when AVIC is enabled */ - WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); + /* + * The following fields are ignored when AVIC is enabled + */ + WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); + svm_set_intercept(svm, INTERCEPT_VINTR); /* @@ -2147,11 +2150,12 @@ static int vmload_vmsave_interception(struct kvm_vcpu *vcpu, bool vmload) ret = kvm_skip_emulated_instruction(vcpu); if (vmload) { - nested_svm_vmloadsave(vmcb12, svm->vmcb); + svm_copy_vmloadsave_state(svm->vmcb, vmcb12); svm->sysenter_eip_hi = 0; svm->sysenter_esp_hi = 0; - } else - nested_svm_vmloadsave(svm->vmcb, vmcb12); + } else { + svm_copy_vmloadsave_state(vmcb12, svm->vmcb); + } kvm_vcpu_unmap(vcpu, &map, true); @@ -4344,8 +4348,8 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); - svm_copy_vmrun_state(&svm->vmcb01.ptr->save, - map_save.hva + 0x400); + svm_copy_vmrun_state(map_save.hva + 0x400, + &svm->vmcb01.ptr->save); kvm_vcpu_unmap(vcpu, &map_save, true); } @@ -4393,8 +4397,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) &map_save) == -EINVAL) return 1; - svm_copy_vmrun_state(map_save.hva + 0x400, - &svm->vmcb01.ptr->save); + svm_copy_vmrun_state(&svm->vmcb01.ptr->save, + map_save.hva + 0x400); kvm_vcpu_unmap(vcpu, &map_save, true); } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7e2090752d8f..bd0fe94c2920 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -464,9 +464,9 @@ void svm_leave_nested(struct vcpu_svm *svm); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); -void svm_copy_vmrun_state(struct vmcb_save_area *from_save, - struct vmcb_save_area *to_save); -void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); +void svm_copy_vmrun_state(struct vmcb_save_area *to_save, + struct vmcb_save_area *from_save); +void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code) diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 9b9a55abc29f..c53b8bf8d013 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -89,7 +89,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( * as we mark it dirty unconditionally towards end of vcpu * init phase. */ - if (vmcb && vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && + if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); } diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index b484141ea15b..03ebe368333e 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -92,6 +92,21 @@ TRACE_EVENT(kvm_hv_hypercall, __entry->outgpa) ); +TRACE_EVENT(kvm_hv_hypercall_done, + TP_PROTO(u64 result), + TP_ARGS(result), + + TP_STRUCT__entry( + __field(__u64, result) + ), + + TP_fast_assign( + __entry->result = result; + ), + + TP_printk("result 0x%llx", __entry->result) +); + /* * Tracepoint for Xen hypercall. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a4fd10604f72..e5d5c5ed7dd4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3407,7 +3407,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; break; case MSR_KVM_ASYNC_PF_ACK: - if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) return 1; if (data & 0x1) { vcpu->arch.apf.pageready_pending = false; @@ -3746,7 +3746,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.apf.msr_int_val; break; case MSR_KVM_ASYNC_PF_ACK: - if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF)) + if (!guest_pv_has(vcpu, KVM_FEATURE_ASYNC_PF_INT)) return 1; msr_info->data = 0; @@ -4358,8 +4358,17 @@ static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu) static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu) { - return kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_accept_dm_intr(vcpu); + /* + * Do not cause an interrupt window exit if an exception + * is pending or an event needs reinjection; userspace + * might want to inject the interrupt manually using KVM_SET_REGS + * or KVM_SET_SREGS. For that to work, we must be at an + * instruction boundary and with no events half-injected. + */ + return (kvm_arch_interrupt_allowed(vcpu) && + kvm_cpu_accept_dm_intr(vcpu) && + !kvm_event_needs_reinjection(vcpu) && + !vcpu->arch.exception.pending); } static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 333650b9372a..0fe6aacef3db 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1219,6 +1219,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } break; + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; + /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_B: if (is_ereg(dst_reg)) diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c index 3da88ded6ee3..3bfda5f502cb 100644 --- a/arch/x86/net/bpf_jit_comp32.c +++ b/arch/x86/net/bpf_jit_comp32.c @@ -1886,6 +1886,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, i++; break; } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + if (boot_cpu_has(X86_FEATURE_XMM2)) + /* Emit 'lfence' */ + EMIT3(0x0F, 0xAE, 0xE8); + break; /* ST: *(u8*)(dst_reg + off) = imm */ case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 2332b2156993..3878880469d1 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -327,7 +327,6 @@ config XTENSA_PLATFORM_ISS config XTENSA_PLATFORM_XT2000 bool "XT2000" - select HAVE_IDE help XT2000 is the name of Tensilica's feature-rich emulation platform. This hardware is capable of running a full Linux distribution. diff --git a/block/blk-iocost.c b/block/blk-iocost.c index c2d6bc88d3f1..5fac3757e6e0 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode, return -1; iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost); + wait->committed = true; /* * autoremove_wake_function() removes the wait entry only when it - * actually changed the task state. We want the wait always - * removed. Remove explicitly and use default_wake_function(). + * actually changed the task state. We want the wait always removed. + * Remove explicitly and use default_wake_function(). Note that the + * order of operations is important as finish_wait() tests whether + * @wq_entry is removed without grabbing the lock. */ - list_del_init(&wq_entry->entry); - wait->committed = true; - default_wake_function(wq_entry, mode, flags, key); + list_del_init_careful(&wq_entry->entry); return 0; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c838d81ac058..0f006cabfd91 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx, percpu_ref_put(&q->q_usage_counter); } -static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set, - struct blk_mq_hw_ctx *hctx, - unsigned int hctx_idx) -{ - if (hctx->sched_tags) { - blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx); - blk_mq_free_rq_map(hctx->sched_tags, set->flags); - hctx->sched_tags = NULL; - } -} - static int blk_mq_sched_alloc_tags(struct request_queue *q, struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx) @@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q, return -ENOMEM; ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests); - if (ret) - blk_mq_sched_free_tags(set, hctx, hctx_idx); + if (ret) { + blk_mq_free_rq_map(hctx->sched_tags, set->flags); + hctx->sched_tags = NULL; + } return ret; } diff --git a/block/genhd.c b/block/genhd.c index af4d2ab4a633..298ee78c1bda 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev) disk_release_events(disk); kfree(disk->random); xa_destroy(&disk->part_tbl); - bdput(disk->part0); if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue) blk_put_queue(disk->queue); - kfree(disk); + bdput(disk->part0); /* frees the disk */ } struct class block_class = { .name = "block", diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 9d872ea477a6..8f9940f40baa 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -370,7 +370,7 @@ config ACPI_TABLE_UPGRADE config ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD bool "Override ACPI tables from built-in initrd" depends on ACPI_TABLE_UPGRADE - depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION="" + depends on INITRAMFS_SOURCE!="" && INITRAMFS_COMPRESSION_NONE help This option provides functionality to override arbitrary ACPI tables from built-in uncompressed initrd. diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c index 5fca18296bf6..550b9081fcbc 100644 --- a/drivers/acpi/dptf/dptf_pch_fivr.c +++ b/drivers/acpi/dptf/dptf_pch_fivr.c @@ -9,6 +9,42 @@ #include <linux/module.h> #include <linux/platform_device.h> +struct pch_fivr_resp { + u64 status; + u64 result; +}; + +static int pch_fivr_read(acpi_handle handle, char *method, struct pch_fivr_resp *fivr_resp) +{ + struct acpi_buffer resp = { sizeof(struct pch_fivr_resp), fivr_resp}; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer format = { sizeof("NN"), "NN" }; + union acpi_object *obj; + acpi_status status; + int ret = -EFAULT; + + status = acpi_evaluate_object(handle, method, NULL, &buffer); + if (ACPI_FAILURE(status)) + return ret; + + obj = buffer.pointer; + if (!obj || obj->type != ACPI_TYPE_PACKAGE) + goto release_buffer; + + status = acpi_extract_package(obj, &format, &resp); + if (ACPI_FAILURE(status)) + goto release_buffer; + + if (fivr_resp->status) + goto release_buffer; + + ret = 0; + +release_buffer: + kfree(buffer.pointer); + return ret; +} + /* * Presentation of attributes which are defined for INT1045 * They are: @@ -23,15 +59,14 @@ static ssize_t name##_show(struct device *dev,\ char *buf)\ {\ struct acpi_device *acpi_dev = dev_get_drvdata(dev);\ - unsigned long long val;\ - acpi_status status;\ + struct pch_fivr_resp fivr_resp;\ + int status;\ \ - status = acpi_evaluate_integer(acpi_dev->handle, #method,\ - NULL, &val);\ - if (ACPI_SUCCESS(status))\ - return sprintf(buf, "%d\n", (int)val);\ - else\ - return -EINVAL;\ + status = pch_fivr_read(acpi_dev->handle, #method, &fivr_resp);\ + if (status)\ + return status;\ +\ + return sprintf(buf, "%llu\n", fivr_resp.result);\ } #define PCH_FIVR_STORE(name, method) \ diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index dc01fb550b28..ee78a210c606 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -423,13 +423,6 @@ static void acpi_dev_get_irqresource(struct resource *res, u32 gsi, } } -static bool irq_is_legacy(struct acpi_resource_irq *irq) -{ - return irq->triggering == ACPI_EDGE_SENSITIVE && - irq->polarity == ACPI_ACTIVE_HIGH && - irq->shareable == ACPI_EXCLUSIVE; -} - /** * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information. * @ares: Input ACPI resource object. @@ -468,7 +461,7 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, } acpi_dev_get_irqresource(res, irq->interrupts[index], irq->triggering, irq->polarity, - irq->shareable, irq_is_legacy(irq)); + irq->shareable, true); break; case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: ext_irq = &ares->data.extended_irq; diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index e7ddd281afff..d5cedffeeff9 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -860,11 +860,9 @@ EXPORT_SYMBOL(acpi_dev_present); * Return the next match of ACPI device if another matching device was present * at the moment of invocation, or NULL otherwise. * - * FIXME: The function does not tolerate the sudden disappearance of @adev, e.g. - * in the case of a hotplug event. That said, the caller should ensure that - * this will never happen. - * * The caller is responsible for invoking acpi_dev_put() on the returned device. + * On the other hand the function invokes acpi_dev_put() on the given @adev + * assuming that its reference counter had been increased beforehand. * * See additional information in acpi_dev_present() as well. */ @@ -880,6 +878,7 @@ acpi_dev_get_next_match_dev(struct acpi_device *adev, const char *hid, const cha match.hrv = hrv; dev = bus_find_device(&acpi_bus_type, start, &match, acpi_dev_match_cb); + acpi_dev_put(adev); return dev ? to_acpi_device(dev) : NULL; } EXPORT_SYMBOL(acpi_dev_get_next_match_dev); diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index 1c507804fb10..fbdbef0ab552 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -378,19 +378,25 @@ static int lps0_device_attach(struct acpi_device *adev, * AMDI0006: * - should use rev_id 0x0 * - function mask = 0x3: Should use Microsoft method + * AMDI0007: + * - Should use rev_id 0x2 + * - Should only use AMD method */ const char *hid = acpi_device_hid(adev); - rev_id = 0; + rev_id = strcmp(hid, "AMDI0007") ? 0 : 2; lps0_dsm_func_mask = validate_dsm(adev->handle, ACPI_LPS0_DSM_UUID_AMD, rev_id, &lps0_dsm_guid); lps0_dsm_func_mask_microsoft = validate_dsm(adev->handle, - ACPI_LPS0_DSM_UUID_MICROSOFT, rev_id, + ACPI_LPS0_DSM_UUID_MICROSOFT, 0, &lps0_dsm_guid_microsoft); if (lps0_dsm_func_mask > 0x3 && (!strcmp(hid, "AMD0004") || !strcmp(hid, "AMDI0005"))) { lps0_dsm_func_mask = (lps0_dsm_func_mask << 1) | 0x1; acpi_handle_debug(adev->handle, "_DSM UUID %s: Adjusted function mask: 0x%x\n", ACPI_LPS0_DSM_UUID_AMD, lps0_dsm_func_mask); + } else if (lps0_dsm_func_mask_microsoft > 0 && !strcmp(hid, "AMDI0007")) { + lps0_dsm_func_mask_microsoft = -EINVAL; + acpi_handle_debug(adev->handle, "_DSM Using AMD method\n"); } } else { rev_id = 1; diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index ae7189d1a568..b71ea4a680b0 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -637,6 +637,20 @@ unsigned int ata_sff_data_xfer32(struct ata_queued_cmd *qc, unsigned char *buf, } EXPORT_SYMBOL_GPL(ata_sff_data_xfer32); +static void ata_pio_xfer(struct ata_queued_cmd *qc, struct page *page, + unsigned int offset, size_t xfer_size) +{ + bool do_write = (qc->tf.flags & ATA_TFLAG_WRITE); + unsigned char *buf; + + buf = kmap_atomic(page); + qc->ap->ops->sff_data_xfer(qc, buf + offset, xfer_size, do_write); + kunmap_atomic(buf); + + if (!do_write && !PageSlab(page)) + flush_dcache_page(page); +} + /** * ata_pio_sector - Transfer a sector of data. * @qc: Command on going @@ -648,11 +662,9 @@ EXPORT_SYMBOL_GPL(ata_sff_data_xfer32); */ static void ata_pio_sector(struct ata_queued_cmd *qc) { - int do_write = (qc->tf.flags & ATA_TFLAG_WRITE); struct ata_port *ap = qc->ap; struct page *page; unsigned int offset; - unsigned char *buf; if (!qc->cursg) { qc->curbytes = qc->nbytes; @@ -670,13 +682,20 @@ static void ata_pio_sector(struct ata_queued_cmd *qc) DPRINTK("data %s\n", qc->tf.flags & ATA_TFLAG_WRITE ? "write" : "read"); - /* do the actual data transfer */ - buf = kmap_atomic(page); - ap->ops->sff_data_xfer(qc, buf + offset, qc->sect_size, do_write); - kunmap_atomic(buf); + /* + * Split the transfer when it splits a page boundary. Note that the + * split still has to be dword aligned like all ATA data transfers. + */ + WARN_ON_ONCE(offset % 4); + if (offset + qc->sect_size > PAGE_SIZE) { + unsigned int split_len = PAGE_SIZE - offset; - if (!do_write && !PageSlab(page)) - flush_dcache_page(page); + ata_pio_xfer(qc, page, offset, split_len); + ata_pio_xfer(qc, nth_page(page, 1), 0, + qc->sect_size - split_len); + } else { + ata_pio_xfer(qc, page, offset, qc->sect_size); + } qc->curbytes += qc->sect_size; qc->cursg_ofs += qc->sect_size; diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 4f2951cbe69c..d0e67ec46216 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2167,10 +2167,10 @@ static int hrz_open (struct atm_vcc *atm_vcc) // Part of the job is done by atm_pcr_goal which gives us a PCR // specification which says: EITHER grab the maximum available PCR - // (and perhaps a lower bound which we musn't pass), OR grab this + // (and perhaps a lower bound which we must not pass), OR grab this // amount, rounding down if you have to (and perhaps a lower bound - // which we musn't pass) OR grab this amount, rounding up if you - // have to (and perhaps an upper bound which we musn't pass). If any + // which we must not pass) OR grab this amount, rounding up if you + // have to (and perhaps an upper bound which we must not pass). If any // bounds ARE passed we fail. Note that rounding is only rounding to // match device limitations, we do not round down to satisfy // bandwidth availability even if this would not violate any given diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index adc199dfba3c..6a30264ab2ba 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -231,6 +231,8 @@ EXPORT_SYMBOL_GPL(auxiliary_find_device); int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, struct module *owner, const char *modname) { + int ret; + if (WARN_ON(!auxdrv->probe) || WARN_ON(!auxdrv->id_table)) return -EINVAL; @@ -246,7 +248,11 @@ int __auxiliary_driver_register(struct auxiliary_driver *auxdrv, auxdrv->driver.bus = &auxiliary_bus_type; auxdrv->driver.mod_name = modname; - return driver_register(&auxdrv->driver); + ret = driver_register(&auxdrv->driver); + if (ret) + kfree(auxdrv->driver.name); + + return ret; } EXPORT_SYMBOL_GPL(__auxiliary_driver_register); diff --git a/drivers/base/core.c b/drivers/base/core.c index cadcade65825..f6360490a4a3 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -574,8 +574,10 @@ static void devlink_remove_symlinks(struct device *dev, return; } - snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); - sysfs_remove_link(&con->kobj, buf); + if (device_is_registered(con)) { + snprintf(buf, len, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); + sysfs_remove_link(&con->kobj, buf); + } snprintf(buf, len, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); sysfs_remove_link(&sup->kobj, buf); kfree(buf); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f37b9e3d833c..f0cdff0c5fbf 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -88,6 +88,47 @@ static DEFINE_IDR(loop_index_idr); static DEFINE_MUTEX(loop_ctl_mutex); +static DEFINE_MUTEX(loop_validate_mutex); + +/** + * loop_global_lock_killable() - take locks for safe loop_validate_file() test + * + * @lo: struct loop_device + * @global: true if @lo is about to bind another "struct loop_device", false otherwise + * + * Returns 0 on success, -EINTR otherwise. + * + * Since loop_validate_file() traverses on other "struct loop_device" if + * is_loop_device() is true, we need a global lock for serializing concurrent + * loop_configure()/loop_change_fd()/__loop_clr_fd() calls. + */ +static int loop_global_lock_killable(struct loop_device *lo, bool global) +{ + int err; + + if (global) { + err = mutex_lock_killable(&loop_validate_mutex); + if (err) + return err; + } + err = mutex_lock_killable(&lo->lo_mutex); + if (err && global) + mutex_unlock(&loop_validate_mutex); + return err; +} + +/** + * loop_global_unlock() - release locks taken by loop_global_lock_killable() + * + * @lo: struct loop_device + * @global: true if @lo was about to bind another "struct loop_device", false otherwise + */ +static void loop_global_unlock(struct loop_device *lo, bool global) +{ + mutex_unlock(&lo->lo_mutex); + if (global) + mutex_unlock(&loop_validate_mutex); +} static int max_part; static int part_shift; @@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) while (is_loop_device(f)) { struct loop_device *l; + lockdep_assert_held(&loop_validate_mutex); if (f->f_mapping->host->i_rdev == bdev->bd_dev) return -EBADF; l = I_BDEV(f->f_mapping->host)->bd_disk->private_data; - if (l->lo_state != Lo_bound) { + if (l->lo_state != Lo_bound) return -EINVAL; - } + /* Order wrt setting lo->lo_backing_file in loop_configure(). */ + rmb(); f = l->lo_backing_file; } if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) @@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, unsigned int arg) { - struct file *file = NULL, *old_file; - int error; - bool partscan; + struct file *file = fget(arg); + struct file *old_file; + int error; + bool partscan; + bool is_loop; - error = mutex_lock_killable(&lo->lo_mutex); + if (!file) + return -EBADF; + is_loop = is_loop_device(file); + error = loop_global_lock_killable(lo, is_loop); if (error) - return error; + goto out_putf; error = -ENXIO; if (lo->lo_state != Lo_bound) goto out_err; @@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, if (!(lo->lo_flags & LO_FLAGS_READ_ONLY)) goto out_err; - error = -EBADF; - file = fget(arg); - if (!file) - goto out_err; - error = loop_validate_file(file, bdev); if (error) goto out_err; @@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, loop_update_dio(lo); blk_mq_unfreeze_queue(lo->lo_queue); partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); + + /* + * Flush loop_validate_file() before fput(), for l->lo_backing_file + * might be pointing at old_file which might be the last reference. + */ + if (!is_loop) { + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + } /* * We must drop file reference outside of lo_mutex as dropping * the file ref can take open_mutex which creates circular locking @@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, return 0; out_err: - mutex_unlock(&lo->lo_mutex); - if (file) - fput(file); + loop_global_unlock(lo, is_loop); +out_putf: + fput(file); return error; } @@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, struct block_device *bdev, const struct loop_config *config) { - struct file *file; - struct inode *inode; + struct file *file = fget(config->fd); + struct inode *inode; struct address_space *mapping; - int error; - loff_t size; - bool partscan; - unsigned short bsize; + int error; + loff_t size; + bool partscan; + unsigned short bsize; + bool is_loop; + + if (!file) + return -EBADF; + is_loop = is_loop_device(file); /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); - error = -EBADF; - file = fget(config->fd); - if (!file) - goto out; - /* * If we don't hold exclusive handle for the device, upgrade to it * here to avoid changing device under exclusive owner. @@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, goto out_putf; } - error = mutex_lock_killable(&lo->lo_mutex); + error = loop_global_lock_killable(lo, is_loop); if (error) goto out_bdev; @@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, size = get_loop_size(lo, file); loop_set_size(lo, size); + /* Order wrt reading lo_state in loop_validate_file(). */ + wmb(); + lo->lo_state = Lo_bound; if (part_shift) lo->lo_flags |= LO_FLAGS_PARTSCAN; @@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). */ bdgrab(bdev); - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); if (partscan) loop_reread_partitions(lo); if (!(mode & FMODE_EXCL)) @@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, return 0; out_unlock: - mutex_unlock(&lo->lo_mutex); + loop_global_unlock(lo, is_loop); out_bdev: if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, loop_configure); out_putf: fput(file); -out: /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); return error; @@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release) int lo_number; struct loop_worker *pos, *worker; + /* + * Flush loop_configure() and loop_change_fd(). It is acceptable for + * loop_validate_file() to succeed, for actual clear operation has not + * started yet. + */ + mutex_lock(&loop_validate_mutex); + mutex_unlock(&loop_validate_mutex); + /* + * loop_validate_file() now fails because l->lo_state != Lo_bound + * became visible. + */ + mutex_lock(&lo->lo_mutex); if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) { err = -ENXIO; diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 531d390902dd..90b947c96402 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -4100,8 +4100,6 @@ again: static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) { - bool need_wait; - dout("%s rbd_dev %p\n", __func__, rbd_dev); lockdep_assert_held_write(&rbd_dev->lock_rwsem); @@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev) */ rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING; rbd_assert(!completion_done(&rbd_dev->releasing_wait)); - need_wait = !list_empty(&rbd_dev->running_list); - downgrade_write(&rbd_dev->lock_rwsem); - if (need_wait) - wait_for_completion(&rbd_dev->releasing_wait); - up_read(&rbd_dev->lock_rwsem); + if (list_empty(&rbd_dev->running_list)) + return true; + + up_write(&rbd_dev->lock_rwsem); + wait_for_completion(&rbd_dev->releasing_wait); down_write(&rbd_dev->lock_rwsem); if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING) @@ -4203,15 +4201,11 @@ static void rbd_handle_acquired_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - /* - * we already know that the remote client is - * the owner - */ - up_write(&rbd_dev->lock_rwsem); - return; + dout("%s rbd_dev %p cid %llu-%llu == owner_cid\n", + __func__, rbd_dev, cid.gid, cid.handle); + } else { + rbd_set_owner_cid(rbd_dev, &cid); } - - rbd_set_owner_cid(rbd_dev, &cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); @@ -4236,14 +4230,12 @@ static void rbd_handle_released_lock(struct rbd_device *rbd_dev, u8 struct_v, if (!rbd_cid_equal(&cid, &rbd_empty_cid)) { down_write(&rbd_dev->lock_rwsem); if (!rbd_cid_equal(&cid, &rbd_dev->owner_cid)) { - dout("%s rbd_dev %p unexpected owner, cid %llu-%llu != owner_cid %llu-%llu\n", + dout("%s rbd_dev %p cid %llu-%llu != owner_cid %llu-%llu\n", __func__, rbd_dev, cid.gid, cid.handle, rbd_dev->owner_cid.gid, rbd_dev->owner_cid.handle); - up_write(&rbd_dev->lock_rwsem); - return; + } else { + rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); } - - rbd_set_owner_cid(rbd_dev, &rbd_empty_cid); downgrade_write(&rbd_dev->lock_rwsem); } else { down_read(&rbd_dev->lock_rwsem); @@ -4951,6 +4943,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->minors = RBD_MINORS_PER_MAJOR; } disk->fops = &rbd_bd_ops; + disk->private_data = rbd_dev; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* QUEUE_FLAG_ADD_RANDOM is off by default for blk-mq */ diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 09c8ab5e0959..b3691de8ac06 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -914,7 +914,8 @@ void fsl_mc_device_remove(struct fsl_mc_device *mc_dev) } EXPORT_SYMBOL_GPL(fsl_mc_device_remove); -struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev) +struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, + u16 if_id) { struct fsl_mc_device *mc_bus_dev, *endpoint; struct fsl_mc_obj_desc endpoint_desc = {{ 0 }}; @@ -925,6 +926,7 @@ struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev) mc_bus_dev = to_fsl_mc_device(mc_dev->dev.parent); strcpy(endpoint1.type, mc_dev->obj_desc.type); endpoint1.id = mc_dev->obj_desc.id; + endpoint1.if_id = if_id; err = dprc_get_connection(mc_bus_dev->mc_io, 0, mc_bus_dev->mc_handle, diff --git a/drivers/bus/mhi/core/internal.h b/drivers/bus/mhi/core/internal.h index 5b9ea66b92dc..bc239a11aa69 100644 --- a/drivers/bus/mhi/core/internal.h +++ b/drivers/bus/mhi/core/internal.h @@ -682,7 +682,7 @@ void mhi_rddm_prepare(struct mhi_controller *mhi_cntrl, struct image_info *img_info); void mhi_fw_load_handler(struct mhi_controller *mhi_cntrl); int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan); + struct mhi_chan *mhi_chan, unsigned int flags); int mhi_init_chan_ctxt(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan); void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, diff --git a/drivers/bus/mhi/core/main.c b/drivers/bus/mhi/core/main.c index 22acde118bc3..84448233f64c 100644 --- a/drivers/bus/mhi/core/main.c +++ b/drivers/bus/mhi/core/main.c @@ -773,11 +773,18 @@ static void mhi_process_cmd_completion(struct mhi_controller *mhi_cntrl, cmd_pkt = mhi_to_virtual(mhi_ring, ptr); chan = MHI_TRE_GET_CMD_CHID(cmd_pkt); - mhi_chan = &mhi_cntrl->mhi_chan[chan]; - write_lock_bh(&mhi_chan->lock); - mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre); - complete(&mhi_chan->completion); - write_unlock_bh(&mhi_chan->lock); + + if (chan < mhi_cntrl->max_chan && + mhi_cntrl->mhi_chan[chan].configured) { + mhi_chan = &mhi_cntrl->mhi_chan[chan]; + write_lock_bh(&mhi_chan->lock); + mhi_chan->ccs = MHI_TRE_GET_EV_CODE(tre); + complete(&mhi_chan->completion); + write_unlock_bh(&mhi_chan->lock); + } else { + dev_err(&mhi_cntrl->mhi_dev->dev, + "Completion packet for invalid channel ID: %d\n", chan); + } mhi_del_ring_element(mhi_cntrl, mhi_ring); } @@ -1423,7 +1430,7 @@ exit_unprepare_channel: } int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, - struct mhi_chan *mhi_chan) + struct mhi_chan *mhi_chan, unsigned int flags) { int ret = 0; struct device *dev = &mhi_chan->mhi_dev->dev; @@ -1448,6 +1455,9 @@ int mhi_prepare_channel(struct mhi_controller *mhi_cntrl, if (ret) goto error_pm_state; + if (mhi_chan->dir == DMA_FROM_DEVICE) + mhi_chan->pre_alloc = !!(flags & MHI_CH_INBOUND_ALLOC_BUFS); + /* Pre-allocate buffer for xfer ring */ if (mhi_chan->pre_alloc) { int nr_el = get_nr_avail_ring_elements(mhi_cntrl, @@ -1603,7 +1613,7 @@ void mhi_reset_chan(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan) } /* Move channel to start state */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, unsigned int flags) { int ret, dir; struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl; @@ -1614,7 +1624,7 @@ int mhi_prepare_for_transfer(struct mhi_device *mhi_dev) if (!mhi_chan) continue; - ret = mhi_prepare_channel(mhi_cntrl, mhi_chan); + ret = mhi_prepare_channel(mhi_cntrl, mhi_chan, flags); if (ret) goto error_open_chan; } diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 19413daa0917..b33b9d75e8af 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -33,6 +33,8 @@ * @bar_num: PCI base address register to use for MHI MMIO register space * @dma_data_width: DMA transfer word size (32 or 64 bits) * @mru_default: default MRU size for MBIM network packets + * @sideband_wake: Devices using dedicated sideband GPIO for wakeup instead + * of inband wake support (such as sdx24) */ struct mhi_pci_dev_info { const struct mhi_controller_config *config; @@ -42,6 +44,7 @@ struct mhi_pci_dev_info { unsigned int bar_num; unsigned int dma_data_width; unsigned int mru_default; + bool sideband_wake; }; #define MHI_CHANNEL_CONFIG_UL(ch_num, ch_name, el_count, ev_ring) \ @@ -74,6 +77,22 @@ struct mhi_pci_dev_info { .doorbell_mode_switch = false, \ } +#define MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(ch_num, ch_name, el_count, ev_ring) \ + { \ + .num = ch_num, \ + .name = ch_name, \ + .num_elements = el_count, \ + .event_ring = ev_ring, \ + .dir = DMA_FROM_DEVICE, \ + .ee_mask = BIT(MHI_EE_AMSS), \ + .pollcfg = 0, \ + .doorbell = MHI_DB_BRST_DISABLE, \ + .lpm_notify = false, \ + .offload_channel = false, \ + .doorbell_mode_switch = false, \ + .auto_queue = true, \ + } + #define MHI_EVENT_CONFIG_CTRL(ev_ring, el_count) \ { \ .num_elements = el_count, \ @@ -212,7 +231,7 @@ static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = { MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0), MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0), MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0), - MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0), + MHI_CHANNEL_CONFIG_DL_AUTOQUEUE(21, "IPCR", 8, 0), MHI_CHANNEL_CONFIG_UL_FP(34, "FIREHOSE", 32, 0), MHI_CHANNEL_CONFIG_DL_FP(35, "FIREHOSE", 32, 0), MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2), @@ -244,7 +263,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx65_info = { .edl = "qcom/sdx65m/edl.mbn", .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = false, }; static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = { @@ -254,7 +274,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx55_info = { .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, .dma_data_width = 32, - .mru_default = 32768 + .mru_default = 32768, + .sideband_wake = false, }; static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = { @@ -262,7 +283,8 @@ static const struct mhi_pci_dev_info mhi_qcom_sdx24_info = { .edl = "qcom/prog_firehose_sdx24.mbn", .config = &modem_qcom_v1_mhiv_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = true, }; static const struct mhi_channel_config mhi_quectel_em1xx_channels[] = { @@ -304,7 +326,8 @@ static const struct mhi_pci_dev_info mhi_quectel_em1xx_info = { .edl = "qcom/prog_firehose_sdx24.mbn", .config = &modem_quectel_em1xx_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = true, }; static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = { @@ -342,7 +365,8 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = { .edl = "qcom/sdx55m/edl.mbn", .config = &modem_foxconn_sdx55_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, - .dma_data_width = 32 + .dma_data_width = 32, + .sideband_wake = false, }; static const struct pci_device_id mhi_pci_id_table[] = { @@ -643,11 +667,14 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) mhi_cntrl->status_cb = mhi_pci_status_cb; mhi_cntrl->runtime_get = mhi_pci_runtime_get; mhi_cntrl->runtime_put = mhi_pci_runtime_put; - mhi_cntrl->wake_get = mhi_pci_wake_get_nop; - mhi_cntrl->wake_put = mhi_pci_wake_put_nop; - mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop; mhi_cntrl->mru = info->mru_default; + if (info->sideband_wake) { + mhi_cntrl->wake_get = mhi_pci_wake_get_nop; + mhi_cntrl->wake_put = mhi_pci_wake_put_nop; + mhi_cntrl->wake_toggle = mhi_pci_wake_toggle_nop; + } + err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width)); if (err) return err; diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c index be160764911b..f9d5b7334341 100644 --- a/drivers/clk/clk-devres.c +++ b/drivers/clk/clk-devres.c @@ -92,13 +92,20 @@ int __must_check devm_clk_bulk_get_optional(struct device *dev, int num_clks, } EXPORT_SYMBOL_GPL(devm_clk_bulk_get_optional); +static void devm_clk_bulk_release_all(struct device *dev, void *res) +{ + struct clk_bulk_devres *devres = res; + + clk_bulk_put_all(devres->num_clks, devres->clks); +} + int __must_check devm_clk_bulk_get_all(struct device *dev, struct clk_bulk_data **clks) { struct clk_bulk_devres *devres; int ret; - devres = devres_alloc(devm_clk_bulk_release, + devres = devres_alloc(devm_clk_bulk_release_all, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index 18117ce5ff85..5c75e3d906c2 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -526,7 +526,7 @@ struct stm32f4_pll { struct stm32f4_pll_post_div_data { int idx; - u8 pll_num; + int pll_idx; const char *name; const char *parent; u8 flag; @@ -557,13 +557,13 @@ static const struct clk_div_table post_divr_table[] = { #define MAX_POST_DIV 3 static const struct stm32f4_pll_post_div_data post_div_data[MAX_POST_DIV] = { - { CLK_I2SQ_PDIV, PLL_I2S, "plli2s-q-div", "plli2s-q", + { CLK_I2SQ_PDIV, PLL_VCO_I2S, "plli2s-q-div", "plli2s-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 0, 5, 0, NULL}, - { CLK_SAIQ_PDIV, PLL_SAI, "pllsai-q-div", "pllsai-q", + { CLK_SAIQ_PDIV, PLL_VCO_SAI, "pllsai-q-div", "pllsai-q", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 8, 5, 0, NULL }, - { NO_IDX, PLL_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, + { NO_IDX, PLL_VCO_SAI, "pllsai-r-div", "pllsai-r", CLK_SET_RATE_PARENT, STM32F4_RCC_DCKCFGR, 16, 2, 0, post_divr_table }, }; @@ -1774,7 +1774,7 @@ static void __init stm32f4_rcc_init(struct device_node *np) post_div->width, post_div->flag_div, post_div->div_table, - clks[post_div->pll_num], + clks[post_div->pll_idx], &stm32f4_clk_lock); if (post_div->idx != NO_IDX) diff --git a/drivers/clk/hisilicon/Kconfig b/drivers/clk/hisilicon/Kconfig index 5ecc37aaa118..c1ec75aa4ccd 100644 --- a/drivers/clk/hisilicon/Kconfig +++ b/drivers/clk/hisilicon/Kconfig @@ -18,6 +18,7 @@ config COMMON_CLK_HI3519 config COMMON_CLK_HI3559A bool "Hi3559A Clock Driver" depends on ARCH_HISI || COMPILE_TEST + select RESET_HISI default ARCH_HISI help Build the clock driver for hi3559a. diff --git a/drivers/clk/qcom/clk-smd-rpm.c b/drivers/clk/qcom/clk-smd-rpm.c index 800b2fef1887..b2c142f3a649 100644 --- a/drivers/clk/qcom/clk-smd-rpm.c +++ b/drivers/clk/qcom/clk-smd-rpm.c @@ -467,7 +467,7 @@ DEFINE_CLK_SMD_RPM(msm8936, sysmmnoc_clk, sysmmnoc_a_clk, QCOM_SMD_RPM_BUS_CLK, static struct clk_smd_rpm *msm8936_clks[] = { [RPM_SMD_PCNOC_CLK] = &msm8916_pcnoc_clk, - [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_clk, + [RPM_SMD_PCNOC_A_CLK] = &msm8916_pcnoc_a_clk, [RPM_SMD_SNOC_CLK] = &msm8916_snoc_clk, [RPM_SMD_SNOC_A_CLK] = &msm8916_snoc_a_clk, [RPM_SMD_BIMC_CLK] = &msm8916_bimc_clk, diff --git a/drivers/clk/tegra/clk-sdmmc-mux.c b/drivers/clk/tegra/clk-sdmmc-mux.c index 316912d3b1a4..4f2c3309eea4 100644 --- a/drivers/clk/tegra/clk-sdmmc-mux.c +++ b/drivers/clk/tegra/clk-sdmmc-mux.c @@ -194,6 +194,15 @@ static void clk_sdmmc_mux_disable(struct clk_hw *hw) gate_ops->disable(gate_hw); } +static void clk_sdmmc_mux_disable_unused(struct clk_hw *hw) +{ + struct tegra_sdmmc_mux *sdmmc_mux = to_clk_sdmmc_mux(hw); + const struct clk_ops *gate_ops = sdmmc_mux->gate_ops; + struct clk_hw *gate_hw = &sdmmc_mux->gate.hw; + + gate_ops->disable_unused(gate_hw); +} + static void clk_sdmmc_mux_restore_context(struct clk_hw *hw) { struct clk_hw *parent = clk_hw_get_parent(hw); @@ -218,6 +227,7 @@ static const struct clk_ops tegra_clk_sdmmc_mux_ops = { .is_enabled = clk_sdmmc_mux_is_enabled, .enable = clk_sdmmc_mux_enable, .disable = clk_sdmmc_mux_disable, + .disable_unused = clk_sdmmc_mux_disable_unused, .restore_context = clk_sdmmc_mux_restore_context, }; diff --git a/drivers/firmware/efi/dev-path-parser.c b/drivers/firmware/efi/dev-path-parser.c index 10d4457417a4..eb9c65f97841 100644 --- a/drivers/firmware/efi/dev-path-parser.c +++ b/drivers/firmware/efi/dev-path-parser.c @@ -34,7 +34,6 @@ static long __init parse_acpi_path(const struct efi_dev_path *node, break; if (!adev->pnp.unique_id && node->acpi.uid == 0) break; - acpi_dev_put(adev); } if (!adev) return -ENODEV; diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4b7ee3fa9224..847f33ffc4ae 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -896,6 +896,7 @@ static int __init efi_memreserve_map_root(void) static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) { struct resource *res, *parent; + int ret; res = kzalloc(sizeof(struct resource), GFP_ATOMIC); if (!res) @@ -908,7 +909,17 @@ static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size) /* we expect a conflict with a 'System RAM' region */ parent = request_resource_conflict(&iomem_resource, res); - return parent ? request_resource(parent, res) : 0; + ret = parent ? request_resource(parent, res) : 0; + + /* + * Given that efi_mem_reserve_iomem() can be called at any + * time, only call memblock_reserve() if the architecture + * keeps the infrastructure around. + */ + if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK) && !ret) + memblock_reserve(addr, size); + + return ret; } int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index aa8da0a49829..ae87dded989d 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -630,8 +630,8 @@ efi_status_t efi_load_initrd_cmdline(efi_loaded_image_t *image, * @image: EFI loaded image protocol * @load_addr: pointer to loaded initrd * @load_size: size of loaded initrd - * @soft_limit: preferred size of allocated memory for loading the initrd - * @hard_limit: minimum size of allocated memory + * @soft_limit: preferred address for loading the initrd + * @hard_limit: upper limit address for loading the initrd * * Return: status code */ diff --git a/drivers/firmware/efi/mokvar-table.c b/drivers/firmware/efi/mokvar-table.c index d8bc01340686..38722d2009e2 100644 --- a/drivers/firmware/efi/mokvar-table.c +++ b/drivers/firmware/efi/mokvar-table.c @@ -180,7 +180,10 @@ void __init efi_mokvar_table_init(void) pr_err("EFI MOKvar config table is not valid\n"); return; } - efi_mem_reserve(efi.mokvar_table, map_size_needed); + + if (md.type == EFI_BOOT_SERVICES_DATA) + efi_mem_reserve(efi.mokvar_table, map_size_needed); + efi_mokvar_table_size = map_size_needed; } diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c index c1955d320fec..8f665678e9e3 100644 --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -62,9 +62,11 @@ int __init efi_tpm_eventlog_init(void) tbl_size = sizeof(*log_tbl) + log_tbl->size; memblock_reserve(efi.tpm_log, tbl_size); - if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR || - log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) { - pr_warn(FW_BUG "TPM Final Events table missing or invalid\n"); + if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { + pr_info("TPM Final Events table not present\n"); + goto out; + } else if (log_tbl->version != EFI_TCG2_EVENT_LOG_FORMAT_TCG_2) { + pr_warn(FW_BUG "TPM Final Events table invalid\n"); goto out; } diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index 4b9157a69fca..50b321a1ab1b 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -405,7 +405,7 @@ static int mpc8xxx_probe(struct platform_device *pdev) ret = devm_request_irq(&pdev->dev, mpc8xxx_gc->irqn, mpc8xxx_gpio_irq_cascade, - IRQF_SHARED, "gpio-cascade", + IRQF_NO_THREAD | IRQF_SHARED, "gpio-cascade", mpc8xxx_gc); if (ret) { dev_err(&pdev->dev, diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 5022e0ad0fae..0f5d17f343f1 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -238,8 +238,8 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) struct resource *res; int ret, irq; - irq = platform_get_irq(pdev, 0); - if (irq < 0) + irq = platform_get_irq_optional(pdev, 0); + if (irq < 0 && irq != -ENXIO) return irq; res = platform_get_resource(pdev, IORESOURCE_IO, 0); @@ -278,7 +278,7 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); - if (irq) { + if (irq > 0) { struct irq_chip *irq_chip = &gpio->irq_chip; u8 irq_status; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c0316eaba547..8ac6eb9f1fdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -619,6 +619,13 @@ struct amdgpu_video_codec_info { u32 max_level; }; +#define codec_info_build(type, width, height, level) \ + .codec_type = type,\ + .max_width = width,\ + .max_height = height,\ + .max_pixels_per_frame = height * width,\ + .max_level = level, + struct amdgpu_video_codecs { const u32 codec_count; const struct amdgpu_video_codec_info *codec_array; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 84a1b4bc9bb4..6cc0d4fa4d0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -26,6 +26,7 @@ #include <linux/slab.h> #include <linux/power_supply.h> #include <linux/pm_runtime.h> +#include <linux/suspend.h> #include <acpi/video.h> #include <acpi/actbl.h> @@ -1042,7 +1043,7 @@ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) #if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) - return true; + return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; } #endif return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index d303e88e3c23..f3fd5ec710b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3504,13 +3504,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, r = amdgpu_device_get_job_timeout_settings(adev); if (r) { dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); - goto failed_unmap; + return r; } /* early init functions */ r = amdgpu_device_ip_early_init(adev); if (r) - goto failed_unmap; + return r; /* doorbell bar mapping and doorbell index init*/ amdgpu_device_doorbell_init(adev); @@ -3736,10 +3736,6 @@ release_ras_con: failed: amdgpu_vf_error_trans_all(adev); -failed_unmap: - iounmap(adev->rmmio); - adev->rmmio = NULL; - return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index abb928894eac..361b86b71b56 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1190,6 +1190,10 @@ static const struct pci_device_id pciidlist[] = { /* Van Gogh */ {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, + /* Yellow Carp */ + {0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + {0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, + /* Navy_Flounder */ {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index d0d9bc445d7b..854fc497844b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -255,6 +255,15 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) return -EPERM; + /* Workaround for Thunk bug creating PROT_NONE,MAP_PRIVATE mappings + * for debugger access to invisible VRAM. Should have used MAP_SHARED + * instead. Clearing VM_MAYWRITE prevents the mapping from ever + * becoming writable and makes is_cow_mapping(vm_flags) false. + */ + if (is_cow_mapping(vma->vm_flags) && + !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) + vma->vm_flags &= ~VM_MAYWRITE; + return drm_gem_ttm_mmap(obj, vma); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index f5e9c022960b..a64b2c706090 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3300,6 +3300,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffbfffff, 0x00a00000) }; @@ -3379,6 +3380,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1_Vangogh, 0xffffffff, 0x00070103), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), @@ -3445,6 +3447,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER7_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0x00010000, 0x00010020), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 94a2c0742ee5..94d029dbf30d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -64,32 +64,13 @@ #include "smuio_v11_0.h" #include "smuio_v11_0_6.h" -#define codec_info_build(type, width, height, level) \ - .codec_type = type,\ - .max_width = width,\ - .max_height = height,\ - .max_pixels_per_frame = height * width,\ - .max_level = level, - static const struct amd_ip_funcs nv_common_ip_funcs; /* Navi */ static const struct amdgpu_video_codec_info nv_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static const struct amdgpu_video_codecs nv_video_codecs_encode = @@ -101,55 +82,13 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs nv_video_codecs_decode = @@ -161,62 +100,14 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode = /* Sienna Cichlid */ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs sc_video_codecs_decode = @@ -228,80 +119,20 @@ static const struct amdgpu_video_codecs sc_video_codecs_decode = /* SRIOV Sienna Cichlid, not const since data is controlled by host */ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 8192 * 4352, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codecs sriov_sc_video_codecs_encode = @@ -333,6 +164,19 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = { .codec_array = NULL, }; +/* Yellow Carp*/ +static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, +}; + +static const struct amdgpu_video_codecs yc_video_codecs_decode = { + .codec_count = ARRAY_SIZE(yc_video_codecs_decode_array), + .codec_array = yc_video_codecs_decode_array, +}; + static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -353,12 +197,17 @@ static int nv_query_video_codecs(struct amdgpu_device *adev, bool encode, case CHIP_NAVY_FLOUNDER: case CHIP_DIMGREY_CAVEFISH: case CHIP_VANGOGH: - case CHIP_YELLOW_CARP: if (encode) *codecs = &nv_video_codecs_encode; else *codecs = &sc_video_codecs_decode; return 0; + case CHIP_YELLOW_CARP: + if (encode) + *codecs = &nv_video_codecs_encode; + else + *codecs = &yc_video_codecs_decode; + return 0; case CHIP_BEIGE_GOBY: if (encode) *codecs = &bg_video_codecs_encode; @@ -1387,7 +1236,10 @@ static int nv_common_early_init(void *handle) AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_VCN_DPG | AMD_PG_SUPPORT_JPEG; - adev->external_rev_id = adev->rev_id + 0x01; + if (adev->pdev->device == 0x1681) + adev->external_rev_id = adev->rev_id + 0x19; + else + adev->external_rev_id = adev->rev_id + 0x01; break; default: /* FIXME: not supported yet */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 618e5b6b85d9..536d41f327c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -67,7 +67,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) err = psp_init_asd_microcode(psp, chip_name); if (err) - goto out; + return err; snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); @@ -80,7 +80,7 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) } else { err = amdgpu_ucode_validate(adev->psp.ta_fw); if (err) - goto out2; + goto out; ta_hdr = (const struct ta_firmware_header_v1_0 *) adev->psp.ta_fw->data; @@ -105,10 +105,9 @@ static int psp_v12_0_init_microcode(struct psp_context *psp) return 0; -out2: +out: release_firmware(adev->psp.ta_fw); adev->psp.ta_fw = NULL; -out: if (err) { dev_err(adev->dev, "psp v12.0: Failed to load firmware \"%s\"\n", diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b02436401d46..b7d350be8050 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -88,20 +88,8 @@ /* Vega, Raven, Arcturus */ static const struct amdgpu_video_codec_info vega_video_codecs_encode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 2304, - .max_pixels_per_frame = 4096 * 2304, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_encode = @@ -113,48 +101,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_decode = @@ -166,55 +118,13 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs rv_video_codecs_decode = @@ -226,55 +136,13 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 3, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 5, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 52, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 4, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 186, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, - .max_width = 8192, - .max_height = 4352, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; static const struct amdgpu_video_codecs rn_video_codecs_decode = diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index d3a2a5ff57e9..b53f49a23ddc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2429,9 +2429,9 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll; min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll; - if (caps->ext_caps->bits.oled == 1 || + if (caps->ext_caps->bits.oled == 1 /*|| caps->ext_caps->bits.sdr_aux_backlight_control == 1 || - caps->ext_caps->bits.hdr_aux_backlight_control == 1) + caps->ext_caps->bits.hdr_aux_backlight_control == 1*/) caps->aux_support = true; if (amdgpu_backlight == 0) diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c index 6e0c5c664fdc..a5331b96f551 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn20/dcn20_clk_mgr.c @@ -197,7 +197,7 @@ void dcn20_update_clocks_update_dentist(struct clk_mgr_internal *clk_mgr, struct REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_wdivider); -// REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 5, 100); + REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_CHG_DONE, 1, 50, 1000); REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_WDIVIDER, dppclk_wdivider); REG_WAIT(DENTIST_DISPCLK_CNTL, DENTIST_DPPCLK_CHG_DONE, 1, 5, 100); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c index 513676a6f52b..af7004b770ae 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c @@ -190,6 +190,10 @@ void dcn3_init_clocks(struct clk_mgr *clk_mgr_base) &clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz, &num_levels); + /* SOCCLK */ + dcn3_init_single_clock(clk_mgr, PPCLK_SOCCLK, + &clk_mgr_base->bw_params->clk_table.entries[0].socclk_mhz, + &num_levels); // DPREFCLK ??? /* DISPCLK */ diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 7b7d884d58be..4a4894e9d9c9 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -48,6 +48,21 @@ #include "dc_dmub_srv.h" +#include "yellow_carp_offset.h" + +#define regCLK1_CLK_PLL_REQ 0x0237 +#define regCLK1_CLK_PLL_REQ_BASE_IDX 0 + +#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 +#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc +#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 +#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL +#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L +#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L + +#define REG(reg_name) \ + (CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name) + #define TO_CLK_MGR_DCN31(clk_mgr)\ container_of(clk_mgr, struct clk_mgr_dcn31, base) @@ -124,10 +139,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, * also if safe to lower is false, we just go in the higher state */ if (safe_to_lower) { - if (new_clocks->z9_support == DCN_Z9_SUPPORT_ALLOW && - new_clocks->z9_support != clk_mgr_base->clks.z9_support) { + if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_ALLOW && + new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_Z9_support(clk_mgr, true); - clk_mgr_base->clks.z9_support = new_clocks->z9_support; + clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) { @@ -148,10 +163,10 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, } } } else { - if (new_clocks->z9_support == DCN_Z9_SUPPORT_DISALLOW && - new_clocks->z9_support != clk_mgr_base->clks.z9_support) { + if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW && + new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) { dcn31_smu_set_Z9_support(clk_mgr, false); - clk_mgr_base->clks.z9_support = new_clocks->z9_support; + clk_mgr_base->clks.zstate_support = new_clocks->zstate_support; } if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) { @@ -229,7 +244,32 @@ static void dcn31_update_clocks(struct clk_mgr *clk_mgr_base, static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr) { - return 0; + /* get FbMult value */ + struct fixed31_32 pll_req; + unsigned int fbmult_frac_val = 0; + unsigned int fbmult_int_val = 0; + + /* + * Register value of fbmult is in 8.16 format, we are converting to 31.32 + * to leverage the fix point operations available in driver + */ + + REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/ + REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */ + + pll_req = dc_fixpt_from_int(fbmult_int_val); + + /* + * since fractional part is only 16 bit in register definition but is 32 bit + * in our fix point definiton, need to shift left by 16 to obtain correct value + */ + pll_req.value |= fbmult_frac_val << 16; + + /* multiply by REFCLK period */ + pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz); + + /* integer part is now VCO frequency in kHz */ + return dc_fixpt_floor(pll_req); } static void dcn31_enable_pme_wa(struct clk_mgr *clk_mgr_base) @@ -246,7 +286,7 @@ static void dcn31_init_clocks(struct clk_mgr *clk_mgr) clk_mgr->clks.p_state_change_support = true; clk_mgr->clks.prev_p_state_change_support = true; clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN; - clk_mgr->clks.z9_support = DCN_Z9_SUPPORT_UNKNOWN; + clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN; } static bool dcn31_are_clock_states_equal(struct dc_clocks *a, @@ -260,7 +300,7 @@ static bool dcn31_are_clock_states_equal(struct dc_clocks *a, return false; else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz) return false; - else if (a->z9_support != b->z9_support) + else if (a->zstate_support != b->zstate_support) return false; else if (a->dtbclk_en != b->dtbclk_en) return false; @@ -592,6 +632,7 @@ void dcn31_clk_mgr_construct( clk_mgr->base.dprefclk_ss_percentage = 0; clk_mgr->base.dprefclk_ss_divider = 1000; clk_mgr->base.ss_on_dprefclk = false; + clk_mgr->base.dfs_ref_freq_khz = 48000; clk_mgr->smu_wm_set.wm_set = (struct dcn31_watermarks *)dm_helpers_allocate_gpu_mem( clk_mgr->base.base.ctx, diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h index cc21cf75eafd..f8f100535526 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.h @@ -27,60 +27,6 @@ #define __DCN31_CLK_MGR_H__ #include "clk_mgr_internal.h" -//CLK1_CLK_PLL_REQ -#ifndef CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK11_CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK11_CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L -//CLK1_CLK0_DFS_CNTL -#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER__SHIFT 0x0 -#define CLK11_CLK1_CLK0_DFS_CNTL__CLK0_DIVIDER_MASK 0x0000007FL -/*DPREF clock related*/ -#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK0_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK1_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK2_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL -#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER__SHIFT 0x0 -#define CLK3_CLK3_DFS_CNTL__CLK3_DIVIDER_MASK 0x0000007FL - -//CLK3_0_CLK3_CLK_PLL_REQ -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int__SHIFT 0x0 -#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10 -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL -#define CLK3_0_CLK3_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L -#define CLK3_0_CLK3_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L - -#define mmCLK0_CLK3_DFS_CNTL 0x16C60 -#define mmCLK00_CLK0_CLK3_DFS_CNTL 0x16C60 -#define mmCLK01_CLK0_CLK3_DFS_CNTL 0x16E60 -#define mmCLK02_CLK0_CLK3_DFS_CNTL 0x17060 -#define mmCLK03_CLK0_CLK3_DFS_CNTL 0x17260 - -#define mmCLK0_CLK_PLL_REQ 0x16C10 -#define mmCLK00_CLK0_CLK_PLL_REQ 0x16C10 -#define mmCLK01_CLK0_CLK_PLL_REQ 0x16E10 -#define mmCLK02_CLK0_CLK_PLL_REQ 0x17010 -#define mmCLK03_CLK0_CLK_PLL_REQ 0x17210 - -#define mmCLK1_CLK_PLL_REQ 0x1B00D -#define mmCLK10_CLK1_CLK_PLL_REQ 0x1B00D -#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D -#define mmCLK12_CLK1_CLK_PLL_REQ 0x1B40D -#define mmCLK13_CLK1_CLK_PLL_REQ 0x1B60D - -#define mmCLK2_CLK_PLL_REQ 0x17E0D - -/*AMCLK*/ -#define mmCLK11_CLK1_CLK0_DFS_CNTL 0x1B23F -#define mmCLK11_CLK1_CLK_PLL_REQ 0x1B20D -#endif - struct dcn31_watermarks; struct dcn31_smu_watermark_set { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 6da226bf11d5..9fb8c46dc606 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -1820,8 +1820,7 @@ bool perform_link_training_with_retries( */ panel_mode = DP_PANEL_MODE_DEFAULT; } - } else - panel_mode = DP_PANEL_MODE_DEFAULT; + } } #endif @@ -4650,7 +4649,10 @@ enum dp_panel_mode dp_get_panel_mode(struct dc_link *link) } } - if (link->dpcd_caps.panel_mode_edp) { + if (link->dpcd_caps.panel_mode_edp && + (link->connector_signal == SIGNAL_TYPE_EDP || + (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT && + link->is_internal_display))) { return DP_PANEL_MODE_EDP; } @@ -4914,9 +4916,7 @@ bool dc_link_set_default_brightness_aux(struct dc_link *link) { uint32_t default_backlight; - if (link && - (link->dpcd_sink_ext_caps.bits.hdr_aux_backlight_control == 1 || - link->dpcd_sink_ext_caps.bits.sdr_aux_backlight_control == 1)) { + if (link && link->dpcd_sink_ext_caps.bits.oled == 1) { if (!dc_link_read_default_bl_aux(link, &default_backlight)) default_backlight = 150000; // if < 5 nits or > 5000, it might be wrong readback diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index a6a67244a322..1596f6b7fed7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1062,7 +1062,7 @@ bool resource_build_scaling_params(struct pipe_ctx *pipe_ctx) * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3 * did not show such problems, so this seems to be the exception. */ - if (plane_state->ctx->dce_version != DCE_VERSION_11_0) + if (plane_state->ctx->dce_version > DCE_VERSION_11_0) pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP; else pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 45640f1c26c4..8dcea8ff5c5a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -354,10 +354,10 @@ enum dcn_pwr_state { }; #if defined(CONFIG_DRM_AMD_DC_DCN) -enum dcn_z9_support_state { - DCN_Z9_SUPPORT_UNKNOWN, - DCN_Z9_SUPPORT_ALLOW, - DCN_Z9_SUPPORT_DISALLOW, +enum dcn_zstate_support_state { + DCN_ZSTATE_SUPPORT_UNKNOWN, + DCN_ZSTATE_SUPPORT_ALLOW, + DCN_ZSTATE_SUPPORT_DISALLOW, }; #endif /* @@ -378,7 +378,7 @@ struct dc_clocks { int dramclk_khz; bool p_state_change_support; #if defined(CONFIG_DRM_AMD_DC_DCN) - enum dcn_z9_support_state z9_support; + enum dcn_zstate_support_state zstate_support; bool dtbclk_en; #endif enum dcn_pwr_state pwr_state; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h index df6539e4c730..0464a8f3db3c 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_hwseq.h @@ -636,6 +636,7 @@ struct dce_hwseq_registers { uint32_t ODM_MEM_PWR_CTRL3; uint32_t DMU_MEM_PWR_CNTL; uint32_t MMHUBBUB_MEM_PWR_CNTL; + uint32_t DCHUBBUB_ARB_HOSTVM_CNTL; }; /* set field name */ #define HWS_SF(blk_name, reg_name, field_name, post_fix)\ @@ -1110,7 +1111,8 @@ struct dce_hwseq_registers { type DOMAIN_POWER_FORCEON;\ type DOMAIN_POWER_GATE;\ type DOMAIN_PGFSM_PWR_STATUS;\ - type HPO_HDMISTREAMCLK_G_GATE_DIS; + type HPO_HDMISTREAMCLK_G_GATE_DIS;\ + type DISABLE_HOSTVM_FORCE_ALLOW_PSTATE; struct dce_hwseq_shift { HWSEQ_REG_FIELD_LIST(uint8_t) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c index 673b93f4fea5..cb9767ddf93d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp_dscl.c @@ -217,6 +217,8 @@ static void dpp1_dscl_set_lb( const struct line_buffer_params *lb_params, enum lb_memory_config mem_size_config) { + uint32_t max_partitions = 63; /* Currently hardcoded on all ASICs before DCN 3.2 */ + /* LB */ if (dpp->base.caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT) { /* DSCL caps: pixel data processed in fixed format */ @@ -239,9 +241,12 @@ static void dpp1_dscl_set_lb( LB_DATA_FORMAT__ALPHA_EN, lb_params->alpha_en); /* Alpha enable */ } + if (dpp->base.caps->max_lb_partitions == 31) + max_partitions = 31; + REG_SET_2(LB_MEMORY_CTRL, 0, MEMORY_CONFIG, mem_size_config, - LB_MAX_PARTITIONS, 63); + LB_MAX_PARTITIONS, max_partitions); } static const uint16_t *dpp1_dscl_get_filter_coeffs_64p(int taps, struct fixed31_32 ratio) diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 1b05a37b674d..b173fa3653b5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -2093,8 +2093,10 @@ int dcn20_populate_dml_pipes_from_context( - timing->v_border_bottom; pipes[pipe_cnt].pipe.dest.htotal = timing->h_total; pipes[pipe_cnt].pipe.dest.vtotal = v_total; - pipes[pipe_cnt].pipe.dest.hactive = timing->h_addressable; - pipes[pipe_cnt].pipe.dest.vactive = timing->v_addressable; + pipes[pipe_cnt].pipe.dest.hactive = + timing->h_addressable + timing->h_border_left + timing->h_border_right; + pipes[pipe_cnt].pipe.dest.vactive = + timing->v_addressable + timing->v_border_top + timing->v_border_bottom; pipes[pipe_cnt].pipe.dest.interlaced = timing->flags.INTERLACE; pipes[pipe_cnt].pipe.dest.pixel_rate_mhz = timing->pix_clk_100hz/10000.0; if (timing->timing_3d_format == TIMING_3D_FORMAT_HW_FRAME_PACKING) @@ -3079,6 +3081,37 @@ static bool is_dtbclk_required(struct dc *dc, struct dc_state *context) return false; } +static enum dcn_zstate_support_state decide_zstate_support(struct dc *dc, struct dc_state *context) +{ + int plane_count; + int i; + + plane_count = 0; + for (i = 0; i < dc->res_pool->pipe_count; i++) { + if (context->res_ctx.pipe_ctx[i].plane_state) + plane_count++; + } + + /* + * Zstate is allowed in following scenarios: + * 1. Single eDP with PSR enabled + * 2. 0 planes (No memory requests) + * 3. Single eDP without PSR but > 5ms stutter period + */ + if (plane_count == 0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else if (context->stream_count == 1 && context->streams[0]->signal == SIGNAL_TYPE_EDP) { + struct dc_link *link = context->streams[0]->sink->link; + + if ((link->link_index == 0 && link->psr_settings.psr_feature_enabled) + || context->bw_ctx.dml.vba.StutterPeriod > 5000.0) + return DCN_ZSTATE_SUPPORT_ALLOW; + else + return DCN_ZSTATE_SUPPORT_DISALLOW; + } else + return DCN_ZSTATE_SUPPORT_DISALLOW; +} + void dcn20_calculate_dlg_params( struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, @@ -3086,7 +3119,6 @@ void dcn20_calculate_dlg_params( int vlevel) { int i, pipe_idx; - int plane_count; /* Writeback MCIF_WB arbitration parameters */ dc->res_pool->funcs->set_mcif_arb_params(dc, context, pipes, pipe_cnt); @@ -3102,17 +3134,7 @@ void dcn20_calculate_dlg_params( != dm_dram_clock_change_unsupported; context->bw_ctx.bw.dcn.clk.dppclk_khz = 0; - context->bw_ctx.bw.dcn.clk.z9_support = (context->bw_ctx.dml.vba.StutterPeriod > 5000.0) ? - DCN_Z9_SUPPORT_ALLOW : DCN_Z9_SUPPORT_DISALLOW; - - plane_count = 0; - for (i = 0; i < dc->res_pool->pipe_count; i++) { - if (context->res_ctx.pipe_ctx[i].plane_state) - plane_count++; - } - - if (plane_count == 0) - context->bw_ctx.bw.dcn.clk.z9_support = DCN_Z9_SUPPORT_ALLOW; + context->bw_ctx.bw.dcn.clk.zstate_support = decide_zstate_support(dc, context); context->bw_ctx.bw.dcn.clk.dtbclk_en = is_dtbclk_required(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index f3d98e3ba624..bf0a198eae15 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -109,6 +109,7 @@ struct _vcs_dpi_ip_params_st dcn2_1_ip = { .max_page_table_levels = 4, .pte_chunk_size_kbytes = 2, .meta_chunk_size_kbytes = 2, + .min_meta_chunk_size_bytes = 256, .writeback_chunk_size_kbytes = 2, .line_buffer_size_bits = 789504, .is_line_buffer_bpp_fixed = 0, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c index 2140b75540cf..23a52d47e61c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c @@ -383,13 +383,6 @@ bool dpp3_get_optimal_number_of_taps( int min_taps_y, min_taps_c; enum lb_memory_config lb_config; - /* Some ASICs does not support FP16 scaling, so we reject modes require this*/ - if (scl_data->viewport.width != scl_data->h_active && - scl_data->viewport.height != scl_data->v_active && - dpp->caps->dscl_data_proc_format == DSCL_DATA_PRCESSING_FIXED_FORMAT && - scl_data->format == PIXEL_FORMAT_FP16) - return false; - if (scl_data->viewport.width > scl_data->h_active && dpp->ctx->dc->debug.max_downscale_src_width != 0 && scl_data->viewport.width > dpp->ctx->dc->debug.max_downscale_src_width) @@ -1440,15 +1433,6 @@ bool dpp3_construct( dpp->tf_shift = tf_shift; dpp->tf_mask = tf_mask; - dpp->lb_pixel_depth_supported = - LB_PIXEL_DEPTH_18BPP | - LB_PIXEL_DEPTH_24BPP | - LB_PIXEL_DEPTH_30BPP | - LB_PIXEL_DEPTH_36BPP; - - dpp->lb_bits_per_entry = LB_BITS_PER_ENTRY; - dpp->lb_memory_size = LB_TOTAL_NUMBER_OF_ENTRIES; /*0x1404*/ - return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h index 3fa86cd090a0..ac644ae6b9f2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.h @@ -154,6 +154,7 @@ SRI(COLOR_KEYER_BLUE, CNVC_CFG, id), \ SRI(CURSOR_CONTROL, CURSOR0_, id),\ SRI(OBUF_MEM_PWR_CTRL, DSCL, id),\ + SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \ SRI(DSCL_MEM_PWR_CTRL, DSCL, id) #define DPP_REG_LIST_DCN30(id)\ @@ -163,8 +164,6 @@ SRI(CM_SHAPER_LUT_DATA, CM, id),\ SRI(CM_MEM_PWR_CTRL2, CM, id), \ SRI(CM_MEM_PWR_STATUS2, CM, id), \ - SRI(DSCL_MEM_PWR_STATUS, DSCL, id), \ - SRI(DSCL_MEM_PWR_CTRL, DSCL, id), \ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_B, CM, id),\ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_G, CM, id),\ SRI(CM_BLNDGAM_RAMA_START_SLOPE_CNTL_R, CM, id),\ diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 16a75ba0ca82..7d3ff5d44402 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -1398,11 +1398,18 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_02_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; dcn3_02_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_02_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[0].dtbclk_mhz; + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) + dcn3_02_soc.clock_limits[i].dtbclk_mhz = dcn3_02_soc.clock_limits[i-1].dtbclk_mhz; + else + dcn3_02_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_02_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_02_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + /* FCLK, PHYCLK_D18, DSCCLK */ dcn3_02_soc.clock_limits[i].phyclk_d18_mhz = dcn3_02_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_02_soc.clock_limits[i].socclk_mhz = dcn3_02_soc.clock_limits[0].socclk_mhz; dcn3_02_soc.clock_limits[i].dscclk_mhz = dcn3_02_soc.clock_limits[0].dscclk_mhz; } /* re-init DML with updated bb */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c index 34b89464ae02..833ab13fa834 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c @@ -1326,11 +1326,18 @@ void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn3_03_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz; dcn3_03_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz; dcn3_03_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz; - dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[0].dtbclk_mhz; + /* Populate from bw_params for DTBCLK, SOCCLK */ + if (!bw_params->clk_table.entries[i].dtbclk_mhz && i > 0) + dcn3_03_soc.clock_limits[i].dtbclk_mhz = dcn3_03_soc.clock_limits[i-1].dtbclk_mhz; + else + dcn3_03_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz; + if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0) + dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[i-1].socclk_mhz; + else + dcn3_03_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz; /* These clocks cannot come from bw_params, always fill from dcn3_03_soc[1] */ - /* FCLK, PHYCLK_D18, SOCCLK, DSCCLK */ + /* FCLK, PHYCLK_D18, DSCCLK */ dcn3_03_soc.clock_limits[i].phyclk_d18_mhz = dcn3_03_soc.clock_limits[0].phyclk_d18_mhz; - dcn3_03_soc.clock_limits[i].socclk_mhz = dcn3_03_soc.clock_limits[0].socclk_mhz; dcn3_03_soc.clock_limits[i].dscclk_mhz = dcn3_03_soc.clock_limits[0].dscclk_mhz; } /* re-init DML with updated bb */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c index 836864a5a5dc..6ac6faf0c533 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c @@ -47,6 +47,7 @@ #include "dce/dmub_outbox.h" #include "dc_link_dp.h" #include "inc/link_dpcd.h" +#include "dcn10/dcn10_hw_sequencer.h" #define DC_LOGGER_INIT(logger) @@ -594,3 +595,20 @@ bool dcn31_is_abm_supported(struct dc *dc, } return false; } + +static void apply_riommu_invalidation_wa(struct dc *dc) +{ + struct dce_hwseq *hws = dc->hwseq; + + if (!hws->wa.early_riommu_invalidation) + return; + + REG_UPDATE(DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, 0); +} + +void dcn31_init_pipes(struct dc *dc, struct dc_state *context) +{ + dcn10_init_pipes(dc, context); + apply_riommu_invalidation_wa(dc); + +} diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h index ff72f0fdd5be..40dfebe78fdd 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h @@ -52,5 +52,6 @@ void dcn31_reset_hw_ctx_wrap( struct dc_state *context); bool dcn31_is_abm_supported(struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); +void dcn31_init_pipes(struct dc *dc, struct dc_state *context); #endif /* __DC_HWSS_DCN31_H__ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c index e3048f8827d2..aaf2dbd095fe 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c @@ -93,7 +93,6 @@ static const struct hw_sequencer_funcs dcn31_funcs = { .set_flip_control_gsl = dcn20_set_flip_control_gsl, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, - .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .set_pipe = dcn21_set_pipe, @@ -104,7 +103,7 @@ static const struct hw_sequencer_funcs dcn31_funcs = { }; static const struct hwseq_private_funcs dcn31_private_funcs = { - .init_pipes = dcn10_init_pipes, + .init_pipes = dcn31_init_pipes, .update_plane_addr = dcn20_update_plane_addr, .plane_atomic_disconnect = dcn10_plane_atomic_disconnect, .update_mpcc = dcn20_update_mpcc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index c67bc9544f5d..38c010afade1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -220,6 +220,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_1_soc = { .sr_exit_z8_time_us = 402.0, .sr_enter_plus_exit_z8_time_us = 520.0, .writeback_latency_us = 12.0, + .dram_channel_width_bytes = 4, .round_trip_ping_latency_dcfclk_cycles = 106, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, @@ -741,6 +742,7 @@ static const struct dccg_mask dccg_mask = { #define HWSEQ_DCN31_REG_LIST()\ SR(DCHUBBUB_GLOBAL_TIMER_CNTL), \ + SR(DCHUBBUB_ARB_HOSTVM_CNTL), \ SR(DIO_MEM_PWR_CTRL), \ SR(ODM_MEM_PWR_CTRL3), \ SR(DMU_MEM_PWR_CNTL), \ @@ -801,6 +803,7 @@ static const struct dce_hwseq_registers hwseq_reg = { #define HWSEQ_DCN31_MASK_SH_LIST(mask_sh)\ HWSEQ_DCN_MASK_SH_LIST(mask_sh), \ HWS_SF(, DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_REFDIV, mask_sh), \ + HWS_SF(, DCHUBBUB_ARB_HOSTVM_CNTL, DISABLE_HOSTVM_FORCE_ALLOW_PSTATE, mask_sh), \ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ HWS_SF(, DOMAIN0_PG_CONFIG, DOMAIN_POWER_GATE, mask_sh), \ HWS_SF(, DOMAIN1_PG_CONFIG, DOMAIN_POWER_FORCEON, mask_sh), \ @@ -1299,6 +1302,7 @@ static struct dce_hwseq *dcn31_hwseq_create( hws->regs = &hwseq_reg; hws->shifts = &hwseq_shift; hws->masks = &hwseq_mask; + hws->wa.early_riommu_invalidation = true; } return hws; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c index c26e742e8137..6655bb99fdfd 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c @@ -841,6 +841,9 @@ static bool CalculatePrefetchSchedule( else *DestinationLinesForPrefetch = dst_y_prefetch_equ; + // Limit to prevent overflow in DST_Y_PREFETCH register + *DestinationLinesForPrefetch = dml_min(*DestinationLinesForPrefetch, 63.75); + dml_print("DML: VStartup: %d\n", VStartup); dml_print("DML: TCalc: %f\n", TCalc); dml_print("DML: TWait: %f\n", TWait); @@ -4889,7 +4892,7 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l } } while ((locals->PrefetchSupported[i][j] != true || locals->VRatioInPrefetchSupported[i][j] != true) && (mode_lib->vba.NextMaxVStartup != mode_lib->vba.MaxMaxVStartup[0][0] - || mode_lib->vba.NextPrefetchMode < mode_lib->vba.MaxPrefetchMode)); + || mode_lib->vba.NextPrefetchMode <= mode_lib->vba.MaxPrefetchMode)); if (locals->PrefetchSupported[i][j] == true && locals->VRatioInPrefetchSupported[i][j] == true) { mode_lib->vba.BandwidthAvailableForImmediateFlip = locals->ReturnBWPerState[i][0]; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h index 2a0db2b03047..9ac9d5e8df8b 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/transform.h @@ -289,6 +289,9 @@ struct dpp_caps { /* DSCL processing pixel data in fixed or float format */ enum dscl_data_processing_format dscl_data_proc_format; + /* max LB partitions */ + unsigned int max_lb_partitions; + /* Calculates the number of partitions in the line buffer. * The implementation of this function is overloaded for * different versions of DSCL LB. diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index f7f7e4fff0c2..082549f75978 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -41,6 +41,7 @@ struct dce_hwseq_wa { bool DEGVIDCN10_254; bool DEGVIDCN21; bool disallow_self_refresh_during_multi_plane_transition; + bool early_riommu_invalidation; }; struct hwseq_wa_state { diff --git a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h index 610266088ff1..35fa0d8e92dd 100644 --- a/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h +++ b/drivers/gpu/drm/amd/pm/inc/aldebaran_ppsmc.h @@ -101,7 +101,8 @@ #define PPSMC_MSG_SetSystemVirtualSTBtoDramAddrLow 0x41 #define PPSMC_MSG_GfxDriverResetRecovery 0x42 -#define PPSMC_Message_Count 0x43 +#define PPSMC_MSG_BoardPowerCalibration 0x43 +#define PPSMC_Message_Count 0x44 //PPSMC Reset Types #define PPSMC_RESET_TYPE_WARM_RESET 0x00 diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 89a16dcd0fff..1d3765b873df 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -225,7 +225,8 @@ __SMU_DUMMY_MAP(DisableDeterminism), \ __SMU_DUMMY_MAP(SetUclkDpmMode), \ __SMU_DUMMY_MAP(LightSBR), \ - __SMU_DUMMY_MAP(GfxDriverResetRecovery), + __SMU_DUMMY_MAP(GfxDriverResetRecovery), \ + __SMU_DUMMY_MAP(BoardPowerCalibration), #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type diff --git a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h index 1962a5877191..f61b5c914a3d 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_v11_0.h @@ -34,7 +34,7 @@ #define SMU11_DRIVER_IF_VERSION_Navy_Flounder 0xE #define SMU11_DRIVER_IF_VERSION_VANGOGH 0x03 #define SMU11_DRIVER_IF_VERSION_Dimgrey_Cavefish 0xF -#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0x9 +#define SMU11_DRIVER_IF_VERSION_Beige_Goby 0xD /* MP Apertures */ #define MP0_Public 0x03800000 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c index 9316a726195c..cb5485cf243f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c @@ -134,6 +134,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT MSG_MAP(DisableDeterminism, PPSMC_MSG_DisableDeterminism, 0), MSG_MAP(SetUclkDpmMode, PPSMC_MSG_SetUclkDpmMode, 0), MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0), + MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0), }; static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = { @@ -440,6 +441,39 @@ static int aldebaran_setup_pptable(struct smu_context *smu) return ret; } +static bool aldebaran_is_primary(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + + if (adev->smuio.funcs && adev->smuio.funcs->get_die_id) + return adev->smuio.funcs->get_die_id(adev) == 0; + + return true; +} + +static int aldebaran_run_board_btc(struct smu_context *smu) +{ + u32 smu_version; + int ret; + + if (!aldebaran_is_primary(smu)) + return 0; + + ret = smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (ret) { + dev_err(smu->adev->dev, "Failed to get smu version!\n"); + return ret; + } + if (smu_version <= 0x00441d00) + return 0; + + ret = smu_cmn_send_smc_msg(smu, SMU_MSG_BoardPowerCalibration, NULL); + if (ret) + dev_err(smu->adev->dev, "Board power calibration failed!\n"); + + return ret; +} + static int aldebaran_run_btc(struct smu_context *smu) { int ret; @@ -447,6 +481,8 @@ static int aldebaran_run_btc(struct smu_context *smu) ret = smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); if (ret) dev_err(smu->adev->dev, "RunDcBtc failed!\n"); + else + ret = aldebaran_run_board_btc(smu); return ret; } @@ -524,16 +560,6 @@ static int aldebaran_freqs_in_same_level(int32_t frequency1, return (abs(frequency1 - frequency2) <= EPSILON); } -static bool aldebaran_is_primary(struct smu_context *smu) -{ - struct amdgpu_device *adev = smu->adev; - - if (adev->smuio.funcs && adev->smuio.funcs->get_die_id) - return adev->smuio.funcs->get_die_id(adev) == 0; - - return true; -} - static int aldebaran_get_smu_metrics_data(struct smu_context *smu, MetricsMember_t member, uint32_t *value) diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index 98ae00661656..f454e0424086 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -834,6 +834,9 @@ long drm_ioctl(struct file *filp, if (drm_dev_is_unplugged(dev)) return -ENODEV; + if (DRM_IOCTL_TYPE(cmd) != DRM_IOCTL_BASE) + return -ENOTTY; + is_driver_ioctl = nr >= DRM_COMMAND_BASE && nr < DRM_COMMAND_END; if (is_driver_ioctl) { diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c index 5b6922e28ef2..aa667fa71158 100644 --- a/drivers/gpu/drm/i915/display/intel_bios.c +++ b/drivers/gpu/drm/i915/display/intel_bios.c @@ -2166,7 +2166,8 @@ static void init_vbt_missing_defaults(struct drm_i915_private *i915) { enum port port; - int ports = PORT_A | PORT_B | PORT_C | PORT_D | PORT_E | PORT_F; + int ports = BIT(PORT_A) | BIT(PORT_B) | BIT(PORT_C) | + BIT(PORT_D) | BIT(PORT_E) | BIT(PORT_F); if (!HAS_DDI(i915) && !IS_CHERRYVIEW(i915)) return; diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 3bad4e00f7be..2d5d21740c25 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -11361,13 +11361,19 @@ static void intel_setup_outputs(struct drm_i915_private *dev_priv) intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); vlv_dsi_init(dev_priv); - } else if (DISPLAY_VER(dev_priv) >= 9) { + } else if (DISPLAY_VER(dev_priv) == 10) { intel_ddi_init(dev_priv, PORT_A); intel_ddi_init(dev_priv, PORT_B); intel_ddi_init(dev_priv, PORT_C); intel_ddi_init(dev_priv, PORT_D); intel_ddi_init(dev_priv, PORT_E); intel_ddi_init(dev_priv, PORT_F); + } else if (DISPLAY_VER(dev_priv) >= 9) { + intel_ddi_init(dev_priv, PORT_A); + intel_ddi_init(dev_priv, PORT_B); + intel_ddi_init(dev_priv, PORT_C); + intel_ddi_init(dev_priv, PORT_D); + intel_ddi_init(dev_priv, PORT_E); } else if (HAS_DDI(dev_priv)) { u32 found; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index a8abc9af5ff4..4a6419d7be93 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -25,10 +25,8 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" -#include "i915_sw_fence_work.h" #include "i915_trace.h" #include "i915_user_extensions.h" -#include "i915_memcpy.h" struct eb_vma { struct i915_vma *vma; @@ -1456,6 +1454,10 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb, int err; struct intel_engine_cs *engine = eb->engine; + /* If we need to copy for the cmdparser, we will stall anyway */ + if (eb_use_cmdparser(eb)) + return ERR_PTR(-EWOULDBLOCK); + if (!reloc_can_use_engine(engine)) { engine = engine->gt->engine_class[COPY_ENGINE_CLASS][0]; if (!engine) @@ -2372,217 +2374,6 @@ shadow_batch_pin(struct i915_execbuffer *eb, return vma; } -struct eb_parse_work { - struct dma_fence_work base; - struct intel_engine_cs *engine; - struct i915_vma *batch; - struct i915_vma *shadow; - struct i915_vma *trampoline; - unsigned long batch_offset; - unsigned long batch_length; - unsigned long *jump_whitelist; - const void *batch_map; - void *shadow_map; -}; - -static int __eb_parse(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - int ret; - bool cookie; - - cookie = dma_fence_begin_signalling(); - ret = intel_engine_cmd_parser(pw->engine, - pw->batch, - pw->batch_offset, - pw->batch_length, - pw->shadow, - pw->jump_whitelist, - pw->shadow_map, - pw->batch_map); - dma_fence_end_signalling(cookie); - - return ret; -} - -static void __eb_parse_release(struct dma_fence_work *work) -{ - struct eb_parse_work *pw = container_of(work, typeof(*pw), base); - - if (!IS_ERR_OR_NULL(pw->jump_whitelist)) - kfree(pw->jump_whitelist); - - if (pw->batch_map) - i915_gem_object_unpin_map(pw->batch->obj); - else - i915_gem_object_unpin_pages(pw->batch->obj); - - i915_gem_object_unpin_map(pw->shadow->obj); - - if (pw->trampoline) - i915_active_release(&pw->trampoline->active); - i915_active_release(&pw->shadow->active); - i915_active_release(&pw->batch->active); -} - -static const struct dma_fence_work_ops eb_parse_ops = { - .name = "eb_parse", - .work = __eb_parse, - .release = __eb_parse_release, -}; - -static inline int -__parser_mark_active(struct i915_vma *vma, - struct intel_timeline *tl, - struct dma_fence *fence) -{ - struct intel_gt_buffer_pool_node *node = vma->private; - - return i915_active_ref(&node->active, tl->fence_context, fence); -} - -static int -parser_mark_active(struct eb_parse_work *pw, struct intel_timeline *tl) -{ - int err; - - mutex_lock(&tl->mutex); - - err = __parser_mark_active(pw->shadow, tl, &pw->base.dma); - if (err) - goto unlock; - - if (pw->trampoline) { - err = __parser_mark_active(pw->trampoline, tl, &pw->base.dma); - if (err) - goto unlock; - } - -unlock: - mutex_unlock(&tl->mutex); - return err; -} - -static int eb_parse_pipeline(struct i915_execbuffer *eb, - struct i915_vma *shadow, - struct i915_vma *trampoline) -{ - struct eb_parse_work *pw; - struct drm_i915_gem_object *batch = eb->batch->vma->obj; - bool needs_clflush; - int err; - - GEM_BUG_ON(overflows_type(eb->batch_start_offset, pw->batch_offset)); - GEM_BUG_ON(overflows_type(eb->batch_len, pw->batch_length)); - - pw = kzalloc(sizeof(*pw), GFP_KERNEL); - if (!pw) - return -ENOMEM; - - err = i915_active_acquire(&eb->batch->vma->active); - if (err) - goto err_free; - - err = i915_active_acquire(&shadow->active); - if (err) - goto err_batch; - - if (trampoline) { - err = i915_active_acquire(&trampoline->active); - if (err) - goto err_shadow; - } - - pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_WB); - if (IS_ERR(pw->shadow_map)) { - err = PTR_ERR(pw->shadow_map); - goto err_trampoline; - } - - needs_clflush = - !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - pw->batch_map = ERR_PTR(-ENODEV); - if (needs_clflush && i915_has_memcpy_from_wc()) - pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC); - - if (IS_ERR(pw->batch_map)) { - err = i915_gem_object_pin_pages(batch); - if (err) - goto err_unmap_shadow; - pw->batch_map = NULL; - } - - pw->jump_whitelist = - intel_engine_cmd_parser_alloc_jump_whitelist(eb->batch_len, - trampoline); - if (IS_ERR(pw->jump_whitelist)) { - err = PTR_ERR(pw->jump_whitelist); - goto err_unmap_batch; - } - - dma_fence_work_init(&pw->base, &eb_parse_ops); - - pw->engine = eb->engine; - pw->batch = eb->batch->vma; - pw->batch_offset = eb->batch_start_offset; - pw->batch_length = eb->batch_len; - pw->shadow = shadow; - pw->trampoline = trampoline; - - /* Mark active refs early for this worker, in case we get interrupted */ - err = parser_mark_active(pw, eb->context->timeline); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(pw->batch->resv, 1); - if (err) - goto err_commit; - - err = dma_resv_reserve_shared(shadow->resv, 1); - if (err) - goto err_commit; - - /* Wait for all writes (and relocs) into the batch to complete */ - err = i915_sw_fence_await_reservation(&pw->base.chain, - pw->batch->resv, NULL, false, - 0, I915_FENCE_GFP); - if (err < 0) - goto err_commit; - - /* Keep the batch alive and unwritten as we parse */ - dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma); - - /* Force execution to wait for completion of the parser */ - dma_resv_add_excl_fence(shadow->resv, &pw->base.dma); - - dma_fence_work_commit_imm(&pw->base); - return 0; - -err_commit: - i915_sw_fence_set_error_once(&pw->base.chain, err); - dma_fence_work_commit_imm(&pw->base); - return err; - -err_unmap_batch: - if (pw->batch_map) - i915_gem_object_unpin_map(batch); - else - i915_gem_object_unpin_pages(batch); -err_unmap_shadow: - i915_gem_object_unpin_map(shadow->obj); -err_trampoline: - if (trampoline) - i915_active_release(&trampoline->active); -err_shadow: - i915_active_release(&shadow->active); -err_batch: - i915_active_release(&eb->batch->vma->active); -err_free: - kfree(pw); - return err; -} - static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma) { /* @@ -2672,7 +2463,15 @@ static int eb_parse(struct i915_execbuffer *eb) goto err_trampoline; } - err = eb_parse_pipeline(eb, shadow, trampoline); + err = dma_resv_reserve_shared(shadow->resv, 1); + if (err) + goto err_trampoline; + + err = intel_engine_cmd_parser(eb->engine, + eb->batch->vma, + eb->batch_start_offset, + eb->batch_len, + shadow, trampoline); if (err) goto err_unpin_batch; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c index 4df505e4c53a..16162fc2782d 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_execbuffer.c @@ -125,6 +125,10 @@ static int igt_gpu_reloc(void *arg) intel_gt_pm_get(&eb.i915->gt); for_each_uabi_engine(eb.engine, eb.i915) { + if (intel_engine_requires_cmd_parser(eb.engine) || + intel_engine_using_cmd_parser(eb.engine)) + continue; + reloc_cache_init(&eb.reloc_cache, eb.i915); memset(map, POISON_INUSE, 4096); diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 98eb48c24c46..06024d321a1a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1977,6 +1977,21 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, if (drm_WARN_ON(&i915->drm, !engine)) return -EINVAL; + /* + * Due to d3_entered is used to indicate skipping PPGTT invalidation on + * vGPU reset, it's set on D0->D3 on PCI config write, and cleared after + * vGPU reset if in resuming. + * In S0ix exit, the device power state also transite from D3 to D0 as + * S3 resume, but no vGPU reset (triggered by QEMU devic model). After + * S0ix exit, all engines continue to work. However the d3_entered + * remains set which will break next vGPU reset logic (miss the expected + * PPGTT invalidation). + * Engines can only work in D0. Thus the 1st elsp write gives GVT a + * chance to clear d3_entered. + */ + if (vgpu->d3_entered) + vgpu->d3_entered = false; + execlist = &vgpu->submission.execlist[engine->id]; execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data; diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 3992c25a191d..a3b4d99d64b9 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1145,19 +1145,41 @@ find_reg(const struct intel_engine_cs *engine, u32 addr) static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, unsigned long offset, unsigned long length, - void *dst, const void *src) + bool *needs_clflush_after) { - bool needs_clflush = - !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ); - - if (src) { - GEM_BUG_ON(!needs_clflush); - i915_unaligned_memcpy_from_wc(dst, src + offset, length); - } else { - struct scatterlist *sg; + unsigned int src_needs_clflush; + unsigned int dst_needs_clflush; + void *dst, *src; + int ret; + + ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush); + if (ret) + return ERR_PTR(ret); + + dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); + i915_gem_object_finish_access(dst_obj); + if (IS_ERR(dst)) + return dst; + + ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush); + if (ret) { + i915_gem_object_unpin_map(dst_obj); + return ERR_PTR(ret); + } + + src = ERR_PTR(-ENODEV); + if (src_needs_clflush && i915_has_memcpy_from_wc()) { + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); + if (!IS_ERR(src)) { + i915_unaligned_memcpy_from_wc(dst, + src + offset, + length); + i915_gem_object_unpin_map(src_obj); + } + } + if (IS_ERR(src)) { + unsigned long x, n, remain; void *ptr; - unsigned int x, sg_ofs; - unsigned long remain; /* * We can avoid clflushing partial cachelines before the write @@ -1168,40 +1190,34 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * validate up to the end of the batch. */ remain = length; - if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) + if (dst_needs_clflush & CLFLUSH_BEFORE) remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - sg = i915_gem_object_get_sg(src_obj, offset >> PAGE_SHIFT, &sg_ofs, false); - - while (remain) { - unsigned long sg_max = sg->length >> PAGE_SHIFT; - - for (; remain && sg_ofs < sg_max; sg_ofs++) { - unsigned long len = min(remain, PAGE_SIZE - x); - void *map; - - map = kmap_atomic(nth_page(sg_page(sg), sg_ofs)); - if (needs_clflush) - drm_clflush_virt_range(map + x, len); - memcpy(ptr, map + x, len); - kunmap_atomic(map); - - ptr += len; - remain -= len; - x = 0; - } - - sg_ofs = 0; - sg = sg_next(sg); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); + + src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + if (src_needs_clflush) + drm_clflush_virt_range(src + x, len); + memcpy(ptr, src + x, len); + kunmap_atomic(src); + + ptr += len; + remain -= len; + x = 0; } } + i915_gem_object_finish_access(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); /* dst_obj is returned with vmap pinned */ + *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; + return dst; } @@ -1360,6 +1376,9 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, if (target_cmd_index == offset) return 0; + if (IS_ERR(jump_whitelist)) + return PTR_ERR(jump_whitelist); + if (!test_bit(target_cmd_index, jump_whitelist)) { DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed cmd\n", jump_target); @@ -1369,28 +1388,10 @@ static int check_bbstart(u32 *cmd, u32 offset, u32 length, return 0; } -/** - * intel_engine_cmd_parser_alloc_jump_whitelist() - preallocate jump whitelist for intel_engine_cmd_parser() - * @batch_length: length of the commands in batch_obj - * @trampoline: Whether jump trampolines are used. - * - * Preallocates a jump whitelist for parsing the cmd buffer in intel_engine_cmd_parser(). - * This has to be preallocated, because the command parser runs in signaling context, - * and may not allocate any memory. - * - * Return: NULL or pointer to a jump whitelist, or ERR_PTR() on failure. Use - * IS_ERR() to check for errors. Must bre freed() with kfree(). - * - * NULL is a valid value, meaning no allocation was required. - */ -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline) +static unsigned long *alloc_whitelist(u32 batch_length) { unsigned long *jmp; - if (trampoline) - return NULL; - /* * We expect batch_length to be less than 256KiB for known users, * i.e. we need at most an 8KiB bitmap allocation which should be @@ -1415,9 +1416,7 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, * @batch_offset: byte offset in the batch at which execution starts * @batch_length: length of the commands in batch_obj * @shadow: validated copy of the batch buffer in question - * @jump_whitelist: buffer preallocated with intel_engine_cmd_parser_alloc_jump_whitelist() - * @shadow_map: mapping to @shadow vma - * @batch_map: mapping to @batch vma + * @trampoline: true if we need to trampoline into privileged execution * * Parses the specified batch buffer looking for privilege violations as * described in the overview. @@ -1425,21 +1424,21 @@ unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, * Return: non-zero if the parser finds violations or otherwise fails; -EACCES * if the batch appears legal but should use hardware parsing */ + int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map) + bool trampoline) { u32 *cmd, *batch_end, offset = 0; struct drm_i915_cmd_descriptor default_desc = noop_desc; const struct drm_i915_cmd_descriptor *desc = &default_desc; + bool needs_clflush_after = false; + unsigned long *jump_whitelist; u64 batch_addr, shadow_addr; int ret = 0; - bool trampoline = !jump_whitelist; GEM_BUG_ON(!IS_ALIGNED(batch_offset, sizeof(*cmd))); GEM_BUG_ON(!IS_ALIGNED(batch_length, sizeof(*cmd))); @@ -1447,8 +1446,18 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, batch->size)); GEM_BUG_ON(!batch_length); - cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length, - shadow_map, batch_map); + cmd = copy_batch(shadow->obj, batch->obj, + batch_offset, batch_length, + &needs_clflush_after); + if (IS_ERR(cmd)) { + DRM_DEBUG("CMD: Failed to copy batch\n"); + return PTR_ERR(cmd); + } + + jump_whitelist = NULL; + if (!trampoline) + /* Defer failure until attempted use */ + jump_whitelist = alloc_whitelist(batch_length); shadow_addr = gen8_canonical_addr(shadow->node.start); batch_addr = gen8_canonical_addr(batch->node.start + batch_offset); @@ -1549,6 +1558,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, i915_gem_object_flush_map(shadow->obj); + if (!IS_ERR_OR_NULL(jump_whitelist)) + kfree(jump_whitelist); + i915_gem_object_unpin_map(shadow->obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 38ff2fb89744..b30397b04529 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1906,17 +1906,12 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); -unsigned long *intel_engine_cmd_parser_alloc_jump_whitelist(u32 batch_length, - bool trampoline); - int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, unsigned long batch_offset, unsigned long batch_length, struct i915_vma *shadow, - unsigned long *jump_whitelist, - void *shadow_map, - const void *batch_map); + bool trampoline); #define I915_CMD_PARSER_TRAMPOLINE_SIZE 8 /* intel_device_info.c */ diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 1014c71cf7f5..37aef1308573 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1426,10 +1426,8 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } if (fence->context == rq->fence.context) continue; @@ -1527,10 +1525,8 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { - i915_sw_fence_set_error_once(&rq->submit, fence->error); + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) continue; - } /* * Requests on the same timeline are explicitly ordered, along diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 7eaa92fee421..e0a10f36acc1 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -325,7 +325,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->pipe_mask &= ~BIT(PIPE_C); info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } - } else if (HAS_DISPLAY(dev_priv) && GRAPHICS_VER(dev_priv) >= 9) { + } else if (HAS_DISPLAY(dev_priv) && DISPLAY_VER(dev_priv) >= 9) { u32 dfsm = intel_de_read(dev_priv, SKL_DFSM); if (dfsm & SKL_DFSM_PIPE_A_DISABLE) { @@ -340,7 +340,8 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->pipe_mask &= ~BIT(PIPE_C); info->cpu_transcoder_mask &= ~BIT(TRANSCODER_C); } - if (GRAPHICS_VER(dev_priv) >= 12 && + + if (DISPLAY_VER(dev_priv) >= 12 && (dfsm & TGL_DFSM_PIPE_D_DISABLE)) { info->pipe_mask &= ~BIT(PIPE_D); info->cpu_transcoder_mask &= ~BIT(TRANSCODER_D); @@ -352,10 +353,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) if (dfsm & SKL_DFSM_DISPLAY_PM_DISABLE) info->display.has_fbc = 0; - if (GRAPHICS_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) + if (DISPLAY_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE)) info->display.has_dmc = 0; - if (GRAPHICS_VER(dev_priv) >= 10 && + if (DISPLAY_VER(dev_priv) >= 10 && (dfsm & CNL_DFSM_DISPLAY_DSC_DISABLE)) info->display.has_dsc = 0; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index d01c4c919504..704dace895cb 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -296,7 +296,7 @@ static const struct dpu_mdp_cfg sc7180_mdp[] = { static const struct dpu_mdp_cfg sm8250_mdp[] = { { .name = "top_0", .id = MDP_TOP, - .base = 0x0, .len = 0x45C, + .base = 0x0, .len = 0x494, .features = 0, .highest_bank_bit = 0x3, /* TODO: 2 for LP_DDR4 */ .clk_ctrls[DPU_CLK_CTRL_VIG0] = { diff --git a/drivers/gpu/drm/msm/dp/dp_catalog.c b/drivers/gpu/drm/msm/dp/dp_catalog.c index ca96e3514790..c0423e76eed7 100644 --- a/drivers/gpu/drm/msm/dp/dp_catalog.c +++ b/drivers/gpu/drm/msm/dp/dp_catalog.c @@ -771,6 +771,7 @@ int dp_catalog_panel_timing_cfg(struct dp_catalog *dp_catalog) dp_write_link(catalog, REG_DP_HSYNC_VSYNC_WIDTH_POLARITY, dp_catalog->width_blanking); dp_write_link(catalog, REG_DP_ACTIVE_HOR_VER, dp_catalog->dp_active); + dp_write_p0(catalog, MMSS_DP_INTF_CONFIG, 0); return 0; } diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index ee221d835fa0..eaddfd739885 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1526,7 +1526,7 @@ static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl) * running. Add the global reset just before disabling the * link clocks and core clocks. */ - ret = dp_ctrl_off(&ctrl->dp_ctrl); + ret = dp_ctrl_off_link_stream(&ctrl->dp_ctrl); if (ret) { DRM_ERROR("failed to disable DP controller\n"); return ret; diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 051c1be1de7e..867388a399ad 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -219,6 +219,7 @@ static int dp_display_bind(struct device *dev, struct device *master, goto end; } + dp->aux->drm_dev = drm; rc = dp_aux_register(dp->aux); if (rc) { DRM_ERROR("DRM DP AUX register failed\n"); @@ -1311,6 +1312,10 @@ static int dp_pm_resume(struct device *dev) else dp->dp_display.is_connected = false; + dp_display_handle_plugged_change(g_dp_display, + dp->dp_display.is_connected); + + mutex_unlock(&dp->event_mutex); return 0; diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index eed2a762e9dd..bcaddbba564d 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -142,6 +142,9 @@ static const struct iommu_flush_ops null_tlb_ops = { .tlb_add_page = msm_iommu_tlb_add_page, }; +static int msm_fault_handler(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags, void *arg); + struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) { struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(parent->dev); @@ -157,6 +160,13 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) if (!ttbr1_cfg) return ERR_PTR(-ENODEV); + /* + * Defer setting the fault handler until we have a valid adreno_smmu + * to avoid accidentially installing a GPU specific fault handler for + * the display's iommu + */ + iommu_set_fault_handler(iommu->domain, msm_fault_handler, iommu); + pagetable = kzalloc(sizeof(*pagetable), GFP_KERNEL); if (!pagetable) return ERR_PTR(-ENOMEM); @@ -300,7 +310,6 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain) iommu->domain = domain; msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU); - iommu_set_fault_handler(domain, msm_fault_handler, iommu); atomic_set(&iommu->pagetables, 0); diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 4f3a5357dd56..6d07e653f82d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -149,6 +149,8 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) */ if (bo->base.dev) drm_gem_object_release(&bo->base); + else + dma_resv_fini(&bo->base._resv); kfree(nvbo); } @@ -330,6 +332,10 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align, if (IS_ERR(nvbo)) return PTR_ERR(nvbo); + nvbo->bo.base.size = size; + dma_resv_init(&nvbo->bo.base._resv); + drm_vma_node_reset(&nvbo->bo.base.vma_node); + ret = nouveau_bo_init(nvbo, size, align, domain, sg, robj); if (ret) return ret; diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index 2229f1af2ca8..46029c5610c8 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -447,7 +447,6 @@ static int rpi_touchscreen_remove(struct i2c_client *i2c) drm_panel_remove(&ts->base); mipi_dsi_device_unregister(ts->dsi); - kfree(ts->dsi); return 0; } diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 21939d4352cf..1b80290c2b53 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -4166,7 +4166,7 @@ static const struct drm_display_mode yes_optoelectronics_ytc700tlag_05_201c_mode static const struct panel_desc yes_optoelectronics_ytc700tlag_05_201c = { .modes = &yes_optoelectronics_ytc700tlag_05_201c_mode, .num_modes = 1, - .bpc = 6, + .bpc = 8, .size = { .width = 154, .height = 90, diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 1b950b45cf4b..8d7fd65ccced 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -102,6 +102,9 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, return; } + if (!mem) + return; + man = ttm_manager_type(bdev, mem->mem_type); list_move_tail(&bo->lru, &man->lru[bo->priority]); diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 2f57f824e6db..763fa6f4e07d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -63,6 +63,9 @@ int ttm_mem_io_reserve(struct ttm_device *bdev, void ttm_mem_io_free(struct ttm_device *bdev, struct ttm_resource *mem) { + if (!mem) + return; + if (!mem->bus.offset && !mem->bus.addr) return; diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 5f31acec3ad7..74e3b460132b 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -44,6 +44,8 @@ static unsigned ttm_glob_use_count; struct ttm_global ttm_glob; EXPORT_SYMBOL(ttm_glob); +struct dentry *ttm_debugfs_root; + static void ttm_global_release(void) { struct ttm_global *glob = &ttm_glob; @@ -53,6 +55,7 @@ static void ttm_global_release(void) goto out; ttm_pool_mgr_fini(); + debugfs_remove(ttm_debugfs_root); __free_page(glob->dummy_read_page); memset(glob, 0, sizeof(*glob)); @@ -73,6 +76,13 @@ static int ttm_global_init(void) si_meminfo(&si); + ttm_debugfs_root = debugfs_create_dir("ttm", NULL); + if (IS_ERR(ttm_debugfs_root)) { + ret = PTR_ERR(ttm_debugfs_root); + ttm_debugfs_root = NULL; + goto out; + } + /* Limit the number of pages in the pool to about 50% of the total * system memory. */ @@ -100,6 +110,10 @@ static int ttm_global_init(void) debugfs_create_atomic_t("buffer_objects", 0444, ttm_debugfs_root, &glob->bo_count); out: + if (ret && ttm_debugfs_root) + debugfs_remove(ttm_debugfs_root); + if (ret) + --ttm_glob_use_count; mutex_unlock(&ttm_global_mutex); return ret; } diff --git a/drivers/gpu/drm/ttm/ttm_module.c b/drivers/gpu/drm/ttm/ttm_module.c index 997c458f68a9..7fcdef278c74 100644 --- a/drivers/gpu/drm/ttm/ttm_module.c +++ b/drivers/gpu/drm/ttm/ttm_module.c @@ -72,22 +72,6 @@ pgprot_t ttm_prot_from_caching(enum ttm_caching caching, pgprot_t tmp) return tmp; } -struct dentry *ttm_debugfs_root; - -static int __init ttm_init(void) -{ - ttm_debugfs_root = debugfs_create_dir("ttm", NULL); - return 0; -} - -static void __exit ttm_exit(void) -{ - debugfs_remove(ttm_debugfs_root); -} - -module_init(ttm_init); -module_exit(ttm_exit); - MODULE_AUTHOR("Thomas Hellstrom, Jerome Glisse"); MODULE_DESCRIPTION("TTM memory manager subsystem (for DRM device)"); MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index aab1b36ceb3c..c2876731ee2d 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -1857,38 +1857,46 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) vc4_hdmi_cec_update_clk_div(vc4_hdmi); if (vc4_hdmi->variant->external_irq_controller) { - ret = devm_request_threaded_irq(&pdev->dev, - platform_get_irq_byname(pdev, "cec-rx"), - vc4_cec_irq_handler_rx_bare, - vc4_cec_irq_handler_rx_thread, 0, - "vc4 hdmi cec rx", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-rx"), + vc4_cec_irq_handler_rx_bare, + vc4_cec_irq_handler_rx_thread, 0, + "vc4 hdmi cec rx", vc4_hdmi); if (ret) goto err_delete_cec_adap; - ret = devm_request_threaded_irq(&pdev->dev, - platform_get_irq_byname(pdev, "cec-tx"), - vc4_cec_irq_handler_tx_bare, - vc4_cec_irq_handler_tx_thread, 0, - "vc4 hdmi cec tx", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-tx"), + vc4_cec_irq_handler_tx_bare, + vc4_cec_irq_handler_tx_thread, 0, + "vc4 hdmi cec tx", vc4_hdmi); if (ret) - goto err_delete_cec_adap; + goto err_remove_cec_rx_handler; } else { HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff); - ret = devm_request_threaded_irq(&pdev->dev, platform_get_irq(pdev, 0), - vc4_cec_irq_handler, - vc4_cec_irq_handler_thread, 0, - "vc4 hdmi cec", vc4_hdmi); + ret = request_threaded_irq(platform_get_irq(pdev, 0), + vc4_cec_irq_handler, + vc4_cec_irq_handler_thread, 0, + "vc4 hdmi cec", vc4_hdmi); if (ret) goto err_delete_cec_adap; } ret = cec_register_adapter(vc4_hdmi->cec_adap, &pdev->dev); if (ret < 0) - goto err_delete_cec_adap; + goto err_remove_handlers; return 0; +err_remove_handlers: + if (vc4_hdmi->variant->external_irq_controller) + free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi); + else + free_irq(platform_get_irq(pdev, 0), vc4_hdmi); + +err_remove_cec_rx_handler: + if (vc4_hdmi->variant->external_irq_controller) + free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi); + err_delete_cec_adap: cec_delete_adapter(vc4_hdmi->cec_adap); @@ -1897,6 +1905,15 @@ err_delete_cec_adap: static void vc4_hdmi_cec_exit(struct vc4_hdmi *vc4_hdmi) { + struct platform_device *pdev = vc4_hdmi->pdev; + + if (vc4_hdmi->variant->external_irq_controller) { + free_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_hdmi); + free_irq(platform_get_irq_byname(pdev, "cec-tx"), vc4_hdmi); + } else { + free_irq(platform_get_irq(pdev, 0), vc4_hdmi); + } + cec_unregister_adapter(vc4_hdmi->cec_adap); } #else diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 160554903ef9..76937f716fbe 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -576,7 +576,7 @@ config HID_LOGITECH_HIDPP depends on HID_LOGITECH select POWER_SUPPLY help - Support for Logitech devices relyingon the HID++ Logitech specification + Support for Logitech devices relying on the HID++ Logitech specification Say Y if you want support for Logitech devices relying on the HID++ specification. Such devices are the various Logitech Touchpads (T650, diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index 96e2577fa37e..8d68796aa905 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -58,7 +58,7 @@ static void amd_stop_sensor_v2(struct amd_mp2_dev *privdata, u16 sensor_idx) cmd_base.cmd_v2.sensor_id = sensor_idx; cmd_base.cmd_v2.length = 16; - writeq(0x0, privdata->mmio + AMD_C2P_MSG2); + writeq(0x0, privdata->mmio + AMD_C2P_MSG1); writel(cmd_base.ul, privdata->mmio + AMD_C2P_MSG0); } diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 6b8f0d004d34..dc6bd4299c54 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -501,6 +501,8 @@ static const struct hid_device_id apple_devices[] = { APPLE_RDESC_JIS }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), .driver_data = APPLE_HAS_FN }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), + .driver_data = APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index fca8fc78a78a..fb807c8e989b 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -485,9 +485,6 @@ static void asus_kbd_backlight_set(struct led_classdev *led_cdev, { struct asus_kbd_leds *led = container_of(led_cdev, struct asus_kbd_leds, cdev); - if (led->brightness == brightness) - return; - led->brightness = brightness; schedule_work(&led->work); } diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index f43a8406cb9a..4ef1c3b8094e 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -742,7 +742,7 @@ static int ft260_is_interface_enabled(struct hid_device *hdev) int ret; ret = ft260_get_system_config(hdev, &cfg); - if (ret) + if (ret < 0) return ret; ft260_dbg("interface: 0x%02x\n", interface); @@ -754,23 +754,16 @@ static int ft260_is_interface_enabled(struct hid_device *hdev) switch (cfg.chip_mode) { case FT260_MODE_ALL: case FT260_MODE_BOTH: - if (interface == 1) { + if (interface == 1) hid_info(hdev, "uart interface is not supported\n"); - return 0; - } - ret = 1; + else + ret = 1; break; case FT260_MODE_UART: - if (interface == 0) { - hid_info(hdev, "uart is unsupported on interface 0\n"); - ret = 0; - } + hid_info(hdev, "uart interface is not supported\n"); break; case FT260_MODE_I2C: - if (interface == 1) { - hid_info(hdev, "i2c is unsupported on interface 1\n"); - ret = 0; - } + ret = 1; break; } return ret; @@ -785,7 +778,7 @@ static int ft260_byte_show(struct hid_device *hdev, int id, u8 *cfg, int len, if (ret < 0) return ret; - return scnprintf(buf, PAGE_SIZE, "%hi\n", *field); + return scnprintf(buf, PAGE_SIZE, "%d\n", *field); } static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len, @@ -797,7 +790,7 @@ static int ft260_word_show(struct hid_device *hdev, int id, u8 *cfg, int len, if (ret < 0) return ret; - return scnprintf(buf, PAGE_SIZE, "%hi\n", le16_to_cpu(*field)); + return scnprintf(buf, PAGE_SIZE, "%d\n", le16_to_cpu(*field)); } #define FT260_ATTR_SHOW(name, reptype, id, type, func) \ @@ -1004,11 +997,9 @@ err_hid_stop: static void ft260_remove(struct hid_device *hdev) { - int ret; struct ft260_device *dev = hid_get_drvdata(hdev); - ret = ft260_is_interface_enabled(hdev); - if (ret <= 0) + if (!dev) return; sysfs_remove_group(&hdev->dev.kobj, &ft260_attr_group); diff --git a/drivers/hid/intel-ish-hid/ishtp-hid-client.c b/drivers/hid/intel-ish-hid/ishtp-hid-client.c index 6b1fa971b33e..91bf4d01e91a 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid-client.c +++ b/drivers/hid/intel-ish-hid/ishtp-hid-client.c @@ -784,6 +784,17 @@ static void hid_ishtp_cl_reset_handler(struct work_struct *work) } } +static void hid_ishtp_cl_resume_handler(struct work_struct *work) +{ + struct ishtp_cl_data *client_data = container_of(work, struct ishtp_cl_data, resume_work); + struct ishtp_cl *hid_ishtp_cl = client_data->hid_ishtp_cl; + + if (ishtp_wait_resume(ishtp_get_ishtp_device(hid_ishtp_cl))) { + client_data->suspended = false; + wake_up_interruptible(&client_data->ishtp_resume_wait); + } +} + ishtp_print_log ishtp_hid_print_trace; /** @@ -822,6 +833,8 @@ static int hid_ishtp_cl_probe(struct ishtp_cl_device *cl_device) init_waitqueue_head(&client_data->ishtp_resume_wait); INIT_WORK(&client_data->work, hid_ishtp_cl_reset_handler); + INIT_WORK(&client_data->resume_work, hid_ishtp_cl_resume_handler); + ishtp_hid_print_trace = ishtp_trace_callback(cl_device); @@ -921,7 +934,7 @@ static int hid_ishtp_cl_resume(struct device *device) hid_ishtp_trace(client_data, "%s hid_ishtp_cl %p\n", __func__, hid_ishtp_cl); - client_data->suspended = false; + schedule_work(&client_data->resume_work); return 0; } diff --git a/drivers/hid/intel-ish-hid/ishtp-hid.h b/drivers/hid/intel-ish-hid/ishtp-hid.h index f88443a7d935..6a5cc11aefd8 100644 --- a/drivers/hid/intel-ish-hid/ishtp-hid.h +++ b/drivers/hid/intel-ish-hid/ishtp-hid.h @@ -135,6 +135,7 @@ struct ishtp_cl_data { int multi_packet_cnt; struct work_struct work; + struct work_struct resume_work; struct ishtp_cl_device *cl_device; }; diff --git a/drivers/hid/intel-ish-hid/ishtp/bus.c b/drivers/hid/intel-ish-hid/ishtp/bus.c index f0802b047ed8..aa2c51624012 100644 --- a/drivers/hid/intel-ish-hid/ishtp/bus.c +++ b/drivers/hid/intel-ish-hid/ishtp/bus.c @@ -314,13 +314,6 @@ static int ishtp_cl_device_resume(struct device *dev) if (!device) return 0; - /* - * When ISH needs hard reset, it is done asynchrnously, hence bus - * resume will be called before full ISH resume - */ - if (device->ishtp_dev->resume_flag) - return 0; - driver = to_ishtp_cl_driver(dev->driver); if (driver && driver->driver.pm) { if (driver->driver.pm->resume) @@ -850,6 +843,28 @@ struct device *ishtp_device(struct ishtp_cl_device *device) EXPORT_SYMBOL(ishtp_device); /** + * ishtp_wait_resume() - Wait for IPC resume + * + * Wait for IPC resume + * + * Return: resume complete or not + */ +bool ishtp_wait_resume(struct ishtp_device *dev) +{ + /* 50ms to get resume response */ + #define WAIT_FOR_RESUME_ACK_MS 50 + + /* Waiting to get resume response */ + if (dev->resume_flag) + wait_event_interruptible_timeout(dev->resume_wait, + !dev->resume_flag, + msecs_to_jiffies(WAIT_FOR_RESUME_ACK_MS)); + + return (!dev->resume_flag); +} +EXPORT_SYMBOL_GPL(ishtp_wait_resume); + +/** * ishtp_get_pci_device() - Return PCI device dev pointer * This interface is used to return PCI device pointer * from ishtp_cl_device instance. diff --git a/drivers/hid/usbhid/Kconfig b/drivers/hid/usbhid/Kconfig index dcf3a235870f..7c2032f7f44d 100644 --- a/drivers/hid/usbhid/Kconfig +++ b/drivers/hid/usbhid/Kconfig @@ -38,7 +38,7 @@ config USB_HIDDEV help Say Y here if you want to support HID devices (from the USB specification standpoint) that aren't strictly user interface - devices, like monitor controls and Uninterruptable Power Supplies. + devices, like monitor controls and Uninterruptible Power Supplies. This module supports these devices separately using a separate event interface on /dev/usb/hiddevX (char 180:96 to 180:111). diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 81d7d12bcf34..81ba642adcb7 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2548,6 +2548,9 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac, int slot; slot = input_mt_get_slot_by_key(input, hid_data->id); + if (slot < 0) + return; + input_mt_slot(input, slot); input_mt_report_slot_state(input, MT_TOOL_FINGER, prox); } @@ -3831,7 +3834,7 @@ int wacom_setup_touch_input_capabilities(struct input_dev *input_dev, wacom_wac->shared->touch->product == 0xF6) { input_dev->evbit[0] |= BIT_MASK(EV_SW); __set_bit(SW_MUTE_DEVICE, input_dev->swbit); - wacom_wac->shared->has_mute_touch_switch = true; + wacom_wac->has_mute_touch_switch = true; } fallthrough; diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 6d5014ebaab5..a6ea1eb1394e 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -635,8 +635,8 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id) status = readb(i2c->base + MPC_I2C_SR); if (status & CSR_MIF) { - /* Read again to allow register to stabilise */ - status = readb(i2c->base + MPC_I2C_SR); + /* Wait up to 100us for transfer to properly complete */ + readb_poll_timeout(i2c->base + MPC_I2C_SR, status, !(status & CSR_MCF), 0, 100); writeb(0, i2c->base + MPC_I2C_SR); mpc_i2c_do_intr(i2c, status); return IRQ_HANDLED; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index d5674026512a..a8688a92c760 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -120,6 +120,7 @@ static int bnxt_re_setup_chip_ctx(struct bnxt_re_dev *rdev, u8 wqe_mode) if (!chip_ctx) return -ENOMEM; chip_ctx->chip_num = bp->chip_num; + chip_ctx->hw_stats_size = bp->hw_ring_stats_size; rdev->chip_ctx = chip_ctx; /* rest members to follow eventually */ @@ -550,6 +551,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, dma_addr_t dma_map, u32 *fw_stats_ctx_id) { + struct bnxt_qplib_chip_ctx *chip_ctx = rdev->chip_ctx; struct hwrm_stat_ctx_alloc_output resp = {0}; struct hwrm_stat_ctx_alloc_input req = {0}; struct bnxt_en_dev *en_dev = rdev->en_dev; @@ -566,7 +568,7 @@ static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev, bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(1000); req.stats_dma_addr = cpu_to_le64(dma_map); - req.stats_dma_length = cpu_to_le16(sizeof(struct ctx_hw_stats_ext)); + req.stats_dma_length = cpu_to_le16(chip_ctx->hw_stats_size); req.stat_ctx_flags = STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE; bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 17f0701b3cee..44282a8cdd4f 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -56,6 +56,7 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, struct bnxt_qplib_stats *stats); static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, struct bnxt_qplib_stats *stats); /* PBL */ @@ -559,7 +560,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, goto fail; stats_alloc: /* Stats */ - rc = bnxt_qplib_alloc_stats_ctx(res->pdev, &ctx->stats); + rc = bnxt_qplib_alloc_stats_ctx(res->pdev, res->cctx, &ctx->stats); if (rc) goto fail; @@ -889,15 +890,12 @@ static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev, } static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev, + struct bnxt_qplib_chip_ctx *cctx, struct bnxt_qplib_stats *stats) { memset(stats, 0, sizeof(*stats)); stats->fw_id = -1; - /* 128 byte aligned context memory is required only for 57500. - * However making this unconditional, it does not harm previous - * generation. - */ - stats->size = ALIGN(sizeof(struct ctx_hw_stats), 128); + stats->size = cctx->hw_stats_size; stats->dma = dma_alloc_coherent(&pdev->dev, stats->size, &stats->dma_map, GFP_KERNEL); if (!stats->dma) { diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index c291f495ae91..91031502e8f5 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -54,6 +54,7 @@ struct bnxt_qplib_chip_ctx { u16 chip_num; u8 chip_rev; u8 chip_metal; + u16 hw_stats_size; struct bnxt_qplib_drv_modes modes; }; diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index b1023a7d0bd1..f1e5515256e0 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -2845,7 +2845,7 @@ static u64 irdma_sc_decode_fpm_commit(struct irdma_sc_dev *dev, __le64 *buf, * parses fpm commit info and copy base value * of hmc objects in hmc_info */ -static enum irdma_status_code +static void irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, struct irdma_hmc_obj_info *info, u32 *sd) { @@ -2915,7 +2915,6 @@ irdma_sc_parse_fpm_commit_buf(struct irdma_sc_dev *dev, __le64 *buf, else *sd = (u32)(size >> 21); - return 0; } /** @@ -4187,11 +4186,9 @@ enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, * @dev: sc device struct * @count: allocate count */ -enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) +void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count) { writel(count, dev->hw_regs[IRDMA_AEQALLOC]); - - return 0; } /** @@ -4434,9 +4431,9 @@ static enum irdma_status_code irdma_sc_cfg_iw_fpm(struct irdma_sc_dev *dev, ret_code = irdma_sc_commit_fpm_val(dev->cqp, 0, hmc_info->hmc_fn_id, &commit_fpm_mem, true, wait_type); if (!ret_code) - ret_code = irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, - hmc_info->hmc_obj, - &hmc_info->sd_table.sd_cnt); + irdma_sc_parse_fpm_commit_buf(dev, dev->fpm_commit_buf, + hmc_info->hmc_obj, + &hmc_info->sd_table.sd_cnt); print_hex_dump_debug("HMC: COMMIT FPM BUFFER", DUMP_PREFIX_OFFSET, 16, 8, commit_fpm_mem.va, IRDMA_COMMIT_FPM_BUF_SIZE, false); diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 7afb8a6a0526..00de5ee9a260 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1920,7 +1920,7 @@ enum irdma_status_code irdma_ctrl_init_hw(struct irdma_pci_f *rf) * irdma_set_hw_rsrc - set hw memory resources. * @rf: RDMA PCI function */ -static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf) +static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) { rf->allocated_qps = (void *)(rf->mem_rsrc + (sizeof(struct irdma_arp_entry) * rf->arp_table_size)); @@ -1937,8 +1937,6 @@ static u32 irdma_set_hw_rsrc(struct irdma_pci_f *rf) spin_lock_init(&rf->arp_lock); spin_lock_init(&rf->qptable_lock); spin_lock_init(&rf->qh_list_lock); - - return 0; } /** @@ -2000,9 +1998,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc; - ret = irdma_set_hw_rsrc(rf); - if (ret) - goto set_hw_rsrc_fail; + irdma_set_hw_rsrc(rf); set_bit(0, rf->allocated_mrs); set_bit(0, rf->allocated_qps); @@ -2025,9 +2021,6 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) return 0; -set_hw_rsrc_fail: - kfree(rf->mem_rsrc); - rf->mem_rsrc = NULL; mem_rsrc_kzalloc_fail: kfree(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; diff --git a/drivers/infiniband/hw/irdma/main.c b/drivers/infiniband/hw/irdma/main.c index ea59432351fb..51a41359e0b4 100644 --- a/drivers/infiniband/hw/irdma/main.c +++ b/drivers/infiniband/hw/irdma/main.c @@ -215,10 +215,10 @@ static void irdma_remove(struct auxiliary_device *aux_dev) pr_debug("INIT: Gen2 PF[%d] device remove success\n", PCI_FUNC(pf->pdev->devfn)); } -static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf) +static void irdma_fill_device_info(struct irdma_device *iwdev, struct ice_pf *pf, + struct ice_vsi *vsi) { struct irdma_pci_f *rf = iwdev->rf; - struct ice_vsi *vsi = ice_get_main_vsi(pf); rf->cdev = pf; rf->gen_ops.register_qset = irdma_lan_register_qset; @@ -253,12 +253,15 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ struct iidc_auxiliary_dev, adev); struct ice_pf *pf = iidc_adev->pf; + struct ice_vsi *vsi = ice_get_main_vsi(pf); struct iidc_qos_params qos_info = {}; struct irdma_device *iwdev; struct irdma_pci_f *rf; struct irdma_l2params l2params = {}; int err; + if (!vsi) + return -EIO; iwdev = ib_alloc_device(irdma_device, ibdev); if (!iwdev) return -ENOMEM; @@ -268,7 +271,7 @@ static int irdma_probe(struct auxiliary_device *aux_dev, const struct auxiliary_ return -ENOMEM; } - irdma_fill_device_info(iwdev, pf); + irdma_fill_device_info(iwdev, pf, vsi); rf = iwdev->rf; if (irdma_ctrl_init_hw(rf)) { diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index 7387b83e826d..874bc25a938b 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -1222,8 +1222,7 @@ enum irdma_status_code irdma_sc_aeq_init(struct irdma_sc_aeq *aeq, struct irdma_aeq_init_info *info); enum irdma_status_code irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, struct irdma_aeqe_info *info); -enum irdma_status_code irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, - u32 count); +void irdma_sc_repost_aeq_entries(struct irdma_sc_dev *dev, u32 count); void irdma_sc_pd_init(struct irdma_sc_dev *dev, struct irdma_sc_pd *pd, u32 pd_id, int abi_ver); diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index a6d52c20091c..5fb92de1f015 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -931,7 +931,7 @@ enum irdma_status_code irdma_uk_mw_bind(struct irdma_qp_uk *qp, enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, struct irdma_post_rq_info *info) { - u32 total_size = 0, wqe_idx, i, byte_off; + u32 wqe_idx, i, byte_off; u32 addl_frag_cnt; __le64 *wqe; u64 hdr; @@ -939,9 +939,6 @@ enum irdma_status_code irdma_uk_post_receive(struct irdma_qp_uk *qp, if (qp->max_rq_frag_cnt < info->num_sges) return IRDMA_ERR_INVALID_FRAG_COUNT; - for (i = 0; i < info->num_sges; i++) - total_size += info->sg_list[i].len; - wqe = irdma_qp_get_next_recv_wqe(qp, &wqe_idx); if (!wqe) return IRDMA_ERR_QP_TOOMANY_WRS_POSTED; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 9712f6902ba8..717147ed0519 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -557,7 +557,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) * @iwqp: qp ptr * @init_info: initialize info to return */ -static int irdma_setup_virt_qp(struct irdma_device *iwdev, +static void irdma_setup_virt_qp(struct irdma_device *iwdev, struct irdma_qp *iwqp, struct irdma_qp_init_info *init_info) { @@ -574,8 +574,6 @@ static int irdma_setup_virt_qp(struct irdma_device *iwdev, init_info->sq_pa = qpmr->sq_pbl.addr; init_info->rq_pa = qpmr->rq_pbl.addr; } - - return 0; } /** @@ -914,7 +912,7 @@ static struct ib_qp *irdma_create_qp(struct ib_pd *ibpd, } } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - err_code = irdma_setup_virt_qp(iwdev, iwqp, &init_info); + irdma_setup_virt_qp(iwdev, iwqp, &init_info); } else { init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; err_code = irdma_setup_kmode_qp(iwdev, iwqp, &init_info, init_attr); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 7abeb576b3c5..18b55d2eba40 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -997,7 +997,7 @@ int mlx5_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, MLX5_IB_CQ_PR_FLAGS_CQE_128_PAD)); MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); MLX5_SET(cqc, cqc, uar_page, index); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); if (cq->create_flags & IB_UVERBS_CQ_FLAGS_IGNORE_OVERRUN) MLX5_SET(cqc, cqc, oi, 1); diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index eb9b0a2707f8..e994aefcc40f 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1437,11 +1437,10 @@ out: rcu_read_unlock(); } -static bool is_apu_thread_cq(struct mlx5_ib_dev *dev, const void *in) +static bool is_apu_cq(struct mlx5_ib_dev *dev, const void *in) { if (!MLX5_CAP_GEN(dev->mdev, apu) || - !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), - apu_thread_cq)) + !MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), apu_cq)) return false; return true; @@ -1501,7 +1500,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in, cmd_in_len, cmd_out, cmd_out_len); } else if (opcode == MLX5_CMD_OP_CREATE_CQ && - !is_apu_thread_cq(dev, cmd_in)) { + !is_apu_cq(dev, cmd_in)) { obj->flags |= DEVX_OBJ_FLAGS_CQ; obj->core_cq.comp = devx_cq_comp; err = mlx5_core_create_cq(dev->mdev, &obj->core_cq, diff --git a/drivers/infiniband/hw/mlx5/ib_rep.c b/drivers/infiniband/hw/mlx5/ib_rep.c index b25e0b33a11a..52821485371a 100644 --- a/drivers/infiniband/hw/mlx5/ib_rep.c +++ b/drivers/infiniband/hw/mlx5/ib_rep.c @@ -8,13 +8,15 @@ #include "srq.h" static int -mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) +mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, + struct mlx5_eswitch_rep *rep, + int vport_index) { struct mlx5_ib_dev *ibdev; - int vport_index; ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB); - vport_index = rep->vport_index; + if (!ibdev) + return -EINVAL; ibdev->port[vport_index].rep = rep; rep->rep_data[REP_IB].priv = ibdev; @@ -26,19 +28,39 @@ mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) return 0; } +static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev); + static int mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { u32 num_ports = mlx5_eswitch_get_total_vports(dev); const struct mlx5_ib_profile *profile; + struct mlx5_core_dev *peer_dev; struct mlx5_ib_dev *ibdev; + u32 peer_num_ports; int vport_index; int ret; + vport_index = rep->vport_index; + + if (mlx5_lag_is_shared_fdb(dev)) { + peer_dev = mlx5_lag_get_peer_mdev(dev); + peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev); + if (mlx5_lag_is_master(dev)) { + /* Only 1 ib port is the representor for both uplinks */ + num_ports += peer_num_ports - 1; + } else { + if (rep->vport == MLX5_VPORT_UPLINK) + return 0; + vport_index += peer_num_ports; + dev = peer_dev; + } + } + if (rep->vport == MLX5_VPORT_UPLINK) profile = &raw_eth_profile; else - return mlx5_ib_set_vport_rep(dev, rep); + return mlx5_ib_set_vport_rep(dev, rep, vport_index); ibdev = ib_alloc_device(mlx5_ib_dev, ib_dev); if (!ibdev) @@ -64,6 +86,8 @@ mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) goto fail_add; rep->rep_data[REP_IB].priv = ibdev; + if (mlx5_lag_is_shared_fdb(dev)) + mlx5_ib_register_peer_vport_reps(dev); return 0; @@ -82,18 +106,45 @@ static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep) static void mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep) { + struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw); struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep); + int vport_index = rep->vport_index; struct mlx5_ib_port *port; - port = &dev->port[rep->vport_index]; + if (WARN_ON(!mdev)) + return; + + if (mlx5_lag_is_shared_fdb(mdev) && + !mlx5_lag_is_master(mdev)) { + struct mlx5_core_dev *peer_mdev; + + if (rep->vport == MLX5_VPORT_UPLINK) + return; + peer_mdev = mlx5_lag_get_peer_mdev(mdev); + vport_index += mlx5_eswitch_get_total_vports(peer_mdev); + } + + if (!dev) + return; + + port = &dev->port[vport_index]; write_lock(&port->roce.netdev_lock); port->roce.netdev = NULL; write_unlock(&port->roce.netdev_lock); rep->rep_data[REP_IB].priv = NULL; port->rep = NULL; - if (rep->vport == MLX5_VPORT_UPLINK) + if (rep->vport == MLX5_VPORT_UPLINK) { + struct mlx5_core_dev *peer_mdev; + struct mlx5_eswitch *esw; + + if (mlx5_lag_is_shared_fdb(mdev)) { + peer_mdev = mlx5_lag_get_peer_mdev(mdev); + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_unregister_vport_reps(esw, REP_IB); + } __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); + } } static const struct mlx5_eswitch_rep_ops rep_ops = { @@ -102,6 +153,18 @@ static const struct mlx5_eswitch_rep_ops rep_ops = { .get_proto_dev = mlx5_ib_rep_to_dev, }; +static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_mdev = mlx5_lag_get_peer_mdev(mdev); + struct mlx5_eswitch *esw; + + if (!peer_mdev) + return; + + esw = peer_mdev->priv.eswitch; + mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB); +} + struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw, u16 vport_num) { @@ -123,7 +186,7 @@ struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev, rep = dev->port[port - 1].rep; - return mlx5_eswitch_add_send_to_vport_rule(esw, rep, sq->base.mqp.qpn); + return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn); } static int mlx5r_rep_probe(struct auxiliary_device *adev, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 094c976b1eed..ae05e143401c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -126,6 +126,7 @@ static int get_port_state(struct ib_device *ibdev, static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, struct net_device *ndev, + struct net_device *upper, u32 *port_num) { struct net_device *rep_ndev; @@ -137,6 +138,14 @@ static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev, if (!port->rep) continue; + if (upper == ndev && port->rep->vport == MLX5_VPORT_UPLINK) { + *port_num = i + 1; + return &port->roce; + } + + if (upper && port->rep->vport == MLX5_VPORT_UPLINK) + continue; + read_lock(&port->roce.netdev_lock); rep_ndev = mlx5_ib_get_rep_netdev(port->rep->esw, port->rep->vport); @@ -196,11 +205,12 @@ static int mlx5_netdev_event(struct notifier_block *this, } if (ibdev->is_rep) - roce = mlx5_get_rep_roce(ibdev, ndev, &port_num); + roce = mlx5_get_rep_roce(ibdev, ndev, upper, &port_num); if (!roce) return NOTIFY_DONE; - if ((upper == ndev || (!upper && ndev == roce->netdev)) - && ibdev->ib_active) { + if ((upper == ndev || + ((!upper || ibdev->is_rep) && ndev == roce->netdev)) && + ibdev->ib_active) { struct ib_event ibev = { }; enum ib_port_state port_state; @@ -3012,7 +3022,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) struct mlx5_flow_table *ft; int err; - if (!ns || !mlx5_lag_is_roce(mdev)) + if (!ns || !mlx5_lag_is_active(mdev)) return 0; err = mlx5_cmd_create_vport_lag(mdev); @@ -3074,9 +3084,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - return err; + if (!dev->is_rep && dev->profile != &raw_eth_profile) { + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + return err; + } err = mlx5_eth_lag_init(dev); if (err) @@ -3085,7 +3097,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) return 0; err_disable_roce: - mlx5_nic_vport_disable_roce(dev->mdev); + if (!dev->is_rep && dev->profile != &raw_eth_profile) + mlx5_nic_vport_disable_roce(dev->mdev); return err; } @@ -3093,7 +3106,8 @@ err_disable_roce: static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { mlx5_eth_lag_cleanup(dev); - mlx5_nic_vport_disable_roce(dev->mdev); + if (!dev->is_rep && dev->profile != &raw_eth_profile) + mlx5_nic_vport_disable_roce(dev->mdev); } static int mlx5_ib_rn_get_params(struct ib_device *device, u32 port_num, @@ -3950,12 +3964,7 @@ static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev) /* Register only for native ports */ err = mlx5_add_netdev_notifier(dev, port_num); - if (err || dev->is_rep || !mlx5_is_roce_init_enabled(mdev)) - /* - * We don't enable ETH interface for - * 1. IB representors - * 2. User disabled ROCE through devlink interface - */ + if (err) return err; err = mlx5_enable_eth(dev); @@ -3980,8 +3989,7 @@ static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev) ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { - if (!dev->is_rep) - mlx5_disable_eth(dev); + mlx5_disable_eth(dev); port_num = mlx5_core_native_port_num(dev->mdev) - 1; mlx5_remove_netdev_notifier(dev, port_num); @@ -4037,7 +4045,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev) { const char *name; - if (!mlx5_lag_is_roce(dev->mdev)) + if (!mlx5_lag_is_active(dev->mdev)) name = "mlx5_%d"; else name = "mlx5_bond_%d"; diff --git a/drivers/infiniband/hw/mlx5/std_types.c b/drivers/infiniband/hw/mlx5/std_types.c index c0ddf7b3c6e2..bbfcce3bdc84 100644 --- a/drivers/infiniband/hw/mlx5/std_types.c +++ b/drivers/infiniband/hw/mlx5/std_types.c @@ -114,14 +114,18 @@ out: static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num, struct mlx5_ib_uapi_query_port *info) { - struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_eswitch_rep *rep; + struct mlx5_core_dev *mdev; int err; rep = dev->port[port_num - 1].rep; if (!rep) return -EOPNOTSUPP; + mdev = mlx5_eswitch_get_core_dev(rep->esw); + if (!mdev) + return -EINVAL; + info->vport = rep->vport; info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT; @@ -138,9 +142,9 @@ static int fill_switchdev_info(struct mlx5_ib_dev *dev, u32 port_num, if (err) return err; - if (mlx5_eswitch_vport_match_metadata_enabled(mdev->priv.eswitch)) { + if (mlx5_eswitch_vport_match_metadata_enabled(rep->esw)) { info->reg_c0.value = mlx5_eswitch_get_vport_metadata_for_match( - mdev->priv.eswitch, rep->vport); + rep->esw, rep->vport); info->reg_c0.mask = mlx5_eswitch_get_vport_metadata_mask(); info->flags |= MLX5_IB_UAPI_QUERY_PORT_VPORT_REG_C0; } diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 6aabcb4de235..be4bcb420fab 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -113,13 +113,14 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; + int i; umem = ib_umem_get(pd->ibpd.device, start, length, access); if (IS_ERR(umem)) { - pr_warn("err %d from rxe_umem_get\n", - (int)PTR_ERR(umem)); + pr_warn("%s: Unable to pin memory region err = %d\n", + __func__, (int)PTR_ERR(umem)); err = PTR_ERR(umem); - goto err1; + goto err_out; } mr->umem = umem; @@ -129,9 +130,9 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, err = rxe_mr_alloc(mr, num_buf); if (err) { - pr_warn("err %d from rxe_mr_alloc\n", err); - ib_umem_release(umem); - goto err1; + pr_warn("%s: Unable to allocate memory for map\n", + __func__); + goto err_release_umem; } mr->page_shift = PAGE_SHIFT; @@ -151,10 +152,10 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, vaddr = page_address(sg_page_iter_page(&sg_iter)); if (!vaddr) { - pr_warn("null vaddr\n"); - ib_umem_release(umem); + pr_warn("%s: Unable to get virtual address\n", + __func__); err = -ENOMEM; - goto err1; + goto err_cleanup_map; } buf->addr = (uintptr_t)vaddr; @@ -177,7 +178,13 @@ int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova, return 0; -err1: +err_cleanup_map: + for (i = 0; i < mr->num_map; i++) + kfree(mr->map[i]); + kfree(mr->map); +err_release_umem: + ib_umem_release(umem); +err_out: return err; } diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index 02281d13505f..508ac295eb06 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -1573,6 +1573,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, struct media_request *req) { struct vb2_buffer *vb; + enum vb2_buffer_state orig_state; int ret; if (q->error) { @@ -1673,6 +1674,7 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, * Add to the queued buffers list, a buffer will stay on it until * dequeued in dqbuf. */ + orig_state = vb->state; list_add_tail(&vb->queued_entry, &q->queued_list); q->queued_count++; q->waiting_for_buffers = false; @@ -1703,8 +1705,17 @@ int vb2_core_qbuf(struct vb2_queue *q, unsigned int index, void *pb, if (q->streaming && !q->start_streaming_called && q->queued_count >= q->min_buffers_needed) { ret = vb2_start_streaming(q); - if (ret) + if (ret) { + /* + * Since vb2_core_qbuf will return with an error, + * we should return it to state DEQUEUED since + * the error indicates that the buffer wasn't queued. + */ + list_del(&vb->queued_entry); + q->queued_count--; + vb->state = orig_state; return ret; + } } dprintk(q, 2, "qbuf of buffer %d succeeded\n", vb->index); diff --git a/drivers/media/pci/intel/ipu3/cio2-bridge.c b/drivers/media/pci/intel/ipu3/cio2-bridge.c index 4657e99df033..59a36f922675 100644 --- a/drivers/media/pci/intel/ipu3/cio2-bridge.c +++ b/drivers/media/pci/intel/ipu3/cio2-bridge.c @@ -173,10 +173,8 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, int ret; for_each_acpi_dev_match(adev, cfg->hid, NULL, -1) { - if (!adev->status.enabled) { - acpi_dev_put(adev); + if (!adev->status.enabled) continue; - } if (bridge->n_sensors >= CIO2_NUM_PORTS) { acpi_dev_put(adev); @@ -185,7 +183,6 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, } sensor = &bridge->sensors[bridge->n_sensors]; - sensor->adev = adev; strscpy(sensor->name, cfg->hid, sizeof(sensor->name)); ret = cio2_bridge_read_acpi_buffer(adev, "SSDB", @@ -215,6 +212,7 @@ static int cio2_bridge_connect_sensor(const struct cio2_sensor_config *cfg, goto err_free_swnodes; } + sensor->adev = acpi_dev_get(adev); adev->fwnode.secondary = fwnode; dev_info(&cio2->dev, "Found supported sensor %s\n", diff --git a/drivers/media/pci/ngene/ngene-core.c b/drivers/media/pci/ngene/ngene-core.c index 07f342db6701..7481f553f959 100644 --- a/drivers/media/pci/ngene/ngene-core.c +++ b/drivers/media/pci/ngene/ngene-core.c @@ -385,7 +385,7 @@ static int ngene_command_config_free_buf(struct ngene *dev, u8 *config) com.cmd.hdr.Opcode = CMD_CONFIGURE_FREE_BUFFER; com.cmd.hdr.Length = 6; - memcpy(&com.cmd.ConfigureBuffers.config, config, 6); + memcpy(&com.cmd.ConfigureFreeBuffers.config, config, 6); com.in_len = 6; com.out_len = 0; diff --git a/drivers/media/pci/ngene/ngene.h b/drivers/media/pci/ngene/ngene.h index 84f04e0e0cb9..3d296f1998a1 100644 --- a/drivers/media/pci/ngene/ngene.h +++ b/drivers/media/pci/ngene/ngene.h @@ -407,12 +407,14 @@ enum _BUFFER_CONFIGS { struct FW_CONFIGURE_FREE_BUFFERS { struct FW_HEADER hdr; - u8 UVI1_BufferLength; - u8 UVI2_BufferLength; - u8 TVO_BufferLength; - u8 AUD1_BufferLength; - u8 AUD2_BufferLength; - u8 TVA_BufferLength; + struct { + u8 UVI1_BufferLength; + u8 UVI2_BufferLength; + u8 TVO_BufferLength; + u8 AUD1_BufferLength; + u8 AUD2_BufferLength; + u8 TVA_BufferLength; + } __packed config; } __attribute__ ((__packed__)); struct FW_CONFIGURE_UART { diff --git a/drivers/media/platform/atmel/Kconfig b/drivers/media/platform/atmel/Kconfig index 99b51213f871..dda2f27da317 100644 --- a/drivers/media/platform/atmel/Kconfig +++ b/drivers/media/platform/atmel/Kconfig @@ -8,6 +8,7 @@ config VIDEO_ATMEL_ISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL Image Sensor Controller available as a v4l2 device. @@ -19,10 +20,17 @@ config VIDEO_ATMEL_XISC select VIDEOBUF2_DMA_CONTIG select REGMAP_MMIO select V4L2_FWNODE + select VIDEO_ATMEL_ISC_BASE help This module makes the ATMEL eXtended Image Sensor Controller available as a v4l2 device. +config VIDEO_ATMEL_ISC_BASE + tristate + default n + help + ATMEL ISC and XISC common code base. + config VIDEO_ATMEL_ISI tristate "ATMEL Image Sensor Interface (ISI) support" depends on VIDEO_V4L2 && OF diff --git a/drivers/media/platform/atmel/Makefile b/drivers/media/platform/atmel/Makefile index c5c01556c653..46d264ab7948 100644 --- a/drivers/media/platform/atmel/Makefile +++ b/drivers/media/platform/atmel/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only -atmel-isc-objs = atmel-sama5d2-isc.o atmel-isc-base.o -atmel-xisc-objs = atmel-sama7g5-isc.o atmel-isc-base.o +atmel-isc-objs = atmel-sama5d2-isc.o +atmel-xisc-objs = atmel-sama7g5-isc.o obj-$(CONFIG_VIDEO_ATMEL_ISI) += atmel-isi.o +obj-$(CONFIG_VIDEO_ATMEL_ISC_BASE) += atmel-isc-base.o obj-$(CONFIG_VIDEO_ATMEL_ISC) += atmel-isc.o obj-$(CONFIG_VIDEO_ATMEL_XISC) += atmel-xisc.o diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c index 19daa49bf604..136ab7cf36ed 100644 --- a/drivers/media/platform/atmel/atmel-isc-base.c +++ b/drivers/media/platform/atmel/atmel-isc-base.c @@ -378,6 +378,7 @@ int isc_clk_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_clk_init); void isc_clk_cleanup(struct isc_device *isc) { @@ -392,6 +393,7 @@ void isc_clk_cleanup(struct isc_device *isc) clk_unregister(isc_clk->clk); } } +EXPORT_SYMBOL_GPL(isc_clk_cleanup); static int isc_queue_setup(struct vb2_queue *vq, unsigned int *nbuffers, unsigned int *nplanes, @@ -1578,6 +1580,7 @@ irqreturn_t isc_interrupt(int irq, void *dev_id) return ret; } +EXPORT_SYMBOL_GPL(isc_interrupt); static void isc_hist_count(struct isc_device *isc, u32 *min, u32 *max) { @@ -2212,6 +2215,7 @@ const struct v4l2_async_notifier_operations isc_async_ops = { .unbind = isc_async_unbind, .complete = isc_async_complete, }; +EXPORT_SYMBOL_GPL(isc_async_ops); void isc_subdev_cleanup(struct isc_device *isc) { @@ -2224,6 +2228,7 @@ void isc_subdev_cleanup(struct isc_device *isc) INIT_LIST_HEAD(&isc->subdev_entities); } +EXPORT_SYMBOL_GPL(isc_subdev_cleanup); int isc_pipeline_init(struct isc_device *isc) { @@ -2264,6 +2269,7 @@ int isc_pipeline_init(struct isc_device *isc) return 0; } +EXPORT_SYMBOL_GPL(isc_pipeline_init); /* regmap configuration */ #define ATMEL_ISC_REG_MAX 0xd5c @@ -2273,4 +2279,9 @@ const struct regmap_config isc_regmap_config = { .val_bits = 32, .max_register = ATMEL_ISC_REG_MAX, }; +EXPORT_SYMBOL_GPL(isc_regmap_config); +MODULE_AUTHOR("Songjun Wu"); +MODULE_AUTHOR("Eugen Hristev"); +MODULE_DESCRIPTION("Atmel ISC common code base"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c index 83705730e37e..795a012d4020 100644 --- a/drivers/media/usb/dvb-usb-v2/rtl28xxu.c +++ b/drivers/media/usb/dvb-usb-v2/rtl28xxu.c @@ -37,7 +37,16 @@ static int rtl28xxu_ctrl_msg(struct dvb_usb_device *d, struct rtl28xxu_req *req) } else { /* read */ requesttype = (USB_TYPE_VENDOR | USB_DIR_IN); - pipe = usb_rcvctrlpipe(d->udev, 0); + + /* + * Zero-length transfers must use usb_sndctrlpipe() and + * rtl28xxu_identify_state() uses a zero-length i2c read + * command to determine the chip type. + */ + if (req->size) + pipe = usb_rcvctrlpipe(d->udev, 0); + else + pipe = usb_sndctrlpipe(d->udev, 0); } ret = usb_control_msg(d->udev, pipe, 0, requesttype, req->value, @@ -612,9 +621,8 @@ static int rtl28xxu_read_config(struct dvb_usb_device *d) static int rtl28xxu_identify_state(struct dvb_usb_device *d, const char **name) { struct rtl28xxu_dev *dev = d_to_priv(d); - u8 buf[1]; int ret; - struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 1, buf}; + struct rtl28xxu_req req_demod_i2c = {0x0020, CMD_I2C_DA_RD, 0, NULL}; dev_dbg(&d->intf->dev, "\n"); diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 7a6f01ace78a..305ffad131a2 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -714,23 +714,20 @@ static int at24_probe(struct i2c_client *client) } /* - * If the 'label' property is not present for the AT24 EEPROM, - * then nvmem_config.id is initialised to NVMEM_DEVID_AUTO, - * and this will append the 'devid' to the name of the NVMEM - * device. This is purely legacy and the AT24 driver has always - * defaulted to this. However, if the 'label' property is - * present then this means that the name is specified by the - * firmware and this name should be used verbatim and so it is - * not necessary to append the 'devid'. + * We initialize nvmem_config.id to NVMEM_DEVID_AUTO even if the + * label property is set as some platform can have multiple eeproms + * with same label and we can not register each of those with same + * label. Failing to register those eeproms trigger cascade failure + * on such platform. */ + nvmem_config.id = NVMEM_DEVID_AUTO; + if (device_property_present(dev, "label")) { - nvmem_config.id = NVMEM_DEVID_NONE; err = device_property_read_string(dev, "label", &nvmem_config.name); if (err) return err; } else { - nvmem_config.id = NVMEM_DEVID_AUTO; nvmem_config.name = dev_name(dev); } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 56213a8a1ec5..995c613086aa 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -431,10 +431,10 @@ config VSOCKMON config MHI_NET tristate "MHI network driver" depends on MHI_BUS - select WWAN help This is the network driver for MHI bus. It can be used with - QCOM based WWAN modems (like SDX55). Say Y or M. + QCOM based WWAN modems for IP or QMAP/rmnet protocol (like SDX55). + Say Y or M. endif # NET_CORE @@ -606,4 +606,11 @@ config NET_FAILOVER a VM with direct attached VF by failing over to the paravirtual datapath when the VF is unplugged. +config NETDEV_LEGACY_INIT + bool + depends on ISA + help + Drivers that call netdev_boot_setup_check() should select this + symbol, everything else no longer needs it. + endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index a48a664605a3..739838623cf6 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -18,7 +18,8 @@ obj-$(CONFIG_MACVLAN) += macvlan.o obj-$(CONFIG_MACVTAP) += macvtap.o obj-$(CONFIG_MII) += mii.o obj-$(CONFIG_MDIO) += mdio.o -obj-$(CONFIG_NET) += Space.o loopback.o +obj-$(CONFIG_NET) += loopback.o +obj-$(CONFIG_NETDEV_LEGACY_INIT) += Space.o obj-$(CONFIG_NETCONSOLE) += netconsole.o obj-y += phy/ obj-y += mdio/ @@ -36,7 +37,7 @@ obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o obj-$(CONFIG_VSOCKMON) += vsockmon.o -obj-$(CONFIG_MHI_NET) += mhi/ +obj-$(CONFIG_MHI_NET) += mhi_net.o # # Networking Drivers diff --git a/drivers/net/Space.c b/drivers/net/Space.c index df79e7370bcc..49e67c9fb5a4 100644 --- a/drivers/net/Space.c +++ b/drivers/net/Space.c @@ -30,6 +30,148 @@ #include <linux/netlink.h> #include <net/Space.h> +/* + * This structure holds boot-time configured netdevice settings. They + * are then used in the device probing. + */ +struct netdev_boot_setup { + char name[IFNAMSIZ]; + struct ifmap map; +}; +#define NETDEV_BOOT_SETUP_MAX 8 + + +/****************************************************************************** + * + * Device Boot-time Settings Routines + * + ******************************************************************************/ + +/* Boot time configuration table */ +static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; + +/** + * netdev_boot_setup_add - add new setup entry + * @name: name of the device + * @map: configured settings for the device + * + * Adds new setup entry to the dev_boot_setup list. The function + * returns 0 on error and 1 on success. This is a generic routine to + * all netdevices. + */ +static int netdev_boot_setup_add(char *name, struct ifmap *map) +{ + struct netdev_boot_setup *s; + int i; + + s = dev_boot_setup; + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { + if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { + memset(s[i].name, 0, sizeof(s[i].name)); + strlcpy(s[i].name, name, IFNAMSIZ); + memcpy(&s[i].map, map, sizeof(s[i].map)); + break; + } + } + + return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; +} + +/** + * netdev_boot_setup_check - check boot time settings + * @dev: the netdevice + * + * Check boot time settings for the device. + * The found settings are set for the device to be used + * later in the device probing. + * Returns 0 if no settings found, 1 if they are. + */ +int netdev_boot_setup_check(struct net_device *dev) +{ + struct netdev_boot_setup *s = dev_boot_setup; + int i; + + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { + if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && + !strcmp(dev->name, s[i].name)) { + dev->irq = s[i].map.irq; + dev->base_addr = s[i].map.base_addr; + dev->mem_start = s[i].map.mem_start; + dev->mem_end = s[i].map.mem_end; + return 1; + } + } + return 0; +} +EXPORT_SYMBOL(netdev_boot_setup_check); + +/** + * netdev_boot_base - get address from boot time settings + * @prefix: prefix for network device + * @unit: id for network device + * + * Check boot time settings for the base address of device. + * The found settings are set for the device to be used + * later in the device probing. + * Returns 0 if no settings found. + */ +static unsigned long netdev_boot_base(const char *prefix, int unit) +{ + const struct netdev_boot_setup *s = dev_boot_setup; + char name[IFNAMSIZ]; + int i; + + sprintf(name, "%s%d", prefix, unit); + + /* + * If device already registered then return base of 1 + * to indicate not to probe for this interface + */ + if (__dev_get_by_name(&init_net, name)) + return 1; + + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) + if (!strcmp(name, s[i].name)) + return s[i].map.base_addr; + return 0; +} + +/* + * Saves at boot time configured settings for any netdevice. + */ +static int __init netdev_boot_setup(char *str) +{ + int ints[5]; + struct ifmap map; + + str = get_options(str, ARRAY_SIZE(ints), ints); + if (!str || !*str) + return 0; + + /* Save settings */ + memset(&map, 0, sizeof(map)); + if (ints[0] > 0) + map.irq = ints[1]; + if (ints[0] > 1) + map.base_addr = ints[2]; + if (ints[0] > 2) + map.mem_start = ints[3]; + if (ints[0] > 3) + map.mem_end = ints[4]; + + /* Add new entry to the list */ + return netdev_boot_setup_add(str, &map); +} + +__setup("netdev=", netdev_boot_setup); + +static int __init ether_boot_setup(char *str) +{ + return netdev_boot_setup(str); +} +__setup("ether=", ether_boot_setup); + + /* A unified ethernet device probe. This is the easiest way to have every * ethernet adaptor have the name "eth[0123...]". */ @@ -77,39 +219,15 @@ static struct devprobe2 isa_probes[] __initdata = { #ifdef CONFIG_SMC9194 {smc_init, 0}, #endif -#ifdef CONFIG_CS89x0 -#ifndef CONFIG_CS89x0_PLATFORM +#ifdef CONFIG_CS89x0_ISA {cs89x0_probe, 0}, #endif -#endif -#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_BVME6000_NET) /* Intel */ - {i82596_probe, 0}, /* I82596 */ -#endif #ifdef CONFIG_NI65 {ni65_probe, 0}, #endif {NULL, 0}, }; -static struct devprobe2 m68k_probes[] __initdata = { -#ifdef CONFIG_ATARILANCE /* Lance-based Atari ethernet boards */ - {atarilance_probe, 0}, -#endif -#ifdef CONFIG_SUN3LANCE /* sun3 onboard Lance chip */ - {sun3lance_probe, 0}, -#endif -#ifdef CONFIG_SUN3_82586 /* sun3 onboard Intel 82586 chip */ - {sun3_82586_probe, 0}, -#endif -#ifdef CONFIG_APNE /* A1200 PCMCIA NE2000 */ - {apne_probe, 0}, -#endif -#ifdef CONFIG_MVME147_NET /* MVME147 internal Ethernet */ - {mvme147lance_probe, 0}, -#endif - {NULL, 0}, -}; - /* Unified ethernet device probe, segmented per architecture and * per bus interface. This drives the legacy devices only for now. */ @@ -121,8 +239,7 @@ static void __init ethif_probe2(int unit) if (base_addr == 1) return; - (void)(probe_list2(unit, m68k_probes, base_addr == 0) && - probe_list2(unit, isa_probes, base_addr == 0)); + probe_list2(unit, isa_probes, base_addr == 0); } /* Statically configured drivers -- order matters here. */ @@ -130,10 +247,6 @@ static int __init net_olddevs_init(void) { int num; -#ifdef CONFIG_SBNI - for (num = 0; num < 8; ++num) - sbni_probe(num); -#endif for (num = 0; num < 8; ++num) ethif_probe2(num); @@ -142,9 +255,6 @@ static int __init net_olddevs_init(void) cops_probe(1); cops_probe(2); #endif -#ifdef CONFIG_LTPC - ltpc_probe(); -#endif return 0; } diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig index 43918398f0d3..90b9f1d6eda9 100644 --- a/drivers/net/appletalk/Kconfig +++ b/drivers/net/appletalk/Kconfig @@ -52,7 +52,9 @@ config LTPC config COPS tristate "COPS LocalTalk PC support" - depends on DEV_APPLETALK && (ISA || EISA) + depends on DEV_APPLETALK && ISA + depends on NETDEVICES + select NETDEV_LEGACY_INIT help This allows you to use COPS AppleTalk cards to connect to LocalTalk networks. You also need version 1.3.3 or later of the netatalk diff --git a/drivers/net/appletalk/ltpc.c b/drivers/net/appletalk/ltpc.c index 69c270885ff0..1f8925e75b3f 100644 --- a/drivers/net/appletalk/ltpc.c +++ b/drivers/net/appletalk/ltpc.c @@ -1015,7 +1015,7 @@ static const struct net_device_ops ltpc_netdev = { .ndo_set_rx_mode = set_multicast_list, }; -struct net_device * __init ltpc_probe(void) +static struct net_device * __init ltpc_probe(void) { struct net_device *dev; int err = -ENOMEM; @@ -1221,12 +1221,10 @@ static int __init ltpc_setup(char *str) } __setup("ltpc=", ltpc_setup); -#endif /* MODULE */ +#endif static struct net_device *dev_ltpc; -#ifdef MODULE - MODULE_LICENSE("GPL"); module_param(debug, int, 0); module_param_hw(io, int, ioport, 0); @@ -1244,7 +1242,6 @@ static int __init ltpc_module_init(void) return PTR_ERR_OR_ZERO(dev_ltpc); } module_init(ltpc_module_init); -#endif static void __exit ltpc_cleanup(void) { diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 6908822d9773..a4a202b9a0a2 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -96,7 +96,7 @@ static int ad_marker_send(struct port *port, struct bond_marker *marker); static void ad_mux_machine(struct port *port, bool *update_slave_arr); static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port); static void ad_tx_machine(struct port *port); -static void ad_periodic_machine(struct port *port); +static void ad_periodic_machine(struct port *port, struct bond_params bond_params); static void ad_port_selection_logic(struct port *port, bool *update_slave_arr); static void ad_agg_selection_logic(struct aggregator *aggregator, bool *update_slave_arr); @@ -1294,10 +1294,11 @@ static void ad_tx_machine(struct port *port) /** * ad_periodic_machine - handle a port's periodic state machine * @port: the port we're looking at + * @bond_params: bond parameters we will use * * Turn ntt flag on priodically to perform periodic transmission of lacpdu's. */ -static void ad_periodic_machine(struct port *port) +static void ad_periodic_machine(struct port *port, struct bond_params bond_params) { periodic_states_t last_state; @@ -1306,8 +1307,8 @@ static void ad_periodic_machine(struct port *port) /* check if port was reinitialized */ if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) || - (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) - ) { + (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY)) || + !bond_params.lacp_active) { port->sm_periodic_state = AD_NO_PERIODIC; } /* check if state machine should change state */ @@ -2341,7 +2342,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } ad_rx_machine(NULL, port); - ad_periodic_machine(port); + ad_periodic_machine(port, bond->params); ad_port_selection_logic(port, &update_slave_arr); ad_mux_machine(port, &update_slave_arr); ad_tx_machine(port); diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 22e5632089ac..7d3752cbf761 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -17,7 +17,6 @@ #include <linux/if_bonding.h> #include <linux/if_vlan.h> #include <linux/in.h> -#include <net/ipx.h> #include <net/arp.h> #include <net/ipv6.h> #include <asm/byteorder.h> @@ -1351,8 +1350,6 @@ struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, if (!is_multicast_ether_addr(eth_data->h_dest)) { switch (skb->protocol) { case htons(ETH_P_IP): - case htons(ETH_P_IPX): - /* In case of IPX, it will falback to L2 hash */ case htons(ETH_P_IPV6): hash_index = bond_xmit_hash(bond, skb); if (bond->params.tlb_dynamic_lb) { @@ -1454,35 +1451,6 @@ struct slave *bond_xmit_alb_slave_get(struct bonding *bond, hash_size = sizeof(ip6hdr->daddr); break; } - case ETH_P_IPX: { - const struct ipxhdr *ipxhdr; - - if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) { - do_tx_balance = false; - break; - } - ipxhdr = (struct ipxhdr *)skb_network_header(skb); - - if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) { - /* something is wrong with this packet */ - do_tx_balance = false; - break; - } - - if (ipxhdr->ipx_type != IPX_TYPE_NCP) { - /* The only protocol worth balancing in - * this family since it has an "ARP" like - * mechanism - */ - do_tx_balance = false; - break; - } - - eth_data = eth_hdr(skb); - hash_start = (char *)eth_data->h_dest; - hash_size = ETH_ALEN; - break; - } case ETH_P_ARP: do_tx_balance = false; if (bond_info->rlb_enabled) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index bec8ceaff98f..365953e8013e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -317,6 +317,19 @@ bool bond_sk_check(struct bonding *bond) } } +static bool bond_xdp_check(struct bonding *bond) +{ + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + case BOND_MODE_ACTIVEBACKUP: + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + return true; + default: + return false; + } +} + /*---------------------------------- VLAN -----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, @@ -2133,6 +2146,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond_update_slave_arr(bond, NULL); + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + if (bond->xdp_prog) { + NL_SET_ERR_MSG(extack, "Slave does not support XDP"); + slave_err(bond_dev, slave_dev, "Slave does not support XDP\n"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + } else { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = bond->xdp_prog, + .extack = extack, + }; + + if (dev_xdp_prog_count(slave_dev) > 0) { + NL_SET_ERR_MSG(extack, + "Slave has XDP program loaded, please unload before enslaving"); + slave_err(bond_dev, slave_dev, + "Slave has XDP program loaded, please unload before enslaving\n"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + + res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (res < 0) { + /* ndo_bpf() sets extack error message */ + slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); + goto err_sysfs_del; + } + if (bond->xdp_prog) + bpf_prog_inc(bond->xdp_prog); + } + slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", bond_is_active_slave(new_slave) ? "an active" : "a backup", new_slave->link != BOND_LINK_DOWN ? "an up" : "a down"); @@ -2252,7 +2300,17 @@ static int __bond_release_one(struct net_device *bond_dev, /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); - bond_upper_dev_unlink(bond, slave); + if (bond->xdp_prog) { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = NULL, + .extack = NULL, + }; + if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp)) + slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n"); + } + /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. */ @@ -2261,6 +2319,8 @@ static int __bond_release_one(struct net_device *bond_dev, if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_unbind_slave(slave); + bond_upper_dev_unlink(bond, slave); + if (bond_mode_can_use_xmit_hash(bond)) bond_update_slave_arr(bond, slave); @@ -3613,55 +3673,80 @@ static struct notifier_block bond_netdev_notifier = { /*---------------------------- Hashing Policies -----------------------------*/ +/* Helper to access data in a packet, with or without a backing skb. + * If skb is given the data is linearized if necessary via pskb_may_pull. + */ +static inline const void *bond_pull_data(struct sk_buff *skb, + const void *data, int hlen, int n) +{ + if (likely(n <= hlen)) + return data; + else if (skb && likely(pskb_may_pull(skb, n))) + return skb->head; + + return NULL; +} + /* L2 hash helper */ -static inline u32 bond_eth_hash(struct sk_buff *skb) +static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen) { - struct ethhdr *ep, hdr_tmp; + struct ethhdr *ep; - ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp); - if (ep) - return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto; - return 0; + data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr)); + if (!data) + return 0; + + ep = (struct ethhdr *)(data + mhoff); + return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto); } -static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, - int *noff, int *proto, bool l34) +static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data, + int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34) { const struct ipv6hdr *iph6; const struct iphdr *iph; - if (skb->protocol == htons(ETH_P_IP)) { - if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph)))) + if (l2_proto == htons(ETH_P_IP)) { + data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph)); + if (!data) return false; - iph = (const struct iphdr *)(skb->data + *noff); + + iph = (const struct iphdr *)(data + *nhoff); iph_to_flow_copy_v4addrs(fk, iph); - *noff += iph->ihl << 2; + *nhoff += iph->ihl << 2; if (!ip_is_fragment(iph)) - *proto = iph->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { - if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6)))) + *ip_proto = iph->protocol; + } else if (l2_proto == htons(ETH_P_IPV6)) { + data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6)); + if (!data) return false; - iph6 = (const struct ipv6hdr *)(skb->data + *noff); + + iph6 = (const struct ipv6hdr *)(data + *nhoff); iph_to_flow_copy_v6addrs(fk, iph6); - *noff += sizeof(*iph6); - *proto = iph6->nexthdr; + *nhoff += sizeof(*iph6); + *ip_proto = iph6->nexthdr; } else { return false; } - if (l34 && *proto >= 0) - fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto); + if (l34 && *ip_proto >= 0) + fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen); return true; } -static u32 bond_vlan_srcmac_hash(struct sk_buff *skb) +static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen) { - struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *mac_hdr; u32 srcmac_vendor = 0, srcmac_dev = 0; u16 vlan; int i; + data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr)); + if (!data) + return 0; + mac_hdr = (struct ethhdr *)(data + mhoff); + for (i = 0; i < 3; i++) srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i]; @@ -3677,26 +3762,25 @@ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb) } /* Extract the appropriate headers based on bond's xmit policy */ -static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, - struct flow_keys *fk) +static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, const void *data, + __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk) { bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34; - int noff, proto = -1; + int ip_proto = -1; switch (bond->params.xmit_policy) { case BOND_XMIT_POLICY_ENCAP23: case BOND_XMIT_POLICY_ENCAP34: memset(fk, 0, sizeof(*fk)); return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, - fk, NULL, 0, 0, 0, 0); + fk, data, l2_proto, nhoff, hlen, 0); default: break; } fk->ports.ports = 0; memset(&fk->icmp, 0, sizeof(fk->icmp)); - noff = skb_network_offset(skb); - if (!bond_flow_ip(skb, fk, &noff, &proto, l34)) + if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34)) return false; /* ICMP error packets contains at least 8 bytes of the header @@ -3704,22 +3788,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, * to correlate ICMP error packets within the same flow which * generated the error. */ - if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { - skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data, - skb_transport_offset(skb), - skb_headlen(skb)); - if (proto == IPPROTO_ICMP) { + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { + skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen); + if (ip_proto == IPPROTO_ICMP) { if (!icmp_is_err(fk->icmp.type)) return true; - noff += sizeof(struct icmphdr); - } else if (proto == IPPROTO_ICMPV6) { + nhoff += sizeof(struct icmphdr); + } else if (ip_proto == IPPROTO_ICMPV6) { if (!icmpv6_is_err(fk->icmp.type)) return true; - noff += sizeof(struct icmp6hdr); + nhoff += sizeof(struct icmp6hdr); } - return bond_flow_ip(skb, fk, &noff, &proto, l34); + return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34); } return true; @@ -3735,33 +3817,26 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow) return hash >> 1; } -/** - * bond_xmit_hash - generate a hash value based on the xmit policy - * @bond: bonding device - * @skb: buffer to use for headers - * - * This function will extract the necessary headers from the skb buffer and use - * them to generate a hash based on the xmit_policy set in the bonding device +/* Generate hash based on xmit policy. If @skb is given it is used to linearize + * the data as required, but this function can be used without it if the data is + * known to be linear (e.g. with xdp_buff). */ -u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data, + __be16 l2_proto, int mhoff, int nhoff, int hlen) { struct flow_keys flow; u32 hash; - if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && - skb->l4_hash) - return skb->hash; - if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC) - return bond_vlan_srcmac_hash(skb); + return bond_vlan_srcmac_hash(skb, data, mhoff, hlen); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || - !bond_flow_dissect(bond, skb, &flow)) - return bond_eth_hash(skb); + !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow)) + return bond_eth_hash(skb, data, mhoff, hlen); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) { - hash = bond_eth_hash(skb); + hash = bond_eth_hash(skb, data, mhoff, hlen); } else { if (flow.icmp.id) memcpy(&hash, &flow.icmp, sizeof(hash)); @@ -3772,6 +3847,45 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) return bond_ip_hash(hash, &flow); } +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device + */ +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +{ + if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && + skb->l4_hash) + return skb->hash; + + return __bond_xmit_hash(bond, skb, skb->head, skb->protocol, + skb->mac_header, skb->network_header, + skb_headlen(skb)); +} + +/** + * bond_xmit_hash_xdp - generate a hash value based on the xmit policy + * @bond: bonding device + * @xdp: buffer to use for headers + * + * The XDP variant of bond_xmit_hash. + */ +static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp) +{ + struct ethhdr *eth; + + if (xdp->data + sizeof(struct ethhdr) > xdp->data_end) + return 0; + + eth = (struct ethhdr *)xdp->data; + + return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0, + sizeof(struct ethhdr), xdp->data_end - xdp->data); +} + /*-------------------------- Device entry points ----------------------------*/ void bond_work_init_all(struct bonding *bond) @@ -4420,6 +4534,47 @@ non_igmp: return NULL; } +static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct slave *slave; + int slave_cnt; + u32 slave_id; + const struct ethhdr *eth; + void *data = xdp->data; + + if (data + sizeof(struct ethhdr) > xdp->data_end) + goto non_igmp; + + eth = (struct ethhdr *)data; + data += sizeof(struct ethhdr); + + /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */ + if (eth->h_proto == htons(ETH_P_IP)) { + const struct iphdr *iph; + + if (data + sizeof(struct iphdr) > xdp->data_end) + goto non_igmp; + + iph = (struct iphdr *)data; + + if (iph->protocol == IPPROTO_IGMP) { + slave = rcu_dereference(bond->curr_active_slave); + if (slave) + return slave; + return bond_get_slave_by_id(bond, 0); + } + } + +non_igmp: + slave_cnt = READ_ONCE(bond->slave_cnt); + if (likely(slave_cnt)) { + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; + return bond_get_slave_by_id(bond, slave_id); + } + return NULL; +} + static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) { @@ -4433,8 +4588,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, return bond_tx_drop(bond_dev, skb); } -static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond, - struct sk_buff *skb) +static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond) { return rcu_dereference(bond->curr_active_slave); } @@ -4448,7 +4602,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - slave = bond_xmit_activebackup_slave_get(bond, skb); + slave = bond_xmit_activebackup_slave_get(bond); if (slave) return bond_dev_queue_xmit(bond, skb, slave->dev); @@ -4636,6 +4790,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, return slave; } +static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct bond_up_slave *slaves; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash_xdp(bond, xdp); + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + return slaves->arr[hash % count]; +} + /* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. @@ -4746,7 +4916,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, slave = bond_xmit_roundrobin_slave_get(bond, skb); break; case BOND_MODE_ACTIVEBACKUP: - slave = bond_xmit_activebackup_slave_get(bond, skb); + slave = bond_xmit_activebackup_slave_get(bond); break; case BOND_MODE_8023AD: case BOND_MODE_XOR: @@ -4920,6 +5090,174 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +static struct net_device * +bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + /* Caller needs to hold rcu_read_lock() */ + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp); + break; + + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond); + break; + + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp); + break; + + default: + /* Should never happen. Mode guarded by bond_xdp_check() */ + netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond)); + WARN_ON_ONCE(1); + return NULL; + } + + if (slave) + return slave->dev; + + return NULL; +} + +static int bond_xdp_xmit(struct net_device *bond_dev, + int n, struct xdp_frame **frames, u32 flags) +{ + int nxmit, err = -ENXIO; + + rcu_read_lock(); + + for (nxmit = 0; nxmit < n; nxmit++) { + struct xdp_frame *frame = frames[nxmit]; + struct xdp_frame *frames1[] = {frame}; + struct net_device *slave_dev; + struct xdp_buff xdp; + + xdp_convert_frame_to_buff(frame, &xdp); + + slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp); + if (!slave_dev) { + err = -ENXIO; + break; + } + + err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags); + if (err < 1) + break; + } + + rcu_read_unlock(); + + /* If error happened on the first frame then we can pass the error up, otherwise + * report the number of frames that were xmitted. + */ + if (err < 0) + return (nxmit == 0 ? err : nxmit); + + return nxmit; +} + +static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct bonding *bond = netdev_priv(dev); + struct list_head *iter; + struct slave *slave, *rollback_slave; + struct bpf_prog *old_prog; + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = prog, + .extack = extack, + }; + int err; + + ASSERT_RTNL(); + + if (!bond_xdp_check(bond)) + return -EOPNOTSUPP; + + old_prog = bond->xdp_prog; + bond->xdp_prog = prog; + + bond_for_each_slave(bond, slave, iter) { + struct net_device *slave_dev = slave->dev; + + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + NL_SET_ERR_MSG(extack, "Slave device does not support XDP"); + slave_err(dev, slave_dev, "Slave does not support XDP\n"); + err = -EOPNOTSUPP; + goto err; + } + + if (dev_xdp_prog_count(slave_dev) > 0) { + NL_SET_ERR_MSG(extack, + "Slave has XDP program loaded, please unload before enslaving"); + slave_err(dev, slave_dev, + "Slave has XDP program loaded, please unload before enslaving\n"); + err = -EOPNOTSUPP; + goto err; + } + + err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err < 0) { + /* ndo_bpf() sets extack error message */ + slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err); + goto err; + } + if (prog) + bpf_prog_inc(prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + if (prog) + static_branch_inc(&bpf_master_redirect_enabled_key); + else + static_branch_dec(&bpf_master_redirect_enabled_key); + + return 0; + +err: + /* unwind the program changes */ + bond->xdp_prog = old_prog; + xdp.prog = old_prog; + xdp.extack = NULL; /* do not overwrite original error */ + + bond_for_each_slave(bond, rollback_slave, iter) { + struct net_device *slave_dev = rollback_slave->dev; + int err_unwind; + + if (slave == rollback_slave) + break; + + err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err_unwind < 0) + slave_err(dev, slave_dev, + "Error %d when unwinding XDP program change\n", err_unwind); + else if (xdp.prog) + bpf_prog_inc(xdp.prog); + } + return err; +} + +static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return bond_xdp_set(dev, xdp->prog, xdp->extack); + default: + return -EINVAL; + } +} + static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed) { if (speed == 0 || speed == SPEED_UNKNOWN) @@ -5008,6 +5346,9 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_get_xmit_slave = bond_xmit_get_slave, .ndo_sk_get_lower_dev = bond_sk_get_lower_dev, + .ndo_bpf = bond_xdp, + .ndo_xdp_xmit = bond_xdp_xmit, + .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave, }; static const struct device_type bond_type = { @@ -5477,6 +5818,7 @@ static int bond_check_params(struct bond_params *params) params->downdelay = downdelay; params->peer_notif_delay = 0; params->use_carrier = use_carrier; + params->lacp_active = 1; params->lacp_fast = lacp_fast; params->primary[0] = 0; params->primary_reselect = primary_reselect_value; diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c index 0561ece1ba45..5d54e11d18fa 100644 --- a/drivers/net/bonding/bond_netlink.c +++ b/drivers/net/bonding/bond_netlink.c @@ -100,6 +100,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 }, [IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 }, + [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, @@ -387,6 +388,16 @@ static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], if (err) return err; } + + if (data[IFLA_BOND_AD_LACP_ACTIVE]) { + int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]); + + bond_opt_initval(&newval, lacp_active); + err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval); + if (err) + return err; + } + if (data[IFLA_BOND_AD_LACP_RATE]) { int lacp_rate = nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); @@ -490,6 +501,7 @@ static size_t bond_get_size(const struct net_device *bond_dev) nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */ + nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */ nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ @@ -622,6 +634,10 @@ static int bond_fill_info(struct sk_buff *skb, packets_per_slave)) goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE, + bond->params.lacp_active)) + goto nla_put_failure; + if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, bond->params.lacp_fast)) goto nla_put_failure; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 0cf25de6f46d..a8fde3bc458f 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -58,6 +58,8 @@ static int bond_option_lp_interval_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_pps_set(struct bonding *bond, const struct bond_opt_value *newval); +static int bond_option_lacp_active_set(struct bonding *bond, + const struct bond_opt_value *newval); static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval); static int bond_option_ad_select_set(struct bonding *bond, @@ -135,6 +137,12 @@ static const struct bond_opt_value bond_intmax_tbl[] = { { NULL, -1, 0} }; +static const struct bond_opt_value bond_lacp_active[] = { + { "off", 0, 0}, + { "on", 1, BOND_VALFLAG_DEFAULT}, + { NULL, -1, 0} +}; + static const struct bond_opt_value bond_lacp_rate_tbl[] = { { "slow", AD_LACP_SLOW, 0}, { "fast", AD_LACP_FAST, 0}, @@ -283,6 +291,15 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = { .values = bond_intmax_tbl, .set = bond_option_updelay_set }, + [BOND_OPT_LACP_ACTIVE] = { + .id = BOND_OPT_LACP_ACTIVE, + .name = "lacp_active", + .desc = "Send LACPDU frames with configured lacp rate or acts as speak when spoken to", + .flags = BOND_OPTFLAG_IFDOWN, + .unsuppmodes = BOND_MODE_ALL_EX(BIT(BOND_MODE_8023AD)), + .values = bond_lacp_active, + .set = bond_option_lacp_active_set + }, [BOND_OPT_LACP_RATE] = { .id = BOND_OPT_LACP_RATE, .name = "lacp_rate", @@ -1333,6 +1350,16 @@ static int bond_option_pps_set(struct bonding *bond, return 0; } +static int bond_option_lacp_active_set(struct bonding *bond, + const struct bond_opt_value *newval) +{ + netdev_dbg(bond->dev, "Setting LACP active to %s (%llu)\n", + newval->string, newval->value); + bond->params.lacp_active = newval->value; + + return 0; +} + static int bond_option_lacp_rate_set(struct bonding *bond, const struct bond_opt_value *newval) { diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 0fb1da361bb1..f3e3bfd72556 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -133,6 +133,8 @@ static void bond_info_show_master(struct seq_file *seq) struct ad_info ad_info; seq_puts(seq, "\n802.3ad info\n"); + seq_printf(seq, "LACP active: %s\n", + (bond->params.lacp_active) ? "on" : "off"); seq_printf(seq, "LACP rate: %s\n", (bond->params.lacp_fast) ? "fast" : "slow"); seq_printf(seq, "Min links: %d\n", bond->params.min_links); diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 5f9e9a240226..b9e9842fed94 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -339,10 +339,24 @@ static ssize_t bonding_show_peer_notif_delay(struct device *d, static DEVICE_ATTR(peer_notif_delay, 0644, bonding_show_peer_notif_delay, bonding_sysfs_store_option); -/* Show the LACP interval. */ -static ssize_t bonding_show_lacp(struct device *d, - struct device_attribute *attr, - char *buf) +/* Show the LACP activity and interval. */ +static ssize_t bonding_show_lacp_active(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + const struct bond_opt_value *val; + + val = bond_opt_get_val(BOND_OPT_LACP_ACTIVE, bond->params.lacp_active); + + return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_active); +} +static DEVICE_ATTR(lacp_active, 0644, + bonding_show_lacp_active, bonding_sysfs_store_option); + +static ssize_t bonding_show_lacp_rate(struct device *d, + struct device_attribute *attr, + char *buf) { struct bonding *bond = to_bond(d); const struct bond_opt_value *val; @@ -352,7 +366,7 @@ static ssize_t bonding_show_lacp(struct device *d, return sprintf(buf, "%s %d\n", val->string, bond->params.lacp_fast); } static DEVICE_ATTR(lacp_rate, 0644, - bonding_show_lacp, bonding_sysfs_store_option); + bonding_show_lacp_rate, bonding_sysfs_store_option); static ssize_t bonding_show_min_links(struct device *d, struct device_attribute *attr, @@ -738,6 +752,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_downdelay.attr, &dev_attr_updelay.attr, &dev_attr_peer_notif_delay.attr, + &dev_attr_lacp_active.attr, &dev_attr_lacp_rate.attr, &dev_attr_ad_select.attr, &dev_attr_xmit_hash_policy.attr, diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 54ffb796a320..7734229aa078 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -649,7 +649,7 @@ static inline void flexcan_error_irq_disable(const struct flexcan_priv *priv) static int flexcan_clks_enable(const struct flexcan_priv *priv) { - int err; + int err = 0; if (priv->clk_ipg) { err = clk_prepare_enable(priv->clk_ipg); diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index dd17b8c53e1c..89d9c986a229 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -218,7 +218,7 @@ static int hi3110_spi_trans(struct spi_device *spi, int len) return ret; } -static u8 hi3110_cmd(struct spi_device *spi, u8 command) +static int hi3110_cmd(struct spi_device *spi, u8 command) { struct hi3110_priv *priv = spi_get_drvdata(spi); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 2b1e57552e1c..6c369a399c45 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2304,6 +2304,7 @@ static irqreturn_t mcp251xfd_irq(int irq, void *dev_id) err, priv->regs_status.intf); mcp251xfd_dump(priv); mcp251xfd_chip_interrupts_disable(priv); + mcp251xfd_timestamp_stop(priv); return handled; } diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 0a37af4a3fa4..2b5302e72435 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -255,6 +255,8 @@ struct ems_usb { unsigned int free_slots; /* remember number of available slots */ struct ems_cpc_msg active_params; /* active controller parameters */ + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; static void ems_usb_read_interrupt_callback(struct urb *urb) @@ -587,6 +589,7 @@ static int ems_usb_start(struct ems_usb *dev) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -596,7 +599,7 @@ static int ems_usb_start(struct ems_usb *dev) } buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); usb_free_urb(urb); @@ -604,6 +607,8 @@ static int ems_usb_start(struct ems_usb *dev) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 2), buf, RX_BUFFER_SIZE, ems_usb_read_bulk_callback, dev); @@ -619,6 +624,9 @@ static int ems_usb_start(struct ems_usb *dev) break; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } @@ -684,6 +692,10 @@ static void unlink_all_urbs(struct ems_usb *dev) usb_kill_anchored_urbs(&dev->rx_submitted); + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(dev->udev, RX_BUFFER_SIZE, + dev->rxbuf[i], dev->rxbuf_dma[i]); + usb_kill_anchored_urbs(&dev->tx_submitted); atomic_set(&dev->active_tx_urbs, 0); diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 60f3e0ca080a..7370981e9b34 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -195,6 +195,8 @@ struct esd_usb2 { int net_count; u32 version; int rxinitdone; + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; struct esd_usb2_net_priv { @@ -545,6 +547,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -554,7 +557,7 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) } buf = usb_alloc_coherent(dev->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { dev_warn(dev->udev->dev.parent, "No memory left for USB buffer\n"); @@ -562,6 +565,8 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) goto freeurb; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, 1), buf, RX_BUFFER_SIZE, @@ -574,8 +579,12 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) usb_unanchor_urb(urb); usb_free_coherent(dev->udev, RX_BUFFER_SIZE, buf, urb->transfer_dma); + goto freeurb; } + dev->rxbuf[i] = buf; + dev->rxbuf_dma[i] = buf_dma; + freeurb: /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); @@ -663,6 +672,11 @@ static void unlink_all_urbs(struct esd_usb2 *dev) int i, j; usb_kill_anchored_urbs(&dev->rx_submitted); + + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(dev->udev, RX_BUFFER_SIZE, + dev->rxbuf[i], dev->rxbuf_dma[i]); + for (i = 0; i < dev->net_count; i++) { priv = dev->nets[i]; if (priv) { diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index a45865bd7254..a1a154c08b7f 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -653,6 +653,8 @@ static int mcba_usb_start(struct mcba_priv *priv) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, priv->udev, usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN), buf, MCBA_USB_RX_BUFF_SIZE, diff --git a/drivers/net/can/usb/peak_usb/pcan_usb.c b/drivers/net/can/usb/peak_usb/pcan_usb.c index e36e60c3703a..837b3fecd71e 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb.c @@ -123,7 +123,8 @@ #define PCAN_USB_BERR_MASK (PCAN_USB_ERR_RXERR | PCAN_USB_ERR_TXERR) /* identify bus event packets with rx/tx error counters */ -#define PCAN_USB_ERR_CNT 0x80 +#define PCAN_USB_ERR_CNT_DEC 0x00 /* counters are decreasing */ +#define PCAN_USB_ERR_CNT_INC 0x80 /* counters are increasing */ /* private to PCAN-USB adapter */ struct pcan_usb { @@ -535,11 +536,12 @@ static int pcan_usb_handle_bus_evt(struct pcan_usb_msg_context *mc, u8 ir) /* acccording to the content of the packet */ switch (ir) { - case PCAN_USB_ERR_CNT: + case PCAN_USB_ERR_CNT_DEC: + case PCAN_USB_ERR_CNT_INC: /* save rx/tx error counters from in the device context */ - pdev->bec.rxerr = mc->ptr[0]; - pdev->bec.txerr = mc->ptr[1]; + pdev->bec.rxerr = mc->ptr[1]; + pdev->bec.txerr = mc->ptr[2]; break; default: diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index b6e7ef0d5bc6..d1b83bd1b3cb 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -137,7 +137,8 @@ struct usb_8dev_priv { u8 *cmd_msg_buffer; struct mutex usb_8dev_cmd_lock; - + void *rxbuf[MAX_RX_URBS]; + dma_addr_t rxbuf_dma[MAX_RX_URBS]; }; /* tx frame */ @@ -733,6 +734,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) for (i = 0; i < MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf; + dma_addr_t buf_dma; /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); @@ -742,7 +744,7 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) } buf = usb_alloc_coherent(priv->udev, RX_BUFFER_SIZE, GFP_KERNEL, - &urb->transfer_dma); + &buf_dma); if (!buf) { netdev_err(netdev, "No memory left for USB buffer\n"); usb_free_urb(urb); @@ -750,6 +752,8 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) break; } + urb->transfer_dma = buf_dma; + usb_fill_bulk_urb(urb, priv->udev, usb_rcvbulkpipe(priv->udev, USB_8DEV_ENDP_DATA_RX), @@ -767,6 +771,9 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) break; } + priv->rxbuf[i] = buf; + priv->rxbuf_dma[i] = buf_dma; + /* Drop reference, USB core will take care of freeing it */ usb_free_urb(urb); } @@ -836,6 +843,10 @@ static void unlink_all_urbs(struct usb_8dev_priv *priv) usb_kill_anchored_urbs(&priv->rx_submitted); + for (i = 0; i < MAX_RX_URBS; ++i) + usb_free_coherent(priv->udev, RX_BUFFER_SIZE, + priv->rxbuf[i], priv->rxbuf_dma[i]); + usb_kill_anchored_urbs(&priv->tx_submitted); atomic_set(&priv->active_tx_urbs, 0); diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index b23e3488695b..bd1417a66cbf 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -2016,15 +2016,6 @@ int b53_br_flags(struct dsa_switch *ds, int port, } EXPORT_SYMBOL(b53_br_flags); -int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack) -{ - b53_port_set_mcast_flood(ds->priv, port, mrouter); - - return 0; -} -EXPORT_SYMBOL(b53_set_mrouter); - static bool b53_possible_cpu_port(struct dsa_switch *ds, int port) { /* Broadcom switches will accept enabling Broadcom tags on the @@ -2268,7 +2259,6 @@ static const struct dsa_switch_ops b53_switch_ops = { .port_bridge_leave = b53_br_leave, .port_pre_bridge_flags = b53_br_flags_pre, .port_bridge_flags = b53_br_flags, - .port_set_mrouter = b53_set_mrouter, .port_stp_state_set = b53_br_set_stp_state, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 82700a5714c1..9bf8319342b0 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -328,8 +328,6 @@ int b53_br_flags_pre(struct dsa_switch *ds, int port, int b53_br_flags(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack); int b53_setup_devlink_resources(struct dsa_switch *ds); void b53_port_event(struct dsa_switch *ds, int port); void b53_phylink_validate(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 3b018fcf4412..6ce9ec1283e0 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -1199,7 +1199,6 @@ static const struct dsa_switch_ops bcm_sf2_ops = { .port_pre_bridge_flags = b53_br_flags_pre, .port_bridge_flags = b53_br_flags, .port_stp_state_set = b53_br_set_stp_state, - .port_set_mrouter = b53_set_mrouter, .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, .port_vlan_add = b53_vlan_add, diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 69f21b71614c..53e6150e95b6 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -366,8 +366,8 @@ mt7530_fdb_write(struct mt7530_priv *priv, u16 vid, int i; reg[1] |= vid & CVID_MASK; - if (vid > 1) - reg[1] |= ATA2_IVL; + reg[1] |= ATA2_IVL; + reg[1] |= ATA2_FID(FID_BRIDGED); reg[2] |= (aging & AGE_TIMER_MASK) << AGE_TIMER; reg[2] |= (port_mask & PORT_MAP_MASK) << PORT_MAP; /* STATIC_ENT indicate that entry is static wouldn't @@ -1021,6 +1021,10 @@ mt753x_cpu_port_enable(struct dsa_switch *ds, int port) mt7530_write(priv, MT7530_PCR_P(port), PCR_MATRIX(dsa_user_ports(priv->ds))); + /* Set to fallback mode for independent VLAN learning */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + return 0; } @@ -1143,7 +1147,8 @@ mt7530_stp_state_set(struct dsa_switch *ds, int port, u8 state) break; } - mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state); + mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK(FID_BRIDGED), + FID_PST(FID_BRIDGED, stp_state)); } static int @@ -1185,18 +1190,6 @@ mt7530_port_bridge_flags(struct dsa_switch *ds, int port, } static int -mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack) -{ - struct mt7530_priv *priv = ds->priv; - - mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)), - mrouter ? UNM_FFP(BIT(port)) : 0); - - return 0; -} - -static int mt7530_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) { @@ -1229,6 +1222,10 @@ mt7530_port_bridge_join(struct dsa_switch *ds, int port, PCR_MATRIX_MASK, PCR_MATRIX(port_bitmap)); priv->ports[port].pm |= PCR_MATRIX(port_bitmap); + /* Set to fallback mode for independent VLAN learning */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + mutex_unlock(&priv->reg_mutex); return 0; @@ -1241,15 +1238,22 @@ mt7530_port_set_vlan_unaware(struct dsa_switch *ds, int port) bool all_user_ports_removed = true; int i; - /* When a port is removed from the bridge, the port would be set up - * back to the default as is at initial boot which is a VLAN-unaware - * port. + /* This is called after .port_bridge_leave when leaving a VLAN-aware + * bridge. Don't set standalone ports to fallback mode. */ - mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, - MT7530_PORT_MATRIX_MODE); - mt7530_rmw(priv, MT7530_PVC_P(port), VLAN_ATTR_MASK | PVC_EG_TAG_MASK, + if (dsa_to_port(ds, port)->bridge_dev) + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_FALLBACK_MODE); + + mt7530_rmw(priv, MT7530_PVC_P(port), + VLAN_ATTR_MASK | PVC_EG_TAG_MASK | ACC_FRM_MASK, VLAN_ATTR(MT7530_VLAN_TRANSPARENT) | - PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); + PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT) | + MT7530_VLAN_ACC_ALL); + + /* Set PVID to 0 */ + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); for (i = 0; i < MT7530_NUM_PORTS; i++) { if (dsa_is_user_port(ds, i) && @@ -1276,15 +1280,19 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port) struct mt7530_priv *priv = ds->priv; /* Trapped into security mode allows packet forwarding through VLAN - * table lookup. CPU port is set to fallback mode to let untagged - * frames pass through. + * table lookup. */ - if (dsa_is_cpu_port(ds, port)) - mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, - MT7530_PORT_FALLBACK_MODE); - else + if (dsa_is_user_port(ds, port)) { mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, MT7530_PORT_SECURITY_MODE); + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID(priv->ports[port].pvid)); + + /* Only accept tagged frames if PVID is not set */ + if (!priv->ports[port].pvid) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); + } /* Set the port as a user port which is to be able to recognize VID * from incoming packets before fetching entry within the VLAN table. @@ -1329,6 +1337,13 @@ mt7530_port_bridge_leave(struct dsa_switch *ds, int port, PCR_MATRIX(BIT(MT7530_CPU_PORT))); priv->ports[port].pm = PCR_MATRIX(BIT(MT7530_CPU_PORT)); + /* When a port is removed from the bridge, the port would be set up + * back to the default as is at initial boot which is a VLAN-unaware + * port. + */ + mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK, + MT7530_PORT_MATRIX_MODE); + mutex_unlock(&priv->reg_mutex); } @@ -1511,7 +1526,8 @@ mt7530_hw_vlan_add(struct mt7530_priv *priv, /* Validate the entry with independent learning, create egress tag per * VLAN and joining the port as one of the port members. */ - val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | VLAN_VALID; + val = IVL_MAC | VTAG_EN | PORT_MEM(new_members) | FID(FID_BRIDGED) | + VLAN_VALID; mt7530_write(priv, MT7530_VAWD1, val); /* Decide whether adding tag or not for those outgoing packets from the @@ -1601,9 +1617,28 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port, mt7530_hw_vlan_update(priv, vlan->vid, &new_entry, mt7530_hw_vlan_add); if (pvid) { - mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, - G0_PORT_VID(vlan->vid)); priv->ports[port].pvid = vlan->vid; + + /* Accept all frames if PVID is set */ + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_ALL); + + /* Only configure PVID if VLAN filtering is enabled */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PPBV1_P(port), + G0_PORT_VID_MASK, + G0_PORT_VID(vlan->vid)); + } else if (vlan->vid && priv->ports[port].pvid == vlan->vid) { + /* This VLAN is overwritten without PVID, so unset it */ + priv->ports[port].pvid = G0_PORT_VID_DEF; + + /* Only accept tagged frames if the port is VLAN-aware */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); + + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } mutex_unlock(&priv->reg_mutex); @@ -1617,11 +1652,9 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, { struct mt7530_hw_vlan_entry target_entry; struct mt7530_priv *priv = ds->priv; - u16 pvid; mutex_lock(&priv->reg_mutex); - pvid = priv->ports[port].pvid; mt7530_hw_vlan_entry_init(&target_entry, port, 0); mt7530_hw_vlan_update(priv, vlan->vid, &target_entry, mt7530_hw_vlan_del); @@ -1629,11 +1662,18 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port, /* PVID is being restored to the default whenever the PVID port * is being removed from the VLAN. */ - if (pvid == vlan->vid) - pvid = G0_PORT_VID_DEF; + if (priv->ports[port].pvid == vlan->vid) { + priv->ports[port].pvid = G0_PORT_VID_DEF; + + /* Only accept tagged frames if the port is VLAN-aware */ + if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) + mt7530_rmw(priv, MT7530_PVC_P(port), ACC_FRM_MASK, + MT7530_VLAN_ACC_TAGGED); + + mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); + } - mt7530_rmw(priv, MT7530_PPBV1_P(port), G0_PORT_VID_MASK, pvid); - priv->ports[port].pvid = pvid; mutex_unlock(&priv->reg_mutex); @@ -1717,15 +1757,7 @@ static enum dsa_tag_protocol mtk_get_tag_protocol(struct dsa_switch *ds, int port, enum dsa_tag_protocol mp) { - struct mt7530_priv *priv = ds->priv; - - if (port != MT7530_CPU_PORT) { - dev_warn(priv->dev, - "port not matched with tagging CPU port\n"); - return DSA_TAG_PROTO_NONE; - } else { - return DSA_TAG_PROTO_MTK; - } + return DSA_TAG_PROTO_MTK; } #ifdef CONFIG_GPIOLIB @@ -2054,6 +2086,7 @@ mt7530_setup(struct dsa_switch *ds) * as two netdev instances. */ dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent; + ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; if (priv->id == ID_MT7530) { @@ -2124,6 +2157,9 @@ mt7530_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); + /* Disable learning by default on all ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + if (dsa_is_cpu_port(ds, i)) { ret = mt753x_cpu_port_enable(ds, i); if (ret) @@ -2131,8 +2167,9 @@ mt7530_setup(struct dsa_switch *ds) } else { mt7530_port_disable(ds, i); - /* Disable learning by default on all user ports */ - mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + /* Set default PVID to 0 on all user ports */ + mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } /* Enable consistent egress tag */ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, @@ -2289,6 +2326,9 @@ mt7531_setup(struct dsa_switch *ds) mt7530_rmw(priv, MT7530_PCR_P(i), PCR_MATRIX_MASK, PCR_MATRIX_CLR); + /* Disable learning by default on all ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + mt7530_set(priv, MT7531_DBG_CNT(i), MT7531_DIS_CLR); if (dsa_is_cpu_port(ds, i)) { @@ -2298,8 +2338,9 @@ mt7531_setup(struct dsa_switch *ds) } else { mt7530_port_disable(ds, i); - /* Disable learning by default on all user ports */ - mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + /* Set default PVID to 0 on all user ports */ + mt7530_rmw(priv, MT7530_PPBV1_P(i), G0_PORT_VID_MASK, + G0_PORT_VID_DEF); } /* Enable consistent egress tag */ @@ -2307,6 +2348,7 @@ mt7531_setup(struct dsa_switch *ds) PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); } + ds->assisted_learning_on_cpu_port = true; ds->mtu_enforcement_ingress = true; /* Flush the FDB table */ @@ -3060,7 +3102,6 @@ static const struct dsa_switch_ops mt7530_switch_ops = { .port_stp_state_set = mt7530_stp_state_set, .port_pre_bridge_flags = mt7530_port_pre_bridge_flags, .port_bridge_flags = mt7530_port_bridge_flags, - .port_set_mrouter = mt7530_port_set_mrouter, .port_bridge_join = mt7530_port_bridge_join, .port_bridge_leave = mt7530_port_bridge_leave, .port_fdb_add = mt7530_port_fdb_add, diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h index b19b389ff10a..fe4cd2ac26d0 100644 --- a/drivers/net/dsa/mt7530.h +++ b/drivers/net/dsa/mt7530.h @@ -80,6 +80,7 @@ enum mt753x_bpdu_port_fw { #define STATIC_ENT 3 #define MT7530_ATA2 0x78 #define ATA2_IVL BIT(15) +#define ATA2_FID(x) (((x) & 0x7) << 12) /* Register for address table write data */ #define MT7530_ATWD 0x7c @@ -148,11 +149,18 @@ enum mt7530_vlan_cmd { #define VTAG_EN BIT(28) /* VLAN Member Control */ #define PORT_MEM(x) (((x) & 0xff) << 16) +/* Filter ID */ +#define FID(x) (((x) & 0x7) << 1) /* VLAN Entry Valid */ #define VLAN_VALID BIT(0) #define PORT_MEM_SHFT 16 #define PORT_MEM_MASK 0xff +enum mt7530_fid { + FID_STANDALONE = 0, + FID_BRIDGED = 1, +}; + #define MT7530_VAWD2 0x98 /* Egress Tag Control */ #define ETAG_CTRL_P(p, x) (((x) & 0x3) << ((p) << 1)) @@ -179,8 +187,8 @@ enum mt7530_vlan_egress_attr { /* Register for port STP state control */ #define MT7530_SSP_P(x) (0x2000 + ((x) * 0x100)) -#define FID_PST(x) ((x) & 0x3) -#define FID_PST_MASK FID_PST(0x3) +#define FID_PST(fid, state) (((state) & 0x3) << ((fid) * 2)) +#define FID_PST_MASK(fid) FID_PST(fid, 0x3) enum mt7530_stp_state { MT7530_STP_DISABLED = 0, @@ -230,6 +238,7 @@ enum mt7530_port_mode { #define PVC_EG_TAG_MASK PVC_EG_TAG(7) #define VLAN_ATTR(x) (((x) & 0x3) << 6) #define VLAN_ATTR_MASK VLAN_ATTR(3) +#define ACC_FRM_MASK GENMASK(1, 0) enum mt7530_vlan_port_eg_tag { MT7530_VLAN_EG_DISABLED = 0, @@ -241,13 +250,19 @@ enum mt7530_vlan_port_attr { MT7530_VLAN_TRANSPARENT = 3, }; +enum mt7530_vlan_port_acc_frm { + MT7530_VLAN_ACC_ALL = 0, + MT7530_VLAN_ACC_TAGGED = 1, + MT7530_VLAN_ACC_UNTAGGED = 2, +}; + #define STAG_VPID (((x) & 0xffff) << 16) /* Register for port port-and-protocol based vlan 1 control */ #define MT7530_PPBV1_P(x) (0x2014 + ((x) * 0x100)) #define G0_PORT_VID(x) (((x) & 0xfff) << 0) #define G0_PORT_VID_MASK G0_PORT_VID(0xfff) -#define G0_PORT_VID_DEF G0_PORT_VID(1) +#define G0_PORT_VID_DEF G0_PORT_VID(0) /* Register for port MAC control register */ #define MT7530_PMCR_P(x) (0x3000 + ((x) * 0x100)) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index af764b8445b7..c45ca2473743 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -2176,7 +2176,7 @@ static int mv88e6xxx_port_vlan_leave(struct mv88e6xxx_chip *chip, int i, err; if (!vid) - return -EOPNOTSUPP; + return 0; err = mv88e6xxx_vtu_get(chip, vid, &vlan); if (err) @@ -5797,7 +5797,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, struct netlink_ext_ack *extack) { struct mv88e6xxx_chip *chip = ds->priv; - bool do_fast_age = false; int err = -EOPNOTSUPP; mv88e6xxx_reg_lock(chip); @@ -5809,9 +5808,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, err = mv88e6xxx_port_set_assoc_vector(chip, port, pav); if (err) goto out; - - if (!learning) - do_fast_age = true; } if (flags.mask & BR_FLOOD) { @@ -5843,26 +5839,6 @@ static int mv88e6xxx_port_bridge_flags(struct dsa_switch *ds, int port, out: mv88e6xxx_reg_unlock(chip); - if (do_fast_age) - mv88e6xxx_port_fast_age(ds, port); - - return err; -} - -static int mv88e6xxx_port_set_mrouter(struct dsa_switch *ds, int port, - bool mrouter, - struct netlink_ext_ack *extack) -{ - struct mv88e6xxx_chip *chip = ds->priv; - int err; - - if (!chip->info->ops->port_set_mcast_flood) - return -EOPNOTSUPP; - - mv88e6xxx_reg_lock(chip); - err = chip->info->ops->port_set_mcast_flood(chip, port, mrouter); - mv88e6xxx_reg_unlock(chip); - return err; } @@ -6167,7 +6143,6 @@ static const struct dsa_switch_ops mv88e6xxx_switch_ops = { .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_pre_bridge_flags = mv88e6xxx_port_pre_bridge_flags, .port_bridge_flags = mv88e6xxx_port_bridge_flags, - .port_set_mrouter = mv88e6xxx_port_set_mrouter, .port_stp_state_set = mv88e6xxx_port_stp_state_set, .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index ca2ad77b71f1..6686192e1883 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -837,16 +837,24 @@ static int ar9331_mdio_write(void *ctx, u32 reg, u32 val) return 0; } - ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); + /* In case of this switch we work with 32bit registers on top of 16bit + * bus. Some registers (for example access to forwarding database) have + * trigger bit on the first 16bit half of request, the result and + * configuration of request in the second half. + * To make it work properly, we should do the second part of transfer + * before the first one is done. + */ + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, + val >> 16); if (ret < 0) goto error; - ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2, - val >> 16); + ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val); if (ret < 0) goto error; return 0; + error: dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n"); return ret; diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h index 9cd7dbdd7db9..2e899c9f036d 100644 --- a/drivers/net/dsa/sja1105/sja1105.h +++ b/drivers/net/dsa/sja1105/sja1105.h @@ -233,7 +233,6 @@ struct sja1105_private { phy_interface_t phy_mode[SJA1105_MAX_NUM_PORTS]; bool fixed_link[SJA1105_MAX_NUM_PORTS]; bool vlan_aware; - unsigned long learn_ena; unsigned long ucast_egress_floods; unsigned long bcast_egress_floods; const struct sja1105_info *info; diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c index bd3ad18c150e..f2049f52833c 100644 --- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c +++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c @@ -304,6 +304,15 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, hostcmd = SJA1105_HOSTCMD_INVALIDATE; } sja1105_packing(p, &hostcmd, 25, 23, size, op); +} + +static void +sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, + enum packing_op op) +{ + int entry_size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; + + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); /* Hack - The hardware takes the 'index' field within * struct sja1105_l2_lookup_entry as the index on which this command @@ -313,26 +322,18 @@ sja1105pqrs_common_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, * such that our API doesn't need to ask for a full-blown entry * structure when e.g. a delete is requested. */ - sja1105_packing(buf, &cmd->index, 15, 6, - SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY, op); -} - -static void -sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, - enum packing_op op) -{ - int size = SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY; - - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105_packing(buf, &cmd->index, 15, 6, entry_size, op); } static void sja1110_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd, enum packing_op op) { - int size = SJA1110_SIZE_L2_LOOKUP_ENTRY; + int entry_size = SJA1110_SIZE_L2_LOOKUP_ENTRY; + + sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, entry_size); - return sja1105pqrs_common_l2_lookup_cmd_packing(buf, cmd, op, size); + sja1105_packing(buf, &cmd->index, 10, 1, entry_size, op); } /* The switch is so retarded that it makes our command/entry abstraction diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 5ab1676a7448..6a52db1ef24c 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -176,7 +176,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) struct sja1105_mac_config_entry *mac; struct dsa_switch *ds = priv->ds; struct sja1105_table *table; - int i; + struct dsa_port *dp; table = &priv->static_config.tables[BLK_IDX_MAC_CONFIG]; @@ -195,14 +195,21 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) mac = table->entries; - for (i = 0; i < ds->num_ports; i++) { - mac[i] = default_mac; + list_for_each_entry(dp, &ds->dst->ports, list) { + if (dp->ds != ds) + continue; + + mac[dp->index] = default_mac; /* Let sja1105_bridge_stp_state_set() keep address learning - * enabled for the CPU port. + * enabled for the DSA ports. CPU ports use software-assisted + * learning to ensure that only FDB entries belonging to the + * bridge are learned, and that they are learned towards all + * CPU ports in a cross-chip topology if multiple CPU ports + * exist. */ - if (dsa_is_cpu_port(ds, i)) - priv->learn_ena |= BIT(i); + if (dsa_port_is_dsa(dp)) + dp->learning = true; } return 0; @@ -460,7 +467,7 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv) pvid.vlan_bc |= BIT(port); pvid.tag_port &= ~BIT(port); - if (dsa_is_cpu_port(ds, port)) { + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { priv->tag_8021q_pvid[port] = SJA1105_DEFAULT_VLAN; priv->bridge_pvid[port] = SJA1105_DEFAULT_VLAN; } @@ -474,8 +481,11 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv) { struct sja1105_l2_forwarding_entry *l2fwd; struct dsa_switch *ds = priv->ds; + struct dsa_switch_tree *dst; struct sja1105_table *table; - int i, j; + struct dsa_link *dl; + int port, tc; + int from, to; table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING]; @@ -493,47 +503,109 @@ static int sja1105_init_l2_forwarding(struct sja1105_private *priv) l2fwd = table->entries; - /* First 5 entries define the forwarding rules */ - for (i = 0; i < ds->num_ports; i++) { - unsigned int upstream = dsa_upstream_port(priv->ds, i); + /* First 5 entries in the L2 Forwarding Table define the forwarding + * rules and the VLAN PCP to ingress queue mapping. + * Set up the ingress queue mapping first. + */ + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + for (tc = 0; tc < SJA1105_NUM_TC; tc++) + l2fwd[port].vlan_pmap[tc] = tc; + } - if (dsa_is_unused_port(ds, i)) + /* Then manage the forwarding domain for user ports. These can forward + * only to the always-on domain (CPU port and DSA links) + */ + for (from = 0; from < ds->num_ports; from++) { + if (!dsa_is_user_port(ds, from)) continue; - for (j = 0; j < SJA1105_NUM_TC; j++) - l2fwd[i].vlan_pmap[j] = j; + for (to = 0; to < ds->num_ports; to++) { + if (!dsa_is_cpu_port(ds, to) && + !dsa_is_dsa_port(ds, to)) + continue; - /* All ports start up with egress flooding enabled, - * including the CPU port. - */ - priv->ucast_egress_floods |= BIT(i); - priv->bcast_egress_floods |= BIT(i); + l2fwd[from].bc_domain |= BIT(to); + l2fwd[from].fl_domain |= BIT(to); - if (i == upstream) + sja1105_port_allow_traffic(l2fwd, from, to, true); + } + } + + /* Then manage the forwarding domain for DSA links and CPU ports (the + * always-on domain). These can send packets to any enabled port except + * themselves. + */ + for (from = 0; from < ds->num_ports; from++) { + if (!dsa_is_cpu_port(ds, from) && !dsa_is_dsa_port(ds, from)) continue; - sja1105_port_allow_traffic(l2fwd, i, upstream, true); - sja1105_port_allow_traffic(l2fwd, upstream, i, true); + for (to = 0; to < ds->num_ports; to++) { + if (dsa_is_unused_port(ds, to)) + continue; - l2fwd[i].bc_domain = BIT(upstream); - l2fwd[i].fl_domain = BIT(upstream); + if (from == to) + continue; + + l2fwd[from].bc_domain |= BIT(to); + l2fwd[from].fl_domain |= BIT(to); + + sja1105_port_allow_traffic(l2fwd, from, to, true); + } + } + + /* In odd topologies ("H" connections where there is a DSA link to + * another switch which also has its own CPU port), TX packets can loop + * back into the system (they are flooded from CPU port 1 to the DSA + * link, and from there to CPU port 2). Prevent this from happening by + * cutting RX from DSA links towards our CPU port, if the remote switch + * has its own CPU port and therefore doesn't need ours for network + * stack termination. + */ + dst = ds->dst; + + list_for_each_entry(dl, &dst->rtable, list) { + if (dl->dp->ds != ds || dl->link_dp->cpu_dp == dl->dp->cpu_dp) + continue; + + from = dl->dp->index; + to = dsa_upstream_port(ds, from); + + dev_warn(ds->dev, + "H topology detected, cutting RX from DSA link %d to CPU port %d to prevent TX packet loops\n", + from, to); + + sja1105_port_allow_traffic(l2fwd, from, to, false); - l2fwd[upstream].bc_domain |= BIT(i); - l2fwd[upstream].fl_domain |= BIT(i); + l2fwd[from].bc_domain &= ~BIT(to); + l2fwd[from].fl_domain &= ~BIT(to); + } + + /* Finally, manage the egress flooding domain. All ports start up with + * flooding enabled, including the CPU port and DSA links. + */ + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + priv->ucast_egress_floods |= BIT(port); + priv->bcast_egress_floods |= BIT(port); } /* Next 8 entries define VLAN PCP mapping from ingress to egress. * Create a one-to-one mapping. */ - for (i = 0; i < SJA1105_NUM_TC; i++) { - for (j = 0; j < ds->num_ports; j++) { - if (dsa_is_unused_port(ds, j)) + for (tc = 0; tc < SJA1105_NUM_TC; tc++) { + for (port = 0; port < ds->num_ports; port++) { + if (dsa_is_unused_port(ds, port)) continue; - l2fwd[ds->num_ports + i].vlan_pmap[j] = i; + l2fwd[ds->num_ports + tc].vlan_pmap[port] = tc; } - l2fwd[ds->num_ports + i].type_egrpcp2outputq = true; + l2fwd[ds->num_ports + tc].type_egrpcp2outputq = true; } return 0; @@ -688,6 +760,72 @@ static void sja1110_select_tdmaconfigidx(struct sja1105_private *priv) general_params->tdmaconfigidx = tdmaconfigidx; } +static int sja1105_init_topology(struct sja1105_private *priv, + struct sja1105_general_params_entry *general_params) +{ + struct dsa_switch *ds = priv->ds; + int port; + + /* The host port is the destination for traffic matching mac_fltres1 + * and mac_fltres0 on all ports except itself. Default to an invalid + * value. + */ + general_params->host_port = ds->num_ports; + + /* Link-local traffic received on casc_port will be forwarded + * to host_port without embedding the source port and device ID + * info in the destination MAC address, and no RX timestamps will be + * taken either (presumably because it is a cascaded port and a + * downstream SJA switch already did that). + * To disable the feature, we need to do different things depending on + * switch generation. On SJA1105 we need to set an invalid port, while + * on SJA1110 which support multiple cascaded ports, this field is a + * bitmask so it must be left zero. + */ + if (!priv->info->multiple_cascade_ports) + general_params->casc_port = ds->num_ports; + + for (port = 0; port < ds->num_ports; port++) { + bool is_upstream = dsa_is_upstream_port(ds, port); + bool is_dsa_link = dsa_is_dsa_port(ds, port); + + /* Upstream ports can be dedicated CPU ports or + * upstream-facing DSA links + */ + if (is_upstream) { + if (general_params->host_port == ds->num_ports) { + general_params->host_port = port; + } else { + dev_err(ds->dev, + "Port %llu is already a host port, configuring %d as one too is not supported\n", + general_params->host_port, port); + return -EINVAL; + } + } + + /* Cascade ports are downstream-facing DSA links */ + if (is_dsa_link && !is_upstream) { + if (priv->info->multiple_cascade_ports) { + general_params->casc_port |= BIT(port); + } else if (general_params->casc_port == ds->num_ports) { + general_params->casc_port = port; + } else { + dev_err(ds->dev, + "Port %llu is already a cascade port, configuring %d as one too is not supported\n", + general_params->casc_port, port); + return -EINVAL; + } + } + } + + if (general_params->host_port == ds->num_ports) { + dev_err(ds->dev, "No host port configured\n"); + return -EINVAL; + } + + return 0; +} + static int sja1105_init_general_params(struct sja1105_private *priv) { struct sja1105_general_params_entry default_general_params = { @@ -706,12 +844,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv) .mac_flt0 = SJA1105_LINKLOCAL_FILTER_B_MASK, .incl_srcpt0 = false, .send_meta0 = false, - /* The destination for traffic matching mac_fltres1 and - * mac_fltres0 on all ports except host_port. Such traffic - * receieved on host_port itself would be dropped, except - * by installing a temporary 'management route' - */ - .host_port = priv->ds->num_ports, /* Default to an invalid value */ .mirr_port = priv->ds->num_ports, /* No TTEthernet */ @@ -731,16 +863,12 @@ static int sja1105_init_general_params(struct sja1105_private *priv) .header_type = ETH_P_SJA1110, }; struct sja1105_general_params_entry *general_params; - struct dsa_switch *ds = priv->ds; struct sja1105_table *table; - int port; + int rc; - for (port = 0; port < ds->num_ports; port++) { - if (dsa_is_cpu_port(ds, port)) { - default_general_params.host_port = port; - break; - } - } + rc = sja1105_init_topology(priv, &default_general_params); + if (rc) + return rc; table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; @@ -763,19 +891,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv) sja1110_select_tdmaconfigidx(priv); - /* Link-local traffic received on casc_port will be forwarded - * to host_port without embedding the source port and device ID - * info in the destination MAC address, and no RX timestamps will be - * taken either (presumably because it is a cascaded port and a - * downstream SJA switch already did that). - * To disable the feature, we need to do different things depending on - * switch generation. On SJA1105 we need to set an invalid port, while - * on SJA1110 which support multiple cascaded ports, this field is a - * bitmask so it must be left zero. - */ - if (!priv->info->multiple_cascade_ports) - general_params->casc_port = ds->num_ports; - return 0; } @@ -903,7 +1018,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) for (port = 0; port < ds->num_ports; port++) { int mtu = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN; - if (dsa_is_cpu_port(priv->ds, port)) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) mtu += VLAN_HLEN; policing[port].smax = 65535; /* Burst size in bytes */ @@ -1372,10 +1487,11 @@ static int sja1105et_is_fdb_entry_in_bin(struct sja1105_private *priv, int bin, int sja1105et_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; struct device *dev = ds->dev; int last_unused = -1; + int start, end, i; int bin, way, rc; bin = sja1105et_fdb_hash(priv, addr, vid); @@ -1387,7 +1503,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, * mask? If yes, we need to do nothing. If not, we need * to rewrite the entry by adding this port to it. */ - if (l2_lookup.destports & BIT(port)) + if ((l2_lookup.destports & BIT(port)) && l2_lookup.lockeds) return 0; l2_lookup.destports |= BIT(port); } else { @@ -1418,6 +1534,7 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, index, NULL, false); } } + l2_lookup.lockeds = true; l2_lookup.index = sja1105et_fdb_index(bin, way); rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, @@ -1426,6 +1543,29 @@ int sja1105et_fdb_add(struct dsa_switch *ds, int port, if (rc < 0) return rc; + /* Invalidate a dynamically learned entry if that exists */ + start = sja1105et_fdb_index(bin, 0); + end = sja1105et_fdb_index(bin, way); + + for (i = start; i < end; i++) { + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + i, &tmp); + if (rc == -ENOENT) + continue; + if (rc) + return rc; + + if (tmp.macaddr != ether_addr_to_u64(addr) || tmp.vlanid != vid) + continue; + + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + i, NULL, false); + if (rc) + return rc; + + break; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } @@ -1467,32 +1607,30 @@ int sja1105et_fdb_del(struct dsa_switch *ds, int port, int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid) { - struct sja1105_l2_lookup_entry l2_lookup = {0}; + struct sja1105_l2_lookup_entry l2_lookup = {0}, tmp; struct sja1105_private *priv = ds->priv; int rc, i; /* Search for an existing entry in the FDB table */ l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_aware) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; l2_lookup.destports = BIT(port); + tmp = l2_lookup; + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, - SJA1105_SEARCH, &l2_lookup); - if (rc == 0) { - /* Found and this port is already in the entry's + SJA1105_SEARCH, &tmp); + if (rc == 0 && tmp.index != SJA1105_MAX_L2_LOOKUP_COUNT - 1) { + /* Found a static entry and this port is already in the entry's * port mask => job done */ - if (l2_lookup.destports & BIT(port)) + if ((tmp.destports & BIT(port)) && tmp.lockeds) return 0; + + l2_lookup = tmp; + /* l2_lookup.index is populated by the switch in case it * found something. */ @@ -1514,16 +1652,46 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port, dev_err(ds->dev, "FDB is full, cannot add entry.\n"); return -EINVAL; } - l2_lookup.lockeds = true; l2_lookup.index = i; skip_finding_an_index: + l2_lookup.lockeds = true; + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, l2_lookup.index, &l2_lookup, true); if (rc < 0) return rc; + /* The switch learns dynamic entries and looks up the FDB left to + * right. It is possible that our addition was concurrent with the + * dynamic learning of the same address, so now that the static entry + * has been installed, we are certain that address learning for this + * particular address has been turned off, so the dynamic entry either + * is in the FDB at an index smaller than the static one, or isn't (it + * can also be at a larger index, but in that case it is inactive + * because the static FDB entry will match first, and the dynamic one + * will eventually age out). Search for a dynamically learned address + * prior to our static one and invalidate it. + */ + tmp = l2_lookup; + + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + SJA1105_SEARCH, &tmp); + if (rc < 0) { + dev_err(ds->dev, + "port %d failed to read back entry for %pM vid %d: %pe\n", + port, addr, vid, ERR_PTR(rc)); + return rc; + } + + if (tmp.index < l2_lookup.index) { + rc = sja1105_dynamic_config_write(priv, BLK_IDX_L2_LOOKUP, + tmp.index, NULL, false); + if (rc < 0) + return rc; + } + return sja1105_static_fdb_change(priv, port, &l2_lookup, true); } @@ -1537,15 +1705,8 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port, l2_lookup.macaddr = ether_addr_to_u64(addr); l2_lookup.vlanid = vid; - l2_lookup.iotag = SJA1105_S_TAG; l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0); - if (priv->vlan_aware) { - l2_lookup.mask_vlanid = VLAN_VID_MASK; - l2_lookup.mask_iotag = BIT(0); - } else { - l2_lookup.mask_vlanid = 0; - l2_lookup.mask_iotag = 0; - } + l2_lookup.mask_vlanid = VLAN_VID_MASK; l2_lookup.destports = BIT(port); rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, @@ -1633,6 +1794,46 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port, return 0; } +static void sja1105_fast_age(struct dsa_switch *ds, int port) +{ + struct sja1105_private *priv = ds->priv; + int i; + + for (i = 0; i < SJA1105_MAX_L2_LOOKUP_COUNT; i++) { + struct sja1105_l2_lookup_entry l2_lookup = {0}; + u8 macaddr[ETH_ALEN]; + int rc; + + rc = sja1105_dynamic_config_read(priv, BLK_IDX_L2_LOOKUP, + i, &l2_lookup); + /* No fdb entry at i, not an issue */ + if (rc == -ENOENT) + continue; + if (rc) { + dev_err(ds->dev, "Failed to read FDB: %pe\n", + ERR_PTR(rc)); + return; + } + + if (!(l2_lookup.destports & BIT(port))) + continue; + + /* Don't delete static FDB entries */ + if (l2_lookup.lockeds) + continue; + + u64_to_ether_addr(l2_lookup.macaddr, macaddr); + + rc = sja1105_fdb_del(ds, port, macaddr, l2_lookup.vlanid); + if (rc) { + dev_err(ds->dev, + "Failed to delete FDB entry %pM vid %lld: %pe\n", + macaddr, l2_lookup.vlanid, ERR_PTR(rc)); + return; + } + } +} + static int sja1105_mdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb) { @@ -1741,6 +1942,7 @@ static int sja1105_bridge_member(struct dsa_switch *ds, int port, static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, u8 state) { + struct dsa_port *dp = dsa_to_port(ds, port); struct sja1105_private *priv = ds->priv; struct sja1105_mac_config_entry *mac; @@ -1766,12 +1968,12 @@ static void sja1105_bridge_stp_state_set(struct dsa_switch *ds, int port, case BR_STATE_LEARNING: mac[port].ingress = true; mac[port].egress = false; - mac[port].dyn_learn = !!(priv->learn_ena & BIT(port)); + mac[port].dyn_learn = dp->learning; break; case BR_STATE_FORWARDING: mac[port].ingress = true; mac[port].egress = true; - mac[port].dyn_learn = !!(priv->learn_ena & BIT(port)); + mac[port].dyn_learn = dp->learning; break; default: dev_err(ds->dev, "invalid STP state: %d\n", state); @@ -2231,8 +2433,8 @@ static int sja1105_bridge_vlan_add(struct dsa_switch *ds, int port, return -EBUSY; } - /* Always install bridge VLANs as egress-tagged on the CPU port. */ - if (dsa_is_cpu_port(ds, port)) + /* Always install bridge VLANs as egress-tagged on CPU and DSA ports */ + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) flags = 0; rc = sja1105_vlan_add(priv, port, vlan->vid, flags); @@ -2401,6 +2603,7 @@ static int sja1105_setup(struct dsa_switch *ds) ds->num_tx_queues = SJA1105_NUM_TC; ds->mtu_enforcement_ingress = true; + ds->assisted_learning_on_cpu_port = true; rc = sja1105_devlink_setup(ds); if (rc < 0) @@ -2585,7 +2788,7 @@ static int sja1105_change_mtu(struct dsa_switch *ds, int port, int new_mtu) new_mtu += VLAN_ETH_HLEN + ETH_FCS_LEN; - if (dsa_is_cpu_port(ds, port)) + if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) new_mtu += VLAN_HLEN; policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries; @@ -2732,23 +2935,13 @@ static int sja1105_port_set_learning(struct sja1105_private *priv, int port, bool enabled) { struct sja1105_mac_config_entry *mac; - int rc; mac = priv->static_config.tables[BLK_IDX_MAC_CONFIG].entries; mac[port].dyn_learn = enabled; - rc = sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, - &mac[port], true); - if (rc) - return rc; - - if (enabled) - priv->learn_ena |= BIT(port); - else - priv->learn_ena &= ~BIT(port); - - return 0; + return sja1105_dynamic_config_write(priv, BLK_IDX_MAC_CONFIG, port, + &mac[port], true); } static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to, @@ -2883,6 +3076,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = { .port_fdb_dump = sja1105_fdb_dump, .port_fdb_add = sja1105_fdb_add, .port_fdb_del = sja1105_fdb_del, + .port_fast_age = sja1105_fast_age, .port_bridge_join = sja1105_bridge_join, .port_bridge_leave = sja1105_bridge_leave, .port_pre_bridge_flags = sja1105_port_pre_bridge_flags, diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c index 96cc5fc36eb5..87c906e744fb 100644 --- a/drivers/net/ethernet/3com/3c509.c +++ b/drivers/net/ethernet/3com/3c509.c @@ -302,7 +302,6 @@ static int el3_isa_match(struct device *pdev, unsigned int ndev) return -ENOMEM; SET_NETDEV_DEV(dev, pdev); - netdev_boot_setup_check(dev); if (!request_region(ioaddr, EL3_IO_EXTENT, "3c509-isa")) { free_netdev(dev); @@ -421,7 +420,6 @@ static int el3_pnp_probe(struct pnp_dev *pdev, const struct pnp_device_id *id) return -ENOMEM; } SET_NETDEV_DEV(dev, &pdev->dev); - netdev_boot_setup_check(dev); el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_PNP); pnp_set_drvdata(pdev, dev); @@ -514,7 +512,9 @@ static int el3_common_init(struct net_device *dev) { struct el3_private *lp = netdev_priv(dev); int err; - const char *if_names[] = {"10baseT", "AUI", "undefined", "BNC"}; + static const char * const if_names[] = { + "10baseT", "AUI", "undefined", "BNC" + }; spin_lock_init(&lp->lock); @@ -588,7 +588,6 @@ static int el3_eisa_probe(struct device *device) } SET_NETDEV_DEV(dev, device); - netdev_boot_setup_check(dev); el3_dev_fill(dev, phys_addr, ioaddr, irq, if_port, EL3_EISA); eisa_set_drvdata (edev, dev); diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 47b4215bb93b..8d90fed5d33e 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -407,7 +407,7 @@ MODULE_PARM_DESC(max_interrupt_work, "3c515 maximum events handled per interrupt /* we will need locking (and refcounting) if we ever use it for more */ static LIST_HEAD(root_corkscrew_dev); -int init_module(void) +static int corkscrew_init_module(void) { int found = 0; if (debug >= 0) @@ -416,6 +416,7 @@ int init_module(void) found++; return found ? 0 : -ENODEV; } +module_init(corkscrew_init_module); #else struct net_device *tc515_probe(int unit) diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig index a52a3740f0c9..706bd59bf645 100644 --- a/drivers/net/ethernet/3com/Kconfig +++ b/drivers/net/ethernet/3com/Kconfig @@ -34,6 +34,7 @@ config EL3 config 3C515 tristate "3c515 ISA \"Fast EtherLink\"" depends on ISA && ISA_DMA_API && !PPC32 + select NETDEV_LEGACY_INIT help If you have a 3Com ISA EtherLink XL "Corkscrew" 3c515 Fast Ethernet network card, say Y here. diff --git a/drivers/net/ethernet/8390/Kconfig b/drivers/net/ethernet/8390/Kconfig index 9f4b302fd2ce..a4130e643342 100644 --- a/drivers/net/ethernet/8390/Kconfig +++ b/drivers/net/ethernet/8390/Kconfig @@ -102,6 +102,7 @@ config MCF8390 config NE2000 tristate "NE2000/NE1000 support" depends on (ISA || (Q40 && m) || MACH_TX49XX || ATARI_ETHERNEC) + select NETDEV_LEGACY_INIT if ISA select CRC32 help If you have a network (Ethernet) card of this type, say Y here. @@ -169,6 +170,7 @@ config STNIC config ULTRA tristate "SMC Ultra support" depends on ISA + select NETDEV_LEGACY_INIT select CRC32 help If you have a network (Ethernet) card of this type, say Y here. @@ -186,6 +188,7 @@ config ULTRA config WD80x3 tristate "WD80*3 support" depends on ISA + select NETDEV_LEGACY_INIT select CRC32 help If you have a network (Ethernet) card of this type, say Y here. diff --git a/drivers/net/ethernet/8390/apne.c b/drivers/net/ethernet/8390/apne.c index fe6c834c422e..da1ae37a9d73 100644 --- a/drivers/net/ethernet/8390/apne.c +++ b/drivers/net/ethernet/8390/apne.c @@ -75,7 +75,6 @@ #define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */ -struct net_device * __init apne_probe(int unit); static int apne_probe1(struct net_device *dev, int ioaddr); static void apne_reset_8390(struct net_device *dev); @@ -120,7 +119,7 @@ static u32 apne_msg_enable; module_param_named(msg_enable, apne_msg_enable, uint, 0444); MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bitmap)"); -struct net_device * __init apne_probe(int unit) +static struct net_device * __init apne_probe(void) { struct net_device *dev; struct ei_device *ei_local; @@ -150,10 +149,6 @@ struct net_device * __init apne_probe(int unit) dev = alloc_ei_netdev(); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } ei_local = netdev_priv(dev); ei_local->msg_enable = apne_msg_enable; @@ -554,12 +549,11 @@ static irqreturn_t apne_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -#ifdef MODULE static struct net_device *apne_dev; static int __init apne_module_init(void) { - apne_dev = apne_probe(-1); + apne_dev = apne_probe(); return PTR_ERR_OR_ZERO(apne_dev); } @@ -579,7 +573,6 @@ static void __exit apne_module_exit(void) } module_init(apne_module_init); module_exit(apne_module_exit); -#endif static int init_pcmcia(void) { diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c index 9595dd1f32ca..6c6bdd5913ec 100644 --- a/drivers/net/ethernet/8390/ax88796.c +++ b/drivers/net/ethernet/8390/ax88796.c @@ -101,6 +101,13 @@ static inline struct ax_device *to_ax_dev(struct net_device *dev) return (struct ax_device *)(ei_local + 1); } +void ax_NS8390_reinit(struct net_device *dev) +{ + ax_NS8390_init(dev, 1); +} + +EXPORT_SYMBOL_GPL(ax_NS8390_reinit); + /* * ax_initial_check * diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index e9756d0ea5b8..53660bc8d6ff 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -923,7 +923,7 @@ static void __init ne_add_devices(void) } #ifdef MODULE -int __init init_module(void) +static int __init ne_init(void) { int retval; ne_add_devices(); @@ -940,6 +940,7 @@ int __init init_module(void) ne_loop_rm_unreg(0); return retval; } +module_init(ne_init); #else /* MODULE */ static int __init ne_init(void) { @@ -951,6 +952,7 @@ static int __init ne_init(void) } module_init(ne_init); +#ifdef CONFIG_NETDEV_LEGACY_INIT struct net_device * __init ne_probe(int unit) { int this_dev; @@ -991,6 +993,7 @@ struct net_device * __init ne_probe(int unit) return ERR_PTR(-ENODEV); } +#endif #endif /* MODULE */ static void __exit ne_exit(void) diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c index 1d8ed7357b7f..0890fa493f70 100644 --- a/drivers/net/ethernet/8390/smc-ultra.c +++ b/drivers/net/ethernet/8390/smc-ultra.c @@ -522,7 +522,6 @@ static void ultra_pio_input(struct net_device *dev, int count, /* We know skbuffs are padded to at least word alignment. */ insw(ioaddr + IOPD, buf, (count+1)>>1); } - static void ultra_pio_output(struct net_device *dev, int count, const unsigned char *buf, const int start_page) { @@ -572,8 +571,7 @@ MODULE_LICENSE("GPL"); /* This is set up so that only a single autoprobe takes place per call. ISA device autoprobes on a running machine are not recommended. */ -int __init -init_module(void) +static int __init ultra_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -600,6 +598,7 @@ init_module(void) return 0; return -ENXIO; } +module_init(ultra_init_module); static void cleanup_card(struct net_device *dev) { @@ -613,8 +612,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void __exit -cleanup_module(void) +static void __exit ultra_cleanup_module(void) { int this_dev; @@ -627,4 +625,5 @@ cleanup_module(void) } } } +module_exit(ultra_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c index c834123560f1..263a942d81fa 100644 --- a/drivers/net/ethernet/8390/wd.c +++ b/drivers/net/ethernet/8390/wd.c @@ -519,7 +519,7 @@ MODULE_LICENSE("GPL"); /* This is set up so that only a single autoprobe takes place per call. ISA device autoprobes on a running machine are not recommended. */ -int __init init_module(void) +static int __init wd_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -548,6 +548,7 @@ int __init init_module(void) return 0; return -ENXIO; } +module_init(wd_init_module); static void cleanup_card(struct net_device *dev) { @@ -556,8 +557,7 @@ static void cleanup_card(struct net_device *dev) iounmap(ei_status.mem); } -void __exit -cleanup_module(void) +static void __exit wd_cleanup_module(void) { int this_dev; @@ -570,4 +570,5 @@ cleanup_module(void) } } } +module_exit(wd_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/8390/xsurf100.c b/drivers/net/ethernet/8390/xsurf100.c index e2c963821ffe..fe7a74707aa4 100644 --- a/drivers/net/ethernet/8390/xsurf100.c +++ b/drivers/net/ethernet/8390/xsurf100.c @@ -22,8 +22,6 @@ #define XS100_8390_DATA_WRITE32_BASE 0x0C80 #define XS100_8390_DATA_AREA_SIZE 0x80 -#define __NS8390_init ax_NS8390_init - /* force unsigned long back to 'void __iomem *' */ #define ax_convert_addr(_a) ((void __force __iomem *)(_a)) @@ -42,10 +40,7 @@ /* Ensure we have our RCR base value */ #define AX88796_PLATFORM -static unsigned char version[] = - "ax88796.c: Copyright 2005,2007 Simtec Electronics\n"; - -#include "lib8390.c" +#include "8390.h" /* from ne.c */ #define NE_CMD EI_SHIFT(0x00) @@ -232,7 +227,7 @@ static void xs100_block_output(struct net_device *dev, int count, if (jiffies - dma_start > 2 * HZ / 100) { /* 20ms */ netdev_warn(dev, "timeout waiting for Tx RDC.\n"); ei_local->reset_8390(dev); - ax_NS8390_init(dev, 1); + ax_NS8390_reinit(dev); break; } } diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig index d0b0609bbe23..c6a3abec86f5 100644 --- a/drivers/net/ethernet/amd/Kconfig +++ b/drivers/net/ethernet/amd/Kconfig @@ -46,6 +46,7 @@ config AMD8111_ETH config LANCE tristate "AMD LANCE and PCnet (AT1500 and NE2100) support" depends on ISA && ISA_DMA_API && !ARM && !PPC32 + select NETDEV_LEGACY_INIT help If you have a network (Ethernet) card of this type, say Y here. Some LinkSys cards are of this type. @@ -132,6 +133,7 @@ config PCMCIA_NMCLAN config NI65 tristate "NI6510 support" depends on ISA && ISA_DMA_API && !ARM && !PPC32 + select NETDEV_LEGACY_INIT help If you have a network (Ethernet) card of this type, say Y here. diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c index 36f54d13a2eb..9d2f49fd945e 100644 --- a/drivers/net/ethernet/amd/atarilance.c +++ b/drivers/net/ethernet/amd/atarilance.c @@ -367,7 +367,7 @@ static void *slow_memcpy( void *dst, const void *src, size_t len ) } -struct net_device * __init atarilance_probe(int unit) +struct net_device * __init atarilance_probe(void) { int i; static int found; @@ -382,10 +382,6 @@ struct net_device * __init atarilance_probe(int unit) dev = alloc_etherdev(sizeof(struct lance_private)); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } for( i = 0; i < N_LANCE_ADDR; ++i ) { if (lance_probe1( dev, &lance_addr_list[i] )) { @@ -1137,13 +1133,11 @@ static int lance_set_mac_address( struct net_device *dev, void *addr ) return 0; } - -#ifdef MODULE static struct net_device *atarilance_dev; static int __init atarilance_module_init(void) { - atarilance_dev = atarilance_probe(-1); + atarilance_dev = atarilance_probe(); return PTR_ERR_OR_ZERO(atarilance_dev); } @@ -1155,4 +1149,3 @@ static void __exit atarilance_module_exit(void) } module_init(atarilance_module_init); module_exit(atarilance_module_exit); -#endif /* MODULE */ diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c index 2178e6b89dbd..945bf1d87507 100644 --- a/drivers/net/ethernet/amd/lance.c +++ b/drivers/net/ethernet/amd/lance.c @@ -327,7 +327,7 @@ MODULE_PARM_DESC(dma, "LANCE/PCnet ISA DMA channel (ignored for some devices)"); MODULE_PARM_DESC(irq, "LANCE/PCnet IRQ number (ignored for some devices)"); MODULE_PARM_DESC(lance_debug, "LANCE/PCnet debug level (0-7)"); -int __init init_module(void) +static int __init lance_init_module(void) { struct net_device *dev; int this_dev, found = 0; @@ -356,6 +356,7 @@ int __init init_module(void) return 0; return -ENXIO; } +module_init(lance_init_module); static void cleanup_card(struct net_device *dev) { @@ -368,7 +369,7 @@ static void cleanup_card(struct net_device *dev) kfree(lp); } -void __exit cleanup_module(void) +static void __exit lance_cleanup_module(void) { int this_dev; @@ -381,6 +382,7 @@ void __exit cleanup_module(void) } } } +module_exit(lance_cleanup_module); #endif /* MODULE */ MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c index 3f2e4cdd0b83..da97fccea9ea 100644 --- a/drivers/net/ethernet/amd/mvme147.c +++ b/drivers/net/ethernet/amd/mvme147.c @@ -68,7 +68,7 @@ static const struct net_device_ops lance_netdev_ops = { }; /* Initialise the one and only on-board 7990 */ -struct net_device * __init mvme147lance_probe(int unit) +static struct net_device * __init mvme147lance_probe(void) { struct net_device *dev; static int called; @@ -86,9 +86,6 @@ struct net_device * __init mvme147lance_probe(int unit) if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) - sprintf(dev->name, "eth%d", unit); - /* Fill the dev fields */ dev->base_addr = (unsigned long)MVME147_LANCE_BASE; dev->netdev_ops = &lance_netdev_ops; @@ -179,22 +176,21 @@ static int m147lance_close(struct net_device *dev) return 0; } -#ifdef MODULE MODULE_LICENSE("GPL"); static struct net_device *dev_mvme147_lance; -int __init init_module(void) +static int __init m147lance_init(void) { - dev_mvme147_lance = mvme147lance_probe(-1); + dev_mvme147_lance = mvme147lance_probe(); return PTR_ERR_OR_ZERO(dev_mvme147_lance); } +module_init(m147lance_init); -void __exit cleanup_module(void) +static void __exit m147lance_exit(void) { struct m147lance_private *lp = netdev_priv(dev_mvme147_lance); unregister_netdev(dev_mvme147_lance); free_pages(lp->ram, 3); free_netdev(dev_mvme147_lance); } - -#endif /* MODULE */ +module_exit(m147lance_exit); diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c index 5c1cfb0c4a42..b5df7ad5a83f 100644 --- a/drivers/net/ethernet/amd/ni65.c +++ b/drivers/net/ethernet/amd/ni65.c @@ -1230,18 +1230,20 @@ MODULE_PARM_DESC(irq, "ni6510 IRQ number (ignored for some cards)"); MODULE_PARM_DESC(io, "ni6510 I/O base address"); MODULE_PARM_DESC(dma, "ni6510 ISA DMA channel (ignored for some cards)"); -int __init init_module(void) +static int __init ni65_init_module(void) { dev_ni65 = ni65_probe(-1); return PTR_ERR_OR_ZERO(dev_ni65); } +module_init(ni65_init_module); -void __exit cleanup_module(void) +static void __exit ni65_cleanup_module(void) { unregister_netdev(dev_ni65); cleanup_card(dev_ni65); free_netdev(dev_ni65); } +module_exit(ni65_cleanup_module); #endif /* MODULE */ MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c index f8d7a9387a56..4a845bc071b2 100644 --- a/drivers/net/ethernet/amd/sun3lance.c +++ b/drivers/net/ethernet/amd/sun3lance.c @@ -245,7 +245,7 @@ static void set_multicast_list( struct net_device *dev ); /************************* End of Prototypes **************************/ -struct net_device * __init sun3lance_probe(int unit) +static struct net_device * __init sun3lance_probe(void) { struct net_device *dev; static int found; @@ -272,10 +272,6 @@ struct net_device * __init sun3lance_probe(int unit) dev = alloc_etherdev(sizeof(struct lance_private)); if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } if (!lance_probe(dev)) goto out; @@ -924,17 +920,16 @@ static void set_multicast_list( struct net_device *dev ) } -#ifdef MODULE - static struct net_device *sun3lance_dev; -int __init init_module(void) +static int __init sun3lance_init(void) { - sun3lance_dev = sun3lance_probe(-1); + sun3lance_dev = sun3lance_probe(); return PTR_ERR_OR_ZERO(sun3lance_dev); } +module_init(sun3lance_init); -void __exit cleanup_module(void) +static void __exit sun3lance_cleanup(void) { unregister_netdev(sun3lance_dev); #ifdef CONFIG_SUN3 @@ -942,6 +937,4 @@ void __exit cleanup_module(void) #endif free_netdev(sun3lance_dev); } - -#endif /* MODULE */ - +module_exit(sun3lance_cleanup); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1a6ec1a12d53..b5d954cb409a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2669,7 +2669,8 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } /* Allocated memory for FW statistics */ - if (bnx2x_alloc_fw_stats_mem(bp)) + rc = bnx2x_alloc_fw_stats_mem(bp); + if (rc) LOAD_ERROR_EXIT(bp, load_error0); /* request pf to initialize status blocks */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4a92ea7fa7d2..865fcb8cf29f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -3163,6 +3163,58 @@ static int bnxt_alloc_tx_rings(struct bnxt *bp) return 0; } +static void bnxt_free_cp_arrays(struct bnxt_cp_ring_info *cpr) +{ + kfree(cpr->cp_desc_ring); + cpr->cp_desc_ring = NULL; + kfree(cpr->cp_desc_mapping); + cpr->cp_desc_mapping = NULL; +} + +static int bnxt_alloc_cp_arrays(struct bnxt_cp_ring_info *cpr, int n) +{ + cpr->cp_desc_ring = kcalloc(n, sizeof(*cpr->cp_desc_ring), GFP_KERNEL); + if (!cpr->cp_desc_ring) + return -ENOMEM; + cpr->cp_desc_mapping = kcalloc(n, sizeof(*cpr->cp_desc_mapping), + GFP_KERNEL); + if (!cpr->cp_desc_mapping) + return -ENOMEM; + return 0; +} + +static void bnxt_free_all_cp_arrays(struct bnxt *bp) +{ + int i; + + if (!bp->bnapi) + return; + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + + if (!bnapi) + continue; + bnxt_free_cp_arrays(&bnapi->cp_ring); + } +} + +static int bnxt_alloc_all_cp_arrays(struct bnxt *bp) +{ + int i, n = bp->cp_nr_pages; + + for (i = 0; i < bp->cp_nr_rings; i++) { + struct bnxt_napi *bnapi = bp->bnapi[i]; + int rc; + + if (!bnapi) + continue; + rc = bnxt_alloc_cp_arrays(&bnapi->cp_ring, n); + if (rc) + return rc; + } + return 0; +} + static void bnxt_free_cp_rings(struct bnxt *bp) { int i; @@ -3190,6 +3242,7 @@ static void bnxt_free_cp_rings(struct bnxt *bp) if (cpr2) { ring = &cpr2->cp_ring_struct; bnxt_free_ring(bp, &ring->ring_mem); + bnxt_free_cp_arrays(cpr2); kfree(cpr2); cpr->cp_ring_arr[j] = NULL; } @@ -3208,6 +3261,12 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) if (!cpr) return NULL; + rc = bnxt_alloc_cp_arrays(cpr, bp->cp_nr_pages); + if (rc) { + bnxt_free_cp_arrays(cpr); + kfree(cpr); + return NULL; + } ring = &cpr->cp_ring_struct; rmem = &ring->ring_mem; rmem->nr_pages = bp->cp_nr_pages; @@ -3218,6 +3277,7 @@ static struct bnxt_cp_ring_info *bnxt_alloc_cp_sub_ring(struct bnxt *bp) rc = bnxt_alloc_ring(bp, rmem); if (rc) { bnxt_free_ring(bp, rmem); + bnxt_free_cp_arrays(cpr); kfree(cpr); cpr = NULL; } @@ -3650,9 +3710,15 @@ void bnxt_set_ring_params(struct bnxt *bp) if (jumbo_factor > agg_factor) agg_factor = jumbo_factor; } - agg_ring_size = ring_size * agg_factor; + if (agg_factor) { + if (ring_size > BNXT_MAX_RX_DESC_CNT_JUM_ENA) { + ring_size = BNXT_MAX_RX_DESC_CNT_JUM_ENA; + netdev_warn(bp->dev, "RX ring size reduced from %d to %d because the jumbo ring is now enabled\n", + bp->rx_ring_size, ring_size); + bp->rx_ring_size = ring_size; + } + agg_ring_size = ring_size * agg_factor; - if (agg_ring_size) { bp->rx_agg_nr_pages = bnxt_calc_nr_ring_pages(agg_ring_size, RX_DESC_CNT); if (bp->rx_agg_nr_pages > MAX_RX_AGG_PAGES) { @@ -4253,6 +4319,7 @@ static void bnxt_free_mem(struct bnxt *bp, bool irq_re_init) bnxt_free_tx_rings(bp); bnxt_free_rx_rings(bp); bnxt_free_cp_rings(bp); + bnxt_free_all_cp_arrays(bp); bnxt_free_ntp_fltrs(bp, irq_re_init); if (irq_re_init) { bnxt_free_ring_stats(bp); @@ -4373,6 +4440,10 @@ static int bnxt_alloc_mem(struct bnxt *bp, bool irq_re_init) goto alloc_mem_err; } + rc = bnxt_alloc_all_cp_arrays(bp); + if (rc) + goto alloc_mem_err; + bnxt_init_ring_struct(bp); rc = bnxt_alloc_rx_rings(bp); @@ -12168,9 +12239,8 @@ static void bnxt_fw_reset_task(struct work_struct *work) /* Make sure fw_reset_state is 0 before clearing the flag */ smp_mb__before_atomic(); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - bnxt_ulp_start(bp, rc); - if (!rc) - bnxt_reenable_sriov(bp); + bnxt_ulp_start(bp, 0); + bnxt_reenable_sriov(bp); bnxt_vf_reps_alloc(bp); bnxt_vf_reps_open(bp); bnxt_ptp_reapply_pps(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index e379c48c1df9..9c3324e76ff7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -596,15 +596,17 @@ struct nqe_cn { #define MAX_TPA_SEGS_P5 0x3f #if (BNXT_PAGE_SHIFT == 16) -#define MAX_RX_PAGES 1 +#define MAX_RX_PAGES_AGG_ENA 1 +#define MAX_RX_PAGES 4 #define MAX_RX_AGG_PAGES 4 #define MAX_TX_PAGES 1 -#define MAX_CP_PAGES 8 +#define MAX_CP_PAGES 16 #else -#define MAX_RX_PAGES 8 +#define MAX_RX_PAGES_AGG_ENA 8 +#define MAX_RX_PAGES 32 #define MAX_RX_AGG_PAGES 32 #define MAX_TX_PAGES 8 -#define MAX_CP_PAGES 64 +#define MAX_CP_PAGES 128 #endif #define RX_DESC_CNT (BNXT_PAGE_SIZE / sizeof(struct rx_bd)) @@ -622,6 +624,7 @@ struct nqe_cn { #define HW_CMPD_RING_SIZE (sizeof(struct tx_cmp) * CP_DESC_CNT) #define BNXT_MAX_RX_DESC_CNT (RX_DESC_CNT * MAX_RX_PAGES - 1) +#define BNXT_MAX_RX_DESC_CNT_JUM_ENA (RX_DESC_CNT * MAX_RX_PAGES_AGG_ENA - 1) #define BNXT_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1) #define BNXT_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1) @@ -972,11 +975,11 @@ struct bnxt_cp_ring_info { struct dim dim; union { - struct tx_cmp *cp_desc_ring[MAX_CP_PAGES]; - struct nqe_cn *nq_desc_ring[MAX_CP_PAGES]; + struct tx_cmp **cp_desc_ring; + struct nqe_cn **nq_desc_ring; }; - dma_addr_t cp_desc_mapping[MAX_CP_PAGES]; + dma_addr_t *cp_desc_mapping; struct bnxt_stats_mem stats; u32 hw_stats_ctx_id; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 64381be935a8..2cd8bb37e641 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -743,14 +743,17 @@ static void bnxt_dl_params_unregister(struct bnxt *bp) int bnxt_dl_register(struct bnxt *bp) { + const struct devlink_ops *devlink_ops; struct devlink_port_attrs attrs = {}; struct devlink *dl; int rc; if (BNXT_PF(bp)) - dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl)); + devlink_ops = &bnxt_dl_ops; else - dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl)); + devlink_ops = &bnxt_vf_dl_ops; + + dl = devlink_alloc(devlink_ops, sizeof(struct bnxt_dl), &bp->pdev->dev); if (!dl) { netdev_warn(bp->dev, "devlink_alloc failed\n"); return -ENOMEM; @@ -763,7 +766,7 @@ int bnxt_dl_register(struct bnxt *bp) bp->hwrm_spec_code > 0x10803) bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY; - rc = devlink_register(dl, &bp->pdev->dev); + rc = devlink_register(dl); if (rc) { netdev_warn(bp->dev, "devlink_register failed. rc=%d\n", rc); goto err_dl_free; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 786ca51e669b..485252d12245 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -768,8 +768,13 @@ static void bnxt_get_ringparam(struct net_device *dev, { struct bnxt *bp = netdev_priv(dev); - ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT; - ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT; + if (bp->flags & BNXT_FLAG_AGG_RINGS) { + ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT_JUM_ENA; + ering->rx_jumbo_max_pending = BNXT_MAX_RX_JUM_DESC_CNT; + } else { + ering->rx_max_pending = BNXT_MAX_RX_DESC_CNT; + ering->rx_jumbo_max_pending = 0; + } ering->tx_max_pending = BNXT_MAX_TX_DESC_CNT; ering->rx_pending = bp->rx_ring_size; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index e33e311e2341..7f55ebbfd04b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -560,6 +560,12 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) bnxt_ptp_get_current_time(bp); ptp->next_period = now + HZ; + if (time_after_eq(now, ptp->next_overflow_check)) { + spin_lock_bh(&ptp->ptp_lock); + timecounter_read(&ptp->tc); + spin_unlock_bh(&ptp->ptp_lock); + ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD; + } return HZ; } @@ -713,6 +719,7 @@ int bnxt_ptp_init(struct bnxt *bp) ptp->cc.shift = 0; ptp->cc.mult = 1; + ptp->next_overflow_check = jiffies + BNXT_PHC_OVERFLOW_PERIOD; timecounter_init(&ptp->tc, &ptp->cc, ktime_to_ns(ktime_get_real())); ptp->ptp_info = bnxt_ptp_caps; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 88923346ab50..cc3cdbaab6cf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -83,6 +83,10 @@ struct bnxt_ptp_cfg { u64 current_time; u64 old_time; unsigned long next_period; + unsigned long next_overflow_check; + /* 48-bit PHC overflows in 78 hours. Check overflow every 19 hours. */ + #define BNXT_PHC_OVERFLOW_PERIOD (19 * 3600 * HZ) + u16 tx_seqid; struct bnxt *bp; atomic_t tx_avail; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 63e2237e0cb4..8507198992df 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3972,8 +3972,6 @@ static int bcmgenet_probe(struct platform_device *pdev) */ dev->needed_headroom += 64; - netdev_boot_setup_check(dev); - priv->dev = dev; priv->pdev = pdev; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index a4a5209a9386..2907e13b9df6 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1457,7 +1457,7 @@ static void free_netsgbuf(void *buf) while (frags--) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - pci_unmap_page((lio->oct_dev)->pci_dev, + dma_unmap_page(&lio->oct_dev->pci_dev->dev, g->sg[(i >> 2)].ptr[(i & 3)], skb_frag_size(frag), DMA_TO_DEVICE); i++; @@ -1500,7 +1500,7 @@ static void free_netsgbuf_with_resp(void *buf) while (frags--) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - pci_unmap_page((lio->oct_dev)->pci_dev, + dma_unmap_page(&lio->oct_dev->pci_dev->dev, g->sg[(i >> 2)].ptr[(i & 3)], skb_frag_size(frag), DMA_TO_DEVICE); i++; @@ -3750,7 +3750,8 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) } devlink = devlink_alloc(&liquidio_devlink_ops, - sizeof(struct lio_devlink_priv)); + sizeof(struct lio_devlink_priv), + &octeon_dev->pci_dev->dev); if (!devlink) { dev_err(&octeon_dev->pci_dev->dev, "devlink alloc failed\n"); goto setup_nic_dev_free; @@ -3759,7 +3760,7 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) lio_devlink = devlink_priv(devlink); lio_devlink->oct = octeon_dev; - if (devlink_register(devlink, &octeon_dev->pci_dev->dev)) { + if (devlink_register(devlink)) { devlink_free(devlink); dev_err(&octeon_dev->pci_dev->dev, "devlink registration failed\n"); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 3085dd455a17..c6fe0f2a4d0e 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -843,7 +843,7 @@ static void free_netsgbuf(void *buf) while (frags--) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - pci_unmap_page((lio->oct_dev)->pci_dev, + dma_unmap_page(&lio->oct_dev->pci_dev->dev, g->sg[(i >> 2)].ptr[(i & 3)], skb_frag_size(frag), DMA_TO_DEVICE); i++; @@ -887,7 +887,7 @@ static void free_netsgbuf_with_resp(void *buf) while (frags--) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - pci_unmap_page((lio->oct_dev)->pci_dev, + dma_unmap_page(&lio->oct_dev->pci_dev->dev, g->sg[(i >> 2)].ptr[(i & 3)], skb_frag_size(frag), DMA_TO_DEVICE); i++; diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 9361f964bb9b..691e1475d55e 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1322,18 +1322,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { dev_err(dev, "Unable to get usable DMA configuration\n"); goto err_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "Unable to get 48-bit DMA for consistent allocations\n"); - goto err_release_regions; - } - /* MAP PF's configuration registers */ nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); if (!nic->reg_base) { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index efaaa57d4ed5..d1667b759522 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2130,18 +2130,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_disable_device; } - err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) { dev_err(dev, "Unable to get usable DMA configuration\n"); goto err_release_regions; } - err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); - if (err) { - dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n"); - goto err_release_regions; - } - qcount = netif_get_num_default_rss_queues(); /* Restrict multiqset support only for host bound VFs */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 6260b3bebd2b..786ceae34488 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1441,7 +1441,7 @@ static int cxgb4_set_hash_filter(struct net_device *dev, } else if (iconf & USE_ENC_IDX_F) { if (f->fs.val.encap_vld) { struct port_info *pi = netdev_priv(f->dev); - u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; + static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; /* allocate MPS TCAM entry */ ret = t4_alloc_encap_mac_filt(adapter, pi->viid, @@ -1688,7 +1688,7 @@ int __cxgb4_set_filter(struct net_device *dev, int ftid, } else if (iconf & USE_ENC_IDX_F) { if (f->fs.val.encap_vld) { struct port_info *pi = netdev_priv(f->dev); - u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; + static const u8 match_all_mac[] = { 0, 0, 0, 0, 0, 0 }; /* allocate MPS TCAM entry */ ret = t4_alloc_encap_mac_filt(adapter, pi->viid, diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig index d8af9e64dd1e..dac1764ba740 100644 --- a/drivers/net/ethernet/cirrus/Kconfig +++ b/drivers/net/ethernet/cirrus/Kconfig @@ -6,7 +6,7 @@ config NET_VENDOR_CIRRUS bool "Cirrus devices" default y - depends on ISA || EISA || ARM || MAC + depends on ISA || EISA || ARM || MAC || COMPILE_TEST help If you have a network (Ethernet) card belonging to this class, say Y. @@ -18,9 +18,16 @@ config NET_VENDOR_CIRRUS if NET_VENDOR_CIRRUS config CS89x0 - tristate "CS89x0 support" - depends on ISA || EISA || ARM + tristate + +config CS89x0_ISA + tristate "CS89x0 ISA driver support" + depends on HAS_IOPORT_MAP + depends on ISA depends on !PPC32 + depends on CS89x0_PLATFORM=n + select NETDEV_LEGACY_INIT + select CS89x0 help Support for CS89x0 chipset based Ethernet cards. If you have a network (Ethernet) card of this type, say Y and read the file @@ -30,15 +37,15 @@ config CS89x0 will be called cs89x0. config CS89x0_PLATFORM - bool "CS89x0 platform driver support" if HAS_IOPORT_MAP - default !HAS_IOPORT_MAP - depends on CS89x0 + tristate "CS89x0 platform driver support" + depends on ARM || COMPILE_TEST + select CS89x0 help - Say Y to compile the cs89x0 driver as a platform driver. This - makes this driver suitable for use on certain evaluation boards - such as the iMX21ADS. + Say Y to compile the cs89x0 platform driver. This makes this driver + suitable for use on certain evaluation boards such as the iMX21ADS. - If you are unsure, say N. + To compile this driver as a module, choose M here. The module + will be called cs89x0. config EP93XX_ETH tristate "EP93xx Ethernet support" diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c index 33ace3307059..d0c4c8b7a15a 100644 --- a/drivers/net/ethernet/cirrus/cs89x0.c +++ b/drivers/net/ethernet/cirrus/cs89x0.c @@ -104,7 +104,7 @@ static char version[] __initdata = * them to system IRQ numbers. This mapping is card specific and is set to * the configuration of the Cirrus Eval board for this chip. */ -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) static unsigned int netcard_portlist[] __used __initdata = { 0x300, 0x320, 0x340, 0x360, 0x200, 0x220, 0x240, 0x260, 0x280, 0x2a0, 0x2c0, 0x2e0, 0 @@ -292,7 +292,7 @@ write_irq(struct net_device *dev, int chip_type, int irq) int i; if (chip_type == CS8900) { -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) /* Search the mapping table for the corresponding IRQ pin. */ for (i = 0; i != ARRAY_SIZE(cs8900_irq_map); i++) if (cs8900_irq_map[i] == irq) @@ -859,7 +859,7 @@ net_open(struct net_device *dev) goto bad_out; } } else { -#if !defined(CONFIG_CS89x0_PLATFORM) +#if IS_ENABLED(CONFIG_CS89x0_ISA) if (((1 << dev->irq) & lp->irq_map) == 0) { pr_err("%s: IRQ %d is not in our map of allowable IRQs, which is %x\n", dev->name, dev->irq, lp->irq_map); @@ -1523,7 +1523,7 @@ cs89x0_probe1(struct net_device *dev, void __iomem *ioaddr, int modular) dev->irq = i; } else { i = lp->isa_config & INT_NO_MASK; -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) if (lp->chip_type == CS8900) { /* Translate the IRQ using the IRQ mapping table. */ if (i >= ARRAY_SIZE(cs8900_irq_map)) @@ -1576,7 +1576,7 @@ out1: return retval; } -#ifndef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_ISA) /* * This function converts the I/O port address used by the cs89x0_probe() and * init_module() functions to the I/O memory address used by the @@ -1682,11 +1682,7 @@ out: pr_warn("no cs8900 or cs8920 detected. Be sure to disable PnP with SETUP\n"); return ERR_PTR(err); } -#endif -#endif - -#if defined(MODULE) && !defined(CONFIG_CS89x0_PLATFORM) - +#else static struct net_device *dev_cs89x0; /* Support the 'debug' module parm even if we're compiled for non-debug to @@ -1757,9 +1753,9 @@ MODULE_LICENSE("GPL"); * (hw or software util) */ -int __init init_module(void) +static int __init cs89x0_isa_init_module(void) { - struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); + struct net_device *dev; struct net_local *lp; int ret = 0; @@ -1768,6 +1764,7 @@ int __init init_module(void) #else debug = 0; #endif + dev = alloc_etherdev(sizeof(struct net_local)); if (!dev) return -ENOMEM; @@ -1826,9 +1823,9 @@ out: free_netdev(dev); return ret; } +module_init(cs89x0_isa_init_module); -void __exit -cleanup_module(void) +static void __exit cs89x0_isa_cleanup_module(void) { struct net_local *lp = netdev_priv(dev_cs89x0); @@ -1838,9 +1835,11 @@ cleanup_module(void) release_region(dev_cs89x0->base_addr, NETCARD_IO_EXTENT); free_netdev(dev_cs89x0); } -#endif /* MODULE && !CONFIG_CS89x0_PLATFORM */ +module_exit(cs89x0_isa_cleanup_module); +#endif /* MODULE */ +#endif /* CONFIG_CS89x0_ISA */ -#ifdef CONFIG_CS89x0_PLATFORM +#if IS_ENABLED(CONFIG_CS89x0_PLATFORM) static int __init cs89x0_platform_probe(struct platform_device *pdev) { struct net_device *dev = alloc_etherdev(sizeof(struct net_local)); diff --git a/drivers/net/ethernet/dec/tulip/media.c b/drivers/net/ethernet/dec/tulip/media.c index 011604787b8e..55d6fc99f40b 100644 --- a/drivers/net/ethernet/dec/tulip/media.c +++ b/drivers/net/ethernet/dec/tulip/media.c @@ -362,7 +362,7 @@ void tulip_select_media(struct net_device *dev, int startup) iowrite32(0x33, ioaddr + CSR12); new_csr6 = 0x01860000; /* Trigger autonegotiation. */ - iowrite32(startup ? 0x0201F868 : 0x0001F868, ioaddr + 0xB8); + iowrite32(0x0001F868, ioaddr + 0xB8); } else { iowrite32(0x32, ioaddr + CSR12); new_csr6 = 0x00420000; diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c index 07a48f6bf0fa..85b99099c6b9 100644 --- a/drivers/net/ethernet/dec/tulip/winbond-840.c +++ b/drivers/net/ethernet/dec/tulip/winbond-840.c @@ -357,7 +357,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) int i, option = find_cnt < MAX_UNITS ? options[find_cnt] : 0; void __iomem *ioaddr; - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; pci_set_master(pdev); @@ -379,7 +379,7 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = pci_iomap(pdev, TULIP_BAR, netdev_res_size); if (!ioaddr) - goto err_out_free_res; + goto err_out_netdev; for (i = 0; i < 3; i++) ((__le16 *)dev->dev_addr)[i] = cpu_to_le16(eeprom_read(ioaddr, i)); @@ -458,8 +458,6 @@ static int w840_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_out_cleardev: pci_iounmap(pdev, ioaddr); -err_out_free_res: - pci_release_regions(pdev); err_out_netdev: free_netdev (dev); return -ENODEV; @@ -1526,7 +1524,6 @@ static void w840_remove1(struct pci_dev *pdev) if (dev) { struct netdev_private *np = netdev_priv(dev); unregister_netdev(dev); - pci_release_regions(pdev); pci_iounmap(pdev, np->base_addr); free_netdev(dev); } diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile index c2ef74052ef8..3d9842af7f10 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Makefile +++ b/drivers/net/ethernet/freescale/dpaa2/Makefile @@ -11,7 +11,7 @@ fsl-dpaa2-eth-objs := dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o dpa fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o fsl-dpaa2-ptp-objs := dpaa2-ptp.o dprtc.o -fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o +fsl-dpaa2-switch-objs := dpaa2-switch.o dpaa2-switch-ethtool.o dpsw.o dpaa2-switch-flower.o dpaa2-mac.o dpmac.o # Needed by the tracing framework CFLAGS_dpaa2-eth.o := -I$(src) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c index 833696245565..605a39f892b9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.c @@ -68,7 +68,7 @@ dpaa2_eth_dl_trap_item_lookup(struct dpaa2_eth_priv *priv, u16 trap_id) struct dpaa2_eth_trap_item *dpaa2_eth_dl_get_trap(struct dpaa2_eth_priv *priv, struct dpaa2_fapr *fapr) { - struct dpaa2_faf_error_bit { + static const struct dpaa2_faf_error_bit { int position; enum devlink_trap_generic_id trap_id; } faf_bits[] = { @@ -196,7 +196,8 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) struct dpaa2_eth_devlink_priv *dl_priv; int err; - priv->devlink = devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv)); + priv->devlink = + devlink_alloc(&dpaa2_eth_devlink_ops, sizeof(*dl_priv), dev); if (!priv->devlink) { dev_err(dev, "devlink_alloc failed\n"); return -ENOMEM; @@ -204,7 +205,7 @@ int dpaa2_eth_dl_register(struct dpaa2_eth_priv *priv) dl_priv = devlink_priv(priv->devlink); dl_priv->dpaa2_priv = priv; - err = devlink_register(priv->devlink, dev); + err = devlink_register(priv->devlink); if (err) { dev_err(dev, "devlink_register() = %d\n", err); goto devlink_free; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index f664021c3ad1..7065c71ed7b8 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4138,7 +4138,7 @@ static int dpaa2_eth_connect_mac(struct dpaa2_eth_priv *priv) int err; dpni_dev = to_fsl_mc_device(priv->net_dev->dev.parent); - dpmac_dev = fsl_mc_get_endpoint(dpni_dev); + dpmac_dev = fsl_mc_get_endpoint(dpni_dev, 0); if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER) return PTR_ERR(dpmac_dev); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c index 70e04321c420..720c9230cab5 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-ethtool.c @@ -15,18 +15,18 @@ static struct { enum dpsw_counter id; char name[ETH_GSTRING_LEN]; } dpaa2_switch_ethtool_counters[] = { - {DPSW_CNT_ING_FRAME, "rx frames"}, - {DPSW_CNT_ING_BYTE, "rx bytes"}, - {DPSW_CNT_ING_FLTR_FRAME, "rx filtered frames"}, - {DPSW_CNT_ING_FRAME_DISCARD, "rx discarded frames"}, - {DPSW_CNT_ING_BCAST_FRAME, "rx b-cast frames"}, - {DPSW_CNT_ING_BCAST_BYTES, "rx b-cast bytes"}, - {DPSW_CNT_ING_MCAST_FRAME, "rx m-cast frames"}, - {DPSW_CNT_ING_MCAST_BYTE, "rx m-cast bytes"}, - {DPSW_CNT_EGR_FRAME, "tx frames"}, - {DPSW_CNT_EGR_BYTE, "tx bytes"}, - {DPSW_CNT_EGR_FRAME_DISCARD, "tx discarded frames"}, - {DPSW_CNT_ING_NO_BUFF_DISCARD, "rx discarded no buffer frames"}, + {DPSW_CNT_ING_FRAME, "[hw] rx frames"}, + {DPSW_CNT_ING_BYTE, "[hw] rx bytes"}, + {DPSW_CNT_ING_FLTR_FRAME, "[hw] rx filtered frames"}, + {DPSW_CNT_ING_FRAME_DISCARD, "[hw] rx discarded frames"}, + {DPSW_CNT_ING_BCAST_FRAME, "[hw] rx bcast frames"}, + {DPSW_CNT_ING_BCAST_BYTES, "[hw] rx bcast bytes"}, + {DPSW_CNT_ING_MCAST_FRAME, "[hw] rx mcast frames"}, + {DPSW_CNT_ING_MCAST_BYTE, "[hw] rx mcast bytes"}, + {DPSW_CNT_EGR_FRAME, "[hw] tx frames"}, + {DPSW_CNT_EGR_BYTE, "[hw] tx bytes"}, + {DPSW_CNT_EGR_FRAME_DISCARD, "[hw] tx discarded frames"}, + {DPSW_CNT_ING_NO_BUFF_DISCARD, "[hw] rx nobuffer discards"}, }; #define DPAA2_SWITCH_NUM_COUNTERS ARRAY_SIZE(dpaa2_switch_ethtool_counters) @@ -62,6 +62,10 @@ dpaa2_switch_get_link_ksettings(struct net_device *netdev, struct dpsw_link_state state = {0}; int err = 0; + if (dpaa2_switch_port_is_type_phy(port_priv)) + return phylink_ethtool_ksettings_get(port_priv->mac->phylink, + link_ksettings); + err = dpsw_if_get_link_state(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, port_priv->idx, @@ -95,6 +99,10 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev, bool if_running; int err = 0, ret; + if (dpaa2_switch_port_is_type_phy(port_priv)) + return phylink_ethtool_ksettings_set(port_priv->mac->phylink, + link_ksettings); + /* Interface needs to be down to change link settings */ if_running = netif_running(netdev); if (if_running) { @@ -134,11 +142,17 @@ dpaa2_switch_set_link_ksettings(struct net_device *netdev, return err; } -static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset) +static int +dpaa2_switch_ethtool_get_sset_count(struct net_device *netdev, int sset) { + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + int num_ss_stats = DPAA2_SWITCH_NUM_COUNTERS; + switch (sset) { case ETH_SS_STATS: - return DPAA2_SWITCH_NUM_COUNTERS; + if (port_priv->mac) + num_ss_stats += dpaa2_mac_get_sset_count(); + return num_ss_stats; default: return -EOPNOTSUPP; } @@ -147,14 +161,19 @@ static int dpaa2_switch_ethtool_get_sset_count(struct net_device *dev, int sset) static void dpaa2_switch_ethtool_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { + struct ethsw_port_priv *port_priv = netdev_priv(netdev); + u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) - memcpy(data + i * ETH_GSTRING_LEN, - dpaa2_switch_ethtool_counters[i].name, + for (i = 0; i < DPAA2_SWITCH_NUM_COUNTERS; i++) { + memcpy(p, dpaa2_switch_ethtool_counters[i].name, ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + if (port_priv->mac) + dpaa2_mac_get_strings(p); break; } } @@ -176,6 +195,9 @@ static void dpaa2_switch_ethtool_get_stats(struct net_device *netdev, netdev_err(netdev, "dpsw_if_get_counter[%s] err %d\n", dpaa2_switch_ethtool_counters[i].name, err); } + + if (port_priv->mac) + dpaa2_mac_get_ethtool_stats(port_priv->mac, data + i); } const struct ethtool_ops dpaa2_switch_port_ethtool_ops = { diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 71129724d9ca..d260993ab2dc 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -594,12 +594,18 @@ static int dpaa2_switch_port_change_mtu(struct net_device *netdev, int mtu) return 0; } -static int dpaa2_switch_port_carrier_state_sync(struct net_device *netdev) +static int dpaa2_switch_port_link_state_update(struct net_device *netdev) { struct ethsw_port_priv *port_priv = netdev_priv(netdev); struct dpsw_link_state state; int err; + /* When we manage the MAC/PHY using phylink there is no need + * to manually update the netif_carrier. + */ + if (dpaa2_switch_port_is_type_phy(port_priv)) + return 0; + /* Interrupts are received even though no one issued an 'ifconfig up' * on the switch interface. Ignore these link state update interrupts */ @@ -677,12 +683,14 @@ static int dpaa2_switch_port_open(struct net_device *netdev) struct ethsw_core *ethsw = port_priv->ethsw_data; int err; - /* Explicitly set carrier off, otherwise - * netif_carrier_ok() will return true and cause 'ip link show' - * to report the LOWER_UP flag, even though the link - * notification wasn't even received. - */ - netif_carrier_off(netdev); + if (!dpaa2_switch_port_is_type_phy(port_priv)) { + /* Explicitly set carrier off, otherwise + * netif_carrier_ok() will return true and cause 'ip link show' + * to report the LOWER_UP flag, even though the link + * notification wasn't even received. + */ + netif_carrier_off(netdev); + } err = dpsw_if_enable(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, @@ -692,23 +700,12 @@ static int dpaa2_switch_port_open(struct net_device *netdev) return err; } - /* sync carrier state */ - err = dpaa2_switch_port_carrier_state_sync(netdev); - if (err) { - netdev_err(netdev, - "dpaa2_switch_port_carrier_state_sync err %d\n", err); - goto err_carrier_sync; - } - dpaa2_switch_enable_ctrl_if_napi(ethsw); - return 0; + if (dpaa2_switch_port_is_type_phy(port_priv)) + phylink_start(port_priv->mac->phylink); -err_carrier_sync: - dpsw_if_disable(port_priv->ethsw_data->mc_io, 0, - port_priv->ethsw_data->dpsw_handle, - port_priv->idx); - return err; + return 0; } static int dpaa2_switch_port_stop(struct net_device *netdev) @@ -717,6 +714,13 @@ static int dpaa2_switch_port_stop(struct net_device *netdev) struct ethsw_core *ethsw = port_priv->ethsw_data; int err; + if (dpaa2_switch_port_is_type_phy(port_priv)) { + phylink_stop(port_priv->mac->phylink); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } + err = dpsw_if_disable(port_priv->ethsw_data->mc_io, 0, port_priv->ethsw_data->dpsw_handle, port_priv->idx); @@ -1419,41 +1423,103 @@ bool dpaa2_switch_port_dev_check(const struct net_device *netdev) return netdev->netdev_ops == &dpaa2_switch_port_ops; } -static void dpaa2_switch_links_state_update(struct ethsw_core *ethsw) +static int dpaa2_switch_port_connect_mac(struct ethsw_port_priv *port_priv) { - int i; + struct fsl_mc_device *dpsw_port_dev, *dpmac_dev; + struct dpaa2_mac *mac; + int err; - for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { - dpaa2_switch_port_carrier_state_sync(ethsw->ports[i]->netdev); - dpaa2_switch_port_set_mac_addr(ethsw->ports[i]); + dpsw_port_dev = to_fsl_mc_device(port_priv->netdev->dev.parent); + dpmac_dev = fsl_mc_get_endpoint(dpsw_port_dev, port_priv->idx); + + if (PTR_ERR(dpmac_dev) == -EPROBE_DEFER) + return PTR_ERR(dpmac_dev); + + if (IS_ERR(dpmac_dev) || dpmac_dev->dev.type != &fsl_mc_bus_dpmac_type) + return 0; + + mac = kzalloc(sizeof(*mac), GFP_KERNEL); + if (!mac) + return -ENOMEM; + + mac->mc_dev = dpmac_dev; + mac->mc_io = port_priv->ethsw_data->mc_io; + mac->net_dev = port_priv->netdev; + + err = dpaa2_mac_open(mac); + if (err) + goto err_free_mac; + port_priv->mac = mac; + + if (dpaa2_switch_port_is_type_phy(port_priv)) { + err = dpaa2_mac_connect(mac); + if (err) { + netdev_err(port_priv->netdev, + "Error connecting to the MAC endpoint %pe\n", + ERR_PTR(err)); + goto err_close_mac; + } } + + return 0; + +err_close_mac: + dpaa2_mac_close(mac); + port_priv->mac = NULL; +err_free_mac: + kfree(mac); + return err; +} + +static void dpaa2_switch_port_disconnect_mac(struct ethsw_port_priv *port_priv) +{ + if (dpaa2_switch_port_is_type_phy(port_priv)) + dpaa2_mac_disconnect(port_priv->mac); + + if (!dpaa2_switch_port_has_mac(port_priv)) + return; + + dpaa2_mac_close(port_priv->mac); + kfree(port_priv->mac); + port_priv->mac = NULL; } static irqreturn_t dpaa2_switch_irq0_handler_thread(int irq_num, void *arg) { struct device *dev = (struct device *)arg; struct ethsw_core *ethsw = dev_get_drvdata(dev); - - /* Mask the events and the if_id reserved bits to be cleared on read */ - u32 status = DPSW_IRQ_EVENT_LINK_CHANGED | 0xFFFF0000; - int err; + struct ethsw_port_priv *port_priv; + u32 status = ~0; + int err, if_id; err = dpsw_get_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, DPSW_IRQ_INDEX_IF, &status); if (err) { dev_err(dev, "Can't get irq status (err %d)\n", err); - - err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, - DPSW_IRQ_INDEX_IF, 0xFFFFFFFF); - if (err) - dev_err(dev, "Can't clear irq status (err %d)\n", err); goto out; } - if (status & DPSW_IRQ_EVENT_LINK_CHANGED) - dpaa2_switch_links_state_update(ethsw); + if_id = (status & 0xFFFF0000) >> 16; + port_priv = ethsw->ports[if_id]; + + if (status & DPSW_IRQ_EVENT_LINK_CHANGED) { + dpaa2_switch_port_link_state_update(port_priv->netdev); + dpaa2_switch_port_set_mac_addr(port_priv); + } + + if (status & DPSW_IRQ_EVENT_ENDPOINT_CHANGED) { + if (dpaa2_switch_port_has_mac(port_priv)) + dpaa2_switch_port_disconnect_mac(port_priv); + else + dpaa2_switch_port_connect_mac(port_priv); + } out: + err = dpsw_clear_irq_status(ethsw->mc_io, 0, ethsw->dpsw_handle, + DPSW_IRQ_INDEX_IF, status); + if (err) + dev_err(dev, "Can't clear irq status (err %d)\n", err); + return IRQ_HANDLED; } @@ -3133,6 +3199,7 @@ static int dpaa2_switch_remove(struct fsl_mc_device *sw_dev) for (i = 0; i < ethsw->sw_attr.num_ifs; i++) { port_priv = ethsw->ports[i]; unregister_netdev(port_priv->netdev); + dpaa2_switch_port_disconnect_mac(port_priv); free_netdev(port_priv->netdev); } @@ -3212,6 +3279,10 @@ static int dpaa2_switch_probe_port(struct ethsw_core *ethsw, goto err_port_probe; port_priv->learn_ena = false; + err = dpaa2_switch_port_connect_mac(port_priv); + if (err) + goto err_port_probe; + return 0; err_port_probe: @@ -3288,12 +3359,6 @@ static int dpaa2_switch_probe(struct fsl_mc_device *sw_dev) ðsw->fq[i].napi, dpaa2_switch_poll, NAPI_POLL_WEIGHT); - err = dpsw_enable(ethsw->mc_io, 0, ethsw->dpsw_handle); - if (err) { - dev_err(ethsw->dev, "dpsw_enable err %d\n", err); - goto err_free_netdev; - } - /* Setup IRQs */ err = dpaa2_switch_setup_irqs(sw_dev); if (err) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h index f69d940f3c5b..0002dca4d417 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.h @@ -21,6 +21,7 @@ #include <net/pkt_cls.h> #include <soc/fsl/dpaa2-io.h> +#include "dpaa2-mac.h" #include "dpsw.h" /* Number of IRQs supported */ @@ -159,6 +160,7 @@ struct ethsw_port_priv { bool learn_ena; struct dpaa2_switch_filter_block *filter_block; + struct dpaa2_mac *mac; }; /* Switch data */ @@ -225,6 +227,22 @@ static inline bool dpaa2_switch_supports_cpu_traffic(struct ethsw_core *ethsw) return true; } +static inline bool +dpaa2_switch_port_is_type_phy(struct ethsw_port_priv *port_priv) +{ + if (port_priv->mac && + (port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_PHY || + port_priv->mac->attr.link_type == DPMAC_LINK_TYPE_BACKPLANE)) + return true; + + return false; +} + +static inline bool dpaa2_switch_port_has_mac(struct ethsw_port_priv *port_priv) +{ + return port_priv->mac ? true : false; +} + bool dpaa2_switch_port_dev_check(const struct net_device *netdev); int dpaa2_switch_port_vlans_add(struct net_device *netdev, diff --git a/drivers/net/ethernet/freescale/dpaa2/dpsw.h b/drivers/net/ethernet/freescale/dpaa2/dpsw.h index 892df905b876..b90bd363f47a 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpsw.h +++ b/drivers/net/ethernet/freescale/dpaa2/dpsw.h @@ -99,6 +99,11 @@ int dpsw_reset(struct fsl_mc_io *mc_io, u32 cmd_flags, u16 token); #define DPSW_IRQ_EVENT_LINK_CHANGED 0x0001 /** + * DPSW_IRQ_EVENT_ENDPOINT_CHANGED - Indicates a change in endpoint + */ +#define DPSW_IRQ_EVENT_ENDPOINT_CHANGED 0x0002 + +/** * struct dpsw_irq_cfg - IRQ configuration * @addr: Address that must be written to signal a message-based interrupt * @val: Value to write into irq_addr address diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index ae3259164395..d2e9a6c02d1e 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -189,6 +189,8 @@ #define FEC_RXIC0 0xfff #define FEC_RXIC1 0xfff #define FEC_RXIC2 0xfff +#define FEC_LPI_SLEEP 0xfff +#define FEC_LPI_WAKE 0xfff #endif /* CONFIG_M5272 */ diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 40ea318d7396..fdff37b87de7 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -2042,6 +2042,34 @@ failed_clk_ptp: return ret; } +static int fec_enet_parse_rgmii_delay(struct fec_enet_private *fep, + struct device_node *np) +{ + u32 rgmii_tx_delay, rgmii_rx_delay; + + /* For rgmii tx internal delay, valid values are 0ps and 2000ps */ + if (!of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_tx_delay)) { + if (rgmii_tx_delay != 0 && rgmii_tx_delay != 2000) { + dev_err(&fep->pdev->dev, "The only allowed RGMII TX delay values are: 0ps, 2000ps"); + return -EINVAL; + } else if (rgmii_tx_delay == 2000) { + fep->rgmii_txc_dly = true; + } + } + + /* For rgmii rx internal delay, valid values are 0ps and 2000ps */ + if (!of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_rx_delay)) { + if (rgmii_rx_delay != 0 && rgmii_rx_delay != 2000) { + dev_err(&fep->pdev->dev, "The only allowed RGMII RX delay values are: 0ps, 2000ps"); + return -EINVAL; + } else if (rgmii_rx_delay == 2000) { + fep->rgmii_rxc_dly = true; + } + } + + return 0; +} + static int fec_enet_mii_probe(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); @@ -3719,7 +3747,6 @@ fec_probe(struct platform_device *pdev) char irq_name[8]; int irq_cnt; struct fec_devinfo *dev_info; - u32 rgmii_delay; fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs); @@ -3777,12 +3804,6 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_stop_mode; - /* For rgmii internal delay, valid values are 0ps and 2000ps */ - if (of_property_read_u32(np, "tx-internal-delay-ps", &rgmii_delay)) - fep->rgmii_txc_dly = true; - if (of_property_read_u32(np, "rx-internal-delay-ps", &rgmii_delay)) - fep->rgmii_rxc_dly = true; - phy_node = of_parse_phandle(np, "phy-handle", 0); if (!phy_node && of_phy_is_fixed_link(np)) { ret = of_phy_register_fixed_link(np); @@ -3806,6 +3827,10 @@ fec_probe(struct platform_device *pdev) fep->phy_interface = interface; } + ret = fec_enet_parse_rgmii_delay(fep, np); + if (ret) + goto failed_rgmii_delay; + fep->clk_ipg = devm_clk_get(&pdev->dev, "ipg"); if (IS_ERR(fep->clk_ipg)) { ret = PTR_ERR(fep->clk_ipg); @@ -3835,9 +3860,11 @@ fec_probe(struct platform_device *pdev) fep->clk_ref_rate = clk_get_rate(fep->clk_ref); /* clk_2x_txclk is optional, depends on board */ - fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk"); - if (IS_ERR(fep->clk_2x_txclk)) - fep->clk_2x_txclk = NULL; + if (fep->rgmii_txc_dly || fep->rgmii_rxc_dly) { + fep->clk_2x_txclk = devm_clk_get(&pdev->dev, "enet_2x_txclk"); + if (IS_ERR(fep->clk_2x_txclk)) + fep->clk_2x_txclk = NULL; + } fep->bufdesc_ex = fep->quirks & FEC_QUIRK_HAS_BUFDESC_EX; fep->clk_ptp = devm_clk_get(&pdev->dev, "ptp"); @@ -3955,6 +3982,7 @@ failed_clk_ahb: failed_clk_ipg: fec_enet_clk_enable(ndev, false); failed_clk: +failed_rgmii_delay: if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(phy_node); @@ -3989,13 +4017,13 @@ fec_drv_remove(struct platform_device *pdev) if (of_phy_is_fixed_link(np)) of_phy_deregister_fixed_link(np); of_node_put(fep->phy_node); - free_netdev(ndev); clk_disable_unprepare(fep->clk_ahb); clk_disable_unprepare(fep->clk_ipg); pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + free_netdev(ndev); return 0; } diff --git a/drivers/net/ethernet/hisilicon/Kconfig b/drivers/net/ethernet/hisilicon/Kconfig index 094e4a37a295..2ba0e7bd3466 100644 --- a/drivers/net/ethernet/hisilicon/Kconfig +++ b/drivers/net/ethernet/hisilicon/Kconfig @@ -91,6 +91,7 @@ config HNS3 tristate "Hisilicon Network Subsystem Support HNS3 (Framework)" depends on PCI select NET_DEVLINK + select PAGE_POOL help This selects the framework support for Hisilicon Network Subsystem 3. This layer facilitates clients like ENET, RoCE and user-space ethernet diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index cb8d5da3654f..fcbeb1fbe5b8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3205,6 +3205,21 @@ static int hns3_alloc_buffer(struct hns3_enet_ring *ring, unsigned int order = hns3_page_order(ring); struct page *p; + if (ring->page_pool) { + p = page_pool_dev_alloc_frag(ring->page_pool, + &cb->page_offset, + hns3_buf_size(ring)); + if (unlikely(!p)) + return -ENOMEM; + + cb->priv = p; + cb->buf = page_address(p); + cb->dma = page_pool_get_dma_addr(p); + cb->type = DESC_TYPE_PP_FRAG; + cb->reuse_flag = 0; + return 0; + } + p = dev_alloc_pages(order); if (!p) return -ENOMEM; @@ -3227,8 +3242,13 @@ static void hns3_free_buffer(struct hns3_enet_ring *ring, if (cb->type & (DESC_TYPE_SKB | DESC_TYPE_BOUNCE_HEAD | DESC_TYPE_BOUNCE_ALL | DESC_TYPE_SGL_SKB)) napi_consume_skb(cb->priv, budget); - else if (!HNAE3_IS_TX_RING(ring) && cb->pagecnt_bias) - __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); + else if (!HNAE3_IS_TX_RING(ring)) { + if (cb->type & DESC_TYPE_PAGE && cb->pagecnt_bias) + __page_frag_cache_drain(cb->priv, cb->pagecnt_bias); + else if (cb->type & DESC_TYPE_PP_FRAG) + page_pool_put_full_page(ring->page_pool, cb->priv, + false); + } memset(cb, 0, sizeof(*cb)); } @@ -3315,7 +3335,7 @@ static int hns3_alloc_and_map_buffer(struct hns3_enet_ring *ring, int ret; ret = hns3_alloc_buffer(ring, cb); - if (ret) + if (ret || ring->page_pool) goto out; ret = hns3_map_buffer(ring, cb); @@ -3337,7 +3357,8 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i) if (ret) return ret; - ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + + ring->desc_cb[i].page_offset); return 0; } @@ -3367,7 +3388,8 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, { hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; - ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + + ring->desc_cb[i].page_offset); ring->desc[i].rx.bd_base_info = 0; } @@ -3539,6 +3561,12 @@ static void hns3_nic_reuse_page(struct sk_buff *skb, int i, u32 frag_size = size - pull_len; bool reused; + if (ring->page_pool) { + skb_add_rx_frag(skb, i, desc_cb->priv, frag_offset, + frag_size, truesize); + return; + } + /* Avoid re-using remote or pfmem page */ if (unlikely(!dev_page_is_reusable(desc_cb->priv))) goto out; @@ -3856,6 +3884,9 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, /* We can reuse buffer as-is, just make sure it is reusable */ if (dev_page_is_reusable(desc_cb->priv)) desc_cb->reuse_flag = 1; + else if (desc_cb->type & DESC_TYPE_PP_FRAG) + page_pool_put_full_page(ring->page_pool, desc_cb->priv, + false); else /* This page cannot be reused so discard it */ __page_frag_cache_drain(desc_cb->priv, desc_cb->pagecnt_bias); @@ -3863,6 +3894,10 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length, hns3_rx_ring_move_fw(ring); return 0; } + + if (ring->page_pool) + skb_mark_for_recycle(skb); + u64_stats_update_begin(&ring->syncp); ring->stats.seg_pkt_cnt++; u64_stats_update_end(&ring->syncp); @@ -3901,6 +3936,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring) "alloc rx fraglist skb fail\n"); return -ENXIO; } + + if (ring->page_pool) + skb_mark_for_recycle(new_skb); + ring->frag_num = 0; if (ring->tail_skb) { @@ -4705,6 +4744,29 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv) priv->ring = NULL; } +static void hns3_alloc_page_pool(struct hns3_enet_ring *ring) +{ + struct page_pool_params pp_params = { + .flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG | + PP_FLAG_DMA_SYNC_DEV, + .order = hns3_page_order(ring), + .pool_size = ring->desc_num * hns3_buf_size(ring) / + (PAGE_SIZE << hns3_page_order(ring)), + .nid = dev_to_node(ring_to_dev(ring)), + .dev = ring_to_dev(ring), + .dma_dir = DMA_FROM_DEVICE, + .offset = 0, + .max_len = PAGE_SIZE << hns3_page_order(ring), + }; + + ring->page_pool = page_pool_create(&pp_params); + if (IS_ERR(ring->page_pool)) { + dev_warn(ring_to_dev(ring), "page pool creation failed: %ld\n", + PTR_ERR(ring->page_pool)); + ring->page_pool = NULL; + } +} + static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) { int ret; @@ -4724,6 +4786,8 @@ static int hns3_alloc_ring_memory(struct hns3_enet_ring *ring) goto out_with_desc_cb; if (!HNAE3_IS_TX_RING(ring)) { + hns3_alloc_page_pool(ring); + ret = hns3_alloc_ring_buffers(ring); if (ret) goto out_with_desc; @@ -4764,6 +4828,11 @@ void hns3_fini_ring(struct hns3_enet_ring *ring) devm_kfree(ring_to_dev(ring), tx_spare); ring->tx_spare = NULL; } + + if (!HNAE3_IS_TX_RING(ring) && ring->page_pool) { + page_pool_destroy(ring->page_pool); + ring->page_pool = NULL; + } } static int hns3_buf_size2type(u32 buf_size) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 15af3d93857b..27809d68d6ed 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -6,6 +6,7 @@ #include <linux/dim.h> #include <linux/if_vlan.h> +#include <net/page_pool.h> #include "hnae3.h" @@ -307,6 +308,7 @@ enum hns3_desc_type { DESC_TYPE_BOUNCE_ALL = 1 << 3, DESC_TYPE_BOUNCE_HEAD = 1 << 4, DESC_TYPE_SGL_SKB = 1 << 5, + DESC_TYPE_PP_FRAG = 1 << 6, }; struct hns3_desc_cb { @@ -451,6 +453,7 @@ struct hns3_enet_ring { struct hnae3_queue *tqp; int queue_index; struct device *dev; /* will be used for DMA mapping of descriptors */ + struct page_pool *page_pool; /* statistic */ struct ring_stats stats; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c index 06d29945d4e1..448f29aa4e6b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_devlink.c @@ -112,14 +112,14 @@ int hclge_devlink_init(struct hclge_dev *hdev) int ret; devlink = devlink_alloc(&hclge_devlink_ops, - sizeof(struct hclge_devlink_priv)); + sizeof(struct hclge_devlink_priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); priv->hdev = hdev; - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) { dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", ret); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c index 3b1f84502e36..befa9bcc2f2f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.c @@ -5,9 +5,27 @@ #include "hclge_main.h" #include "hnae3.h" +static int hclge_ptp_get_cycle(struct hclge_dev *hdev) +{ + struct hclge_ptp *ptp = hdev->ptp; + + ptp->cycle.quo = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG) & + HCLGE_PTP_CYCLE_QUO_MASK; + ptp->cycle.numer = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG); + ptp->cycle.den = readl(hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); + + if (ptp->cycle.den == 0) { + dev_err(&hdev->pdev->dev, "invalid ptp cycle denominator!\n"); + return -EINVAL; + } + + return 0; +} + static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) { struct hclge_dev *hdev = hclge_ptp_get_hdev(ptp); + struct hclge_ptp_cycle *cycle = &hdev->ptp->cycle; u64 adj_val, adj_base, diff; unsigned long flags; bool is_neg = false; @@ -18,7 +36,7 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) is_neg = true; } - adj_base = HCLGE_PTP_CYCLE_ADJ_BASE * HCLGE_PTP_CYCLE_ADJ_UNIT; + adj_base = (u64)cycle->quo * (u64)cycle->den + (u64)cycle->numer; adj_val = adj_base * ppb; diff = div_u64(adj_val, 1000000000ULL); @@ -29,16 +47,16 @@ static int hclge_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) /* This clock cycle is defined by three part: quotient, numerator * and denominator. For example, 2.5ns, the quotient is 2, - * denominator is fixed to HCLGE_PTP_CYCLE_ADJ_UNIT, and numerator - * is 0.5 * HCLGE_PTP_CYCLE_ADJ_UNIT. + * denominator is fixed to ptp->cycle.den, and numerator + * is 0.5 * ptp->cycle.den. */ - quo = div_u64_rem(adj_val, HCLGE_PTP_CYCLE_ADJ_UNIT, &numerator); + quo = div_u64_rem(adj_val, cycle->den, &numerator); spin_lock_irqsave(&hdev->ptp->lock, flags); - writel(quo, hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG); + writel(quo & HCLGE_PTP_CYCLE_QUO_MASK, + hdev->ptp->io_base + HCLGE_PTP_CYCLE_QUO_REG); writel(numerator, hdev->ptp->io_base + HCLGE_PTP_CYCLE_NUM_REG); - writel(HCLGE_PTP_CYCLE_ADJ_UNIT, - hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); + writel(cycle->den, hdev->ptp->io_base + HCLGE_PTP_CYCLE_DEN_REG); writel(HCLGE_PTP_CYCLE_ADJ_EN, hdev->ptp->io_base + HCLGE_PTP_CYCLE_CFG_REG); spin_unlock_irqrestore(&hdev->ptp->lock, flags); @@ -475,6 +493,10 @@ int hclge_ptp_init(struct hclge_dev *hdev) ret = hclge_ptp_create_clock(hdev); if (ret) return ret; + + ret = hclge_ptp_get_cycle(hdev); + if (ret) + return ret; } ret = hclge_ptp_int_en(hdev, true); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h index 5a202b775471..dbf5f4c08019 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_ptp.h @@ -29,6 +29,7 @@ #define HCLGE_PTP_TIME_ADJ_REG 0x60 #define HCLGE_PTP_TIME_ADJ_EN BIT(0) #define HCLGE_PTP_CYCLE_QUO_REG 0x64 +#define HCLGE_PTP_CYCLE_QUO_MASK GENMASK(7, 0) #define HCLGE_PTP_CYCLE_DEN_REG 0x68 #define HCLGE_PTP_CYCLE_NUM_REG 0x6C #define HCLGE_PTP_CYCLE_CFG_REG 0x70 @@ -37,9 +38,7 @@ #define HCLGE_PTP_CUR_TIME_SEC_L_REG 0x78 #define HCLGE_PTP_CUR_TIME_NSEC_REG 0x7C -#define HCLGE_PTP_CYCLE_ADJ_BASE 2 #define HCLGE_PTP_CYCLE_ADJ_MAX 500000000 -#define HCLGE_PTP_CYCLE_ADJ_UNIT 100000000 #define HCLGE_PTP_SEC_H_OFFSET 32u #define HCLGE_PTP_SEC_L_MASK GENMASK(31, 0) @@ -47,6 +46,12 @@ #define HCLGE_PTP_FLAG_TX_EN 1 #define HCLGE_PTP_FLAG_RX_EN 2 +struct hclge_ptp_cycle { + u32 quo; + u32 numer; + u32 den; +}; + struct hclge_ptp { struct hclge_dev *hdev; struct ptp_clock *clock; @@ -58,6 +63,7 @@ struct hclge_ptp { spinlock_t lock; /* protects ptp registers */ u32 ptp_cfg; u32 last_tx_seqid; + struct hclge_ptp_cycle cycle; unsigned long tx_start; unsigned long tx_cnt; unsigned long tx_skipped; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c index 21a45279fd99..1e6061fb8ed4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_devlink.c @@ -112,15 +112,16 @@ int hclgevf_devlink_init(struct hclgevf_dev *hdev) struct devlink *devlink; int ret; - devlink = devlink_alloc(&hclgevf_devlink_ops, - sizeof(struct hclgevf_devlink_priv)); + devlink = + devlink_alloc(&hclgevf_devlink_ops, + sizeof(struct hclgevf_devlink_priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); priv->hdev = hdev; - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) { dev_err(&pdev->dev, "failed to register devlink, ret = %d\n", ret); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index 58d5646444b0..6e11ee339f12 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -293,9 +293,9 @@ static const struct devlink_ops hinic_devlink_ops = { .flash_update = hinic_devlink_flash_update, }; -struct devlink *hinic_devlink_alloc(void) +struct devlink *hinic_devlink_alloc(struct device *dev) { - return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev)); + return devlink_alloc(&hinic_devlink_ops, sizeof(struct hinic_dev), dev); } void hinic_devlink_free(struct devlink *devlink) @@ -303,11 +303,11 @@ void hinic_devlink_free(struct devlink *devlink) devlink_free(devlink); } -int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev) +int hinic_devlink_register(struct hinic_devlink_priv *priv) { struct devlink *devlink = priv_to_devlink(priv); - return devlink_register(devlink, dev); + return devlink_register(devlink); } void hinic_devlink_unregister(struct hinic_devlink_priv *priv) diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h index a090ebcfaabb..9e315011015c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.h @@ -108,9 +108,9 @@ struct host_image_st { u32 device_id; }; -struct devlink *hinic_devlink_alloc(void); +struct devlink *hinic_devlink_alloc(struct device *dev); void hinic_devlink_free(struct devlink *devlink); -int hinic_devlink_register(struct hinic_devlink_priv *priv, struct device *dev); +int hinic_devlink_register(struct hinic_devlink_priv *priv); void hinic_devlink_unregister(struct hinic_devlink_priv *priv); int hinic_health_reporters_create(struct hinic_devlink_priv *priv); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c index 428108eb10d2..56b6b04e209b 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c @@ -754,7 +754,7 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev) return err; } - err = hinic_devlink_register(hwdev->devlink_dev, &pdev->dev); + err = hinic_devlink_register(hwdev->devlink_dev); if (err) { dev_err(&hwif->pdev->dev, "Failed to register devlink\n"); hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt); diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 405ee4d2d2b1..881d0b247561 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -1183,7 +1183,7 @@ static int nic_dev_init(struct pci_dev *pdev) struct devlink *devlink; int err, num_qps; - devlink = hinic_devlink_alloc(); + devlink = hinic_devlink_alloc(&pdev->dev); if (!devlink) { dev_err(&pdev->dev, "Hinic devlink alloc failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index fc8c7cd67471..b8a40146b895 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -1110,9 +1110,6 @@ static void print_eth(unsigned char *add, char *str) add, add + 6, add, add[12], add[13], str); } -static int io = 0x300; -static int irq = 10; - static const struct net_device_ops i596_netdev_ops = { .ndo_open = i596_open, .ndo_stop = i596_close, @@ -1123,7 +1120,7 @@ static const struct net_device_ops i596_netdev_ops = { .ndo_validate_addr = eth_validate_addr, }; -struct net_device * __init i82596_probe(int unit) +static struct net_device * __init i82596_probe(void) { struct net_device *dev; int i; @@ -1140,14 +1137,6 @@ struct net_device * __init i82596_probe(int unit) if (!dev) return ERR_PTR(-ENOMEM); - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } else { - dev->base_addr = io; - dev->irq = irq; - } - #ifdef ENABLE_MVME16x_NET if (MACH_IS_MVME16x) { if (mvme16x_config & MVME16x_CONFIG_NO_ETHERNET) { @@ -1515,22 +1504,22 @@ static void set_multicast_list(struct net_device *dev) } } -#ifdef MODULE static struct net_device *dev_82596; static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "i82596 debug mask"); -int __init init_module(void) +static int __init i82596_init(void) { if (debug >= 0) i596_debug = debug; - dev_82596 = i82596_probe(-1); + dev_82596 = i82596_probe(); return PTR_ERR_OR_ZERO(dev_82596); } +module_init(i82596_init); -void __exit cleanup_module(void) +static void __exit i82596_cleanup(void) { unregister_netdev(dev_82596); #ifdef __mc68000__ @@ -1544,5 +1533,4 @@ void __exit cleanup_module(void) free_page ((u32)(dev_82596->mem_start)); free_netdev(dev_82596); } - -#endif /* MODULE */ +module_exit(i82596_cleanup); diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c index 4564ee02c95f..893e0ddcb611 100644 --- a/drivers/net/ethernet/i825xx/sun3_82586.c +++ b/drivers/net/ethernet/i825xx/sun3_82586.c @@ -29,6 +29,7 @@ static int rfdadd = 0; /* rfdadd=1 may be better for 8K MEM cards */ static int fifo=0x8; /* don't change */ #include <linux/kernel.h> +#include <linux/module.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/ioport.h> @@ -276,7 +277,7 @@ static void alloc586(struct net_device *dev) memset((char *)p->scb,0,sizeof(struct scb_struct)); } -struct net_device * __init sun3_82586_probe(int unit) +static int __init sun3_82586_probe(void) { struct net_device *dev; unsigned long ioaddr; @@ -291,25 +292,20 @@ struct net_device * __init sun3_82586_probe(int unit) break; default: - return ERR_PTR(-ENODEV); + return -ENODEV; } if (found) - return ERR_PTR(-ENODEV); + return -ENODEV; ioaddr = (unsigned long)ioremap(IE_OBIO, SUN3_82586_TOTAL_SIZE); if (!ioaddr) - return ERR_PTR(-ENOMEM); + return -ENOMEM; found = 1; dev = alloc_etherdev(sizeof(struct priv)); if (!dev) goto out; - if (unit >= 0) { - sprintf(dev->name, "eth%d", unit); - netdev_boot_setup_check(dev); - } - dev->irq = IE_IRQ; dev->base_addr = ioaddr; err = sun3_82586_probe1(dev, ioaddr); @@ -326,8 +322,9 @@ out1: free_netdev(dev); out: iounmap((void __iomem *)ioaddr); - return ERR_PTR(err); + return err; } +module_init(sun3_82586_probe); static const struct net_device_ops sun3_82586_netdev_ops = { .ndo_open = sun3_82586_open, diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 3e822bad4851..2c9e4eeb7270 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -980,7 +980,7 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw, default: /* if we got here and link is up something bad is afoot */ netdev_info(netdev, - "WARNING: Link is up but PHY type 0x%x is not recognized.\n", + "WARNING: Link is up but PHY type 0x%x is not recognized, or incorrect cable is in use\n", hw_link_info->phy_type); } @@ -5294,6 +5294,10 @@ flags_complete: dev_warn(&pf->pdev->dev, "Device configuration forbids SW from starting the LLDP agent.\n"); return -EINVAL; + case I40E_AQ_RC_EAGAIN: + dev_warn(&pf->pdev->dev, + "Stop FW LLDP agent command is still being processed, please try again in a second.\n"); + return -EBUSY; default: dev_warn(&pf->pdev->dev, "Starting FW LLDP agent failed: error: %s, %s\n", diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 5b4012a09acb..97c78551395b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4457,11 +4457,10 @@ int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q, } /** - * i40e_vsi_control_tx - Start or stop a VSI's rings + * i40e_vsi_enable_tx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_tx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; @@ -4470,7 +4469,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q, - false /*is xdp*/, enable); + false /*is xdp*/, true); if (ret) break; @@ -4479,7 +4478,7 @@ static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable) ret = i40e_control_wait_tx_q(vsi->seid, pf, pf_q + vsi->alloc_queue_pairs, - true /*is xdp*/, enable); + true /*is xdp*/, true); if (ret) break; } @@ -4577,32 +4576,25 @@ int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable) } /** - * i40e_vsi_control_rx - Start or stop a VSI's rings + * i40e_vsi_enable_rx - Start a VSI's rings * @vsi: the VSI being configured - * @enable: start or stop the rings **/ -static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable) +static int i40e_vsi_enable_rx(struct i40e_vsi *vsi) { struct i40e_pf *pf = vsi->back; int i, pf_q, ret = 0; pf_q = vsi->base_queue; for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) { - ret = i40e_control_wait_rx_q(pf, pf_q, enable); + ret = i40e_control_wait_rx_q(pf, pf_q, true); if (ret) { dev_info(&pf->pdev->dev, - "VSI seid %d Rx ring %d %sable timeout\n", - vsi->seid, pf_q, (enable ? "en" : "dis")); + "VSI seid %d Rx ring %d enable timeout\n", + vsi->seid, pf_q); break; } } - /* Due to HW errata, on Rx disable only, the register can indicate done - * before it really is. Needs 50ms to be sure - */ - if (!enable) - mdelay(50); - return ret; } @@ -4615,29 +4607,47 @@ int i40e_vsi_start_rings(struct i40e_vsi *vsi) int ret = 0; /* do rx first for enable and last for disable */ - ret = i40e_vsi_control_rx(vsi, true); + ret = i40e_vsi_enable_rx(vsi); if (ret) return ret; - ret = i40e_vsi_control_tx(vsi, true); + ret = i40e_vsi_enable_tx(vsi); return ret; } +#define I40E_DISABLE_TX_GAP_MSEC 50 + /** * i40e_vsi_stop_rings - Stop a VSI's rings * @vsi: the VSI being configured **/ void i40e_vsi_stop_rings(struct i40e_vsi *vsi) { + struct i40e_pf *pf = vsi->back; + int pf_q, err, q_end; + /* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi); - /* do rx first for enable and last for disable - * Ignore return value, we need to shutdown whatever we can - */ - i40e_vsi_control_tx(vsi, false); - i40e_vsi_control_rx(vsi, false); + q_end = vsi->base_queue + vsi->num_queue_pairs; + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + i40e_pre_tx_queue_cfg(&pf->hw, (u32)pf_q, false); + + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) { + err = i40e_control_wait_rx_q(pf, pf_q, false); + if (err) + dev_info(&pf->pdev->dev, + "VSI seid %d Rx ring %d dissable timeout\n", + vsi->seid, pf_q); + } + + msleep(I40E_DISABLE_TX_GAP_MSEC); + pf_q = vsi->base_queue; + for (pf_q = vsi->base_queue; pf_q < q_end; pf_q++) + wr32(&pf->hw, I40E_QTX_ENA(pf_q), 0); + + i40e_vsi_wait_queues_disabled(vsi); } /** @@ -7283,6 +7293,8 @@ static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi, } if (vsi->num_queue_pairs < (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) { + dev_err(&vsi->back->pdev->dev, + "Failed to create traffic channel, insufficient number of queues.\n"); return -EINVAL; } if (sum_max_rate > i40e_get_link_speed(vsi)) { @@ -13264,6 +13276,7 @@ static const struct net_device_ops i40e_netdev_ops = { .ndo_poll_controller = i40e_netpoll, #endif .ndo_setup_tc = __i40e_setup_tc, + .ndo_select_queue = i40e_lan_select_queue, .ndo_set_features = i40e_set_features, .ndo_set_vf_mac = i40e_ndo_set_vf_mac, .ndo_set_vf_vlan = i40e_ndo_set_vf_port_vlan, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 38eb8151ee9a..3f25bd8c4924 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -3631,6 +3631,56 @@ dma_error: return -1; } +static u16 i40e_swdcb_skb_tx_hash(struct net_device *dev, + const struct sk_buff *skb, + u16 num_tx_queues) +{ + u32 jhash_initval_salt = 0xd631614b; + u32 hash; + + if (skb->sk && skb->sk->sk_hash) + hash = skb->sk->sk_hash; + else + hash = (__force u16)skb->protocol ^ skb->hash; + + hash = jhash_1word(hash, jhash_initval_salt); + + return (u16)(((u64)hash * num_tx_queues) >> 32); +} + +u16 i40e_lan_select_queue(struct net_device *netdev, + struct sk_buff *skb, + struct net_device __always_unused *sb_dev) +{ + struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_vsi *vsi = np->vsi; + struct i40e_hw *hw; + u16 qoffset; + u16 qcount; + u8 tclass; + u16 hash; + u8 prio; + + /* is DCB enabled at all? */ + if (vsi->tc_config.numtc == 1) + return i40e_swdcb_skb_tx_hash(netdev, skb, + netdev->real_num_tx_queues); + + prio = skb->priority; + hw = &vsi->back->hw; + tclass = hw->local_dcbx_config.etscfg.prioritytable[prio]; + /* sanity check */ + if (unlikely(!(vsi->tc_config.enabled_tc & BIT(tclass)))) + tclass = 0; + + /* select a queue assigned for the given TC */ + qcount = vsi->tc_config.tc_info[tclass].qcount; + hash = i40e_swdcb_skb_tx_hash(netdev, skb, qcount); + + qoffset = vsi->tc_config.tc_info[tclass].qoffset; + return qoffset + hash; +} + /** * i40e_xmit_xdp_ring - transmits an XDP buffer to an XDP Tx ring * @xdpf: data to transmit diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index 86fed05b4f19..bfc2845c99d1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -451,6 +451,8 @@ static inline unsigned int i40e_rx_pg_order(struct i40e_ring *ring) bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count); netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev); +u16 i40e_lan_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev); void i40e_clean_tx_ring(struct i40e_ring *tx_ring); void i40e_clean_rx_ring(struct i40e_ring *rx_ring); int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring); diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 91b545ab8b8f..8c863d64930b 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -475,7 +475,7 @@ struct ice_pf *ice_allocate_pf(struct device *dev) { struct devlink *devlink; - devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf)); + devlink = devlink_alloc(&ice_devlink_ops, sizeof(struct ice_pf), dev); if (!devlink) return NULL; @@ -502,7 +502,7 @@ int ice_devlink_register(struct ice_pf *pf) struct device *dev = ice_pf_to_dev(pf); int err; - err = devlink_register(devlink, dev); + err = devlink_register(devlink); if (err) { dev_err(dev, "devlink registration failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index ff8db311963c..5d1007e1b5c9 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2327,7 +2327,7 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, if (!skb) return ERR_PTR(-ENOMEM); - skb_mark_for_recycle(skb, virt_to_page(xdp->data), pool); + skb_mark_for_recycle(skb); skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); @@ -2339,10 +2339,6 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, skb_frag_page(frag), skb_frag_off(frag), skb_frag_size(frag), PAGE_SIZE); - /* We don't need to reset pp_recycle here. It's already set, so - * just mark fragments for recycling. - */ - page_pool_store_mem_info(skb_frag_page(frag), pool); } return skb; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 99bd8b8aa0e2..744f58f41ecc 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -3995,7 +3995,7 @@ static int mvpp2_rx(struct mvpp2_port *port, struct napi_struct *napi, } if (pp) - skb_mark_for_recycle(skb, page, pp); + skb_mark_for_recycle(skb); else dma_unmap_single_attrs(dev->dev.parent, dma_addr, bm_pool->buf_size, DMA_FROM_DEVICE, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 9169849881bf..544c96c8fe1d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1504,8 +1504,8 @@ static int cgx_lmac_init(struct cgx *cgx) /* Add reference */ cgx->lmac_idmap[lmac->lmac_id] = lmac; - cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); set_bit(lmac->lmac_id, &cgx->lmac_bmap); + cgx->mac_ops->mac_pause_frm_config(cgx, lmac->lmac_id, true); } return cgx_lmac_verify_fwi_version(cgx); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/common.h b/drivers/net/ethernet/marvell/octeontx2/af/common.h index 47f5ed006a93..752ba6b4b919 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/common.h @@ -146,10 +146,7 @@ enum nix_scheduler { #define TXSCH_RR_QTM_MAX ((1 << 24) - 1) #define TXSCH_TL1_DFLT_RR_QTM TXSCH_RR_QTM_MAX #define TXSCH_TL1_DFLT_RR_PRIO (0x1ull) -#define MAX_SCHED_WEIGHT 0xFF -#define DFLT_RR_WEIGHT 71 -#define DFLT_RR_QTM ((DFLT_RR_WEIGHT * TXSCH_RR_QTM_MAX) \ - / MAX_SCHED_WEIGHT) +#define CN10K_MAX_DWRR_WEIGHT 16384 /* Weight is 14bit on CN10K */ /* Min/Max packet sizes, excluding FCS */ #define NIC_HW_MIN_FRS 40 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index f5ec39de026a..447093361b7a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -1032,8 +1032,12 @@ struct nix_bp_cfg_rsp { struct nix_hw_info { struct mbox_msghdr hdr; + u16 rsvs16; u16 max_mtu; u16 min_mtu; + u32 rpm_dwrr_mtu; + u32 sdp_dwrr_mtu; + u64 rsvd[16]; /* Add reserved fields for future expansion */ }; struct nix_bandprof_alloc_req { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h index 19bad9a59c8f..243cf8070e77 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h @@ -151,7 +151,10 @@ enum npc_kpu_lh_ltype { * Software assigns pkind for each incoming port such as CGX * Ethernet interfaces, LBK interfaces, etc. */ +#define NPC_UNRESERVED_PKIND_COUNT NPC_RX_VLAN_EXDSA_PKIND + enum npc_pkind_type { + NPC_RX_LBK_PKIND = 0ULL, NPC_RX_VLAN_EXDSA_PKIND = 56ULL, NPC_RX_CHLEN24B_PKIND = 57ULL, NPC_RX_CPT_HDR_PKIND, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 017163fb3cd5..5fe277e354f7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -391,8 +391,10 @@ void rvu_get_pf_numvfs(struct rvu *rvu, int pf, int *numvfs, int *hwvf) /* Get numVFs attached to this PF and first HWVF */ cfg = rvu_read64(rvu, BLKADDR_RVUM, RVU_PRIV_PFX_CFG(pf)); - *numvfs = (cfg >> 12) & 0xFF; - *hwvf = cfg & 0xFFF; + if (numvfs) + *numvfs = (cfg >> 12) & 0xFF; + if (hwvf) + *hwvf = cfg & 0xFFF; } static int rvu_get_hwvf(struct rvu *rvu, int pcifunc) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 91503fb2762c..95591e77aea8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -329,6 +329,7 @@ struct hw_cap { bool nix_shaping; /* Is shaping and coloring supported */ bool nix_tx_link_bp; /* Can link backpressure TL queues ? */ bool nix_rx_multicast; /* Rx packet replication support */ + bool nix_common_dwrr_mtu; /* Common DWRR MTU for quantum config */ bool per_pf_mbox_regs; /* PF mbox specified in per PF registers ? */ bool programmable_chans; /* Channels programmable ? */ bool ipolicer; @@ -706,6 +707,8 @@ int nix_aq_context_read(struct rvu *rvu, struct nix_hw *nix_hw, struct nix_cn10k_aq_enq_rsp *aq_rsp, u16 pcifunc, u8 ctype, u32 qidx); int rvu_get_nix_blkaddr(struct rvu *rvu, u16 pcifunc); +u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu); +u32 convert_bytes_to_dwrr_mtu(u32 bytes); /* NPC APIs */ int rvu_npc_init(struct rvu *rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 2688186066d9..a55b46ad162d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -1364,6 +1364,89 @@ static void rvu_health_reporters_destroy(struct rvu *rvu) rvu_nix_health_reporters_destroy(rvu_dl); } +/* Devlink Params APIs */ +static int rvu_af_dl_dwrr_mtu_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + int dwrr_mtu = val.vu32; + struct nix_txsch *txsch; + struct nix_hw *nix_hw; + + if (!rvu->hw->cap.nix_common_dwrr_mtu) { + NL_SET_ERR_MSG_MOD(extack, + "Setting DWRR_MTU is not supported on this silicon"); + return -EOPNOTSUPP; + } + + if ((dwrr_mtu > 65536 || !is_power_of_2(dwrr_mtu)) && + (dwrr_mtu != 9728 && dwrr_mtu != 10240)) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid, supported MTUs are 0,2,4,8.16,32,64....4K,8K,32K,64K and 9728, 10240"); + return -EINVAL; + } + + nix_hw = get_nix_hw(rvu->hw, BLKADDR_NIX0); + if (!nix_hw) + return -ENODEV; + + txsch = &nix_hw->txsch[NIX_TXSCH_LVL_SMQ]; + if (rvu_rsrc_free_count(&txsch->schq) != txsch->schq.max) { + NL_SET_ERR_MSG_MOD(extack, + "Changing DWRR MTU is not supported when there are active NIXLFs"); + NL_SET_ERR_MSG_MOD(extack, + "Make sure none of the PF/VF interfaces are initialized and retry"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int rvu_af_dl_dwrr_mtu_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + u64 dwrr_mtu; + + dwrr_mtu = convert_bytes_to_dwrr_mtu(ctx->val.vu32); + rvu_write64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU, dwrr_mtu); + + return 0; +} + +static int rvu_af_dl_dwrr_mtu_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct rvu_devlink *rvu_dl = devlink_priv(devlink); + struct rvu *rvu = rvu_dl->rvu; + u64 dwrr_mtu; + + if (!rvu->hw->cap.nix_common_dwrr_mtu) + return -EOPNOTSUPP; + + dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + ctx->val.vu32 = convert_dwrr_mtu_to_bytes(dwrr_mtu); + + return 0; +} + +enum rvu_af_dl_param_id { + RVU_AF_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, +}; + +static const struct devlink_param rvu_af_dl_params[] = { + DEVLINK_PARAM_DRIVER(RVU_AF_DEVLINK_PARAM_ID_DWRR_MTU, + "dwrr_mtu", DEVLINK_PARAM_TYPE_U32, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + rvu_af_dl_dwrr_mtu_get, rvu_af_dl_dwrr_mtu_set, + rvu_af_dl_dwrr_mtu_validate), +}; + +/* Devlink switch mode */ static int rvu_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { struct rvu_devlink *rvu_dl = devlink_priv(devlink); @@ -1420,13 +1503,14 @@ int rvu_register_dl(struct rvu *rvu) struct devlink *dl; int err; - dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink)); + dl = devlink_alloc(&rvu_devlink_ops, sizeof(struct rvu_devlink), + rvu->dev); if (!dl) { dev_warn(rvu->dev, "devlink_alloc failed\n"); return -ENOMEM; } - err = devlink_register(dl, rvu->dev); + err = devlink_register(dl); if (err) { dev_err(rvu->dev, "devlink register failed with error %d\n", err); devlink_free(dl); @@ -1438,7 +1522,30 @@ int rvu_register_dl(struct rvu *rvu) rvu_dl->rvu = rvu; rvu->rvu_dl = rvu_dl; - return rvu_health_reporters_create(rvu); + err = rvu_health_reporters_create(rvu); + if (err) { + dev_err(rvu->dev, + "devlink health reporter creation failed with error %d\n", err); + goto err_dl_health; + } + + err = devlink_params_register(dl, rvu_af_dl_params, + ARRAY_SIZE(rvu_af_dl_params)); + if (err) { + dev_err(rvu->dev, + "devlink params register failed with error %d", err); + goto err_dl_health; + } + + devlink_params_publish(dl); + + return 0; + +err_dl_health: + rvu_health_reporters_destroy(rvu); + devlink_unregister(dl); + devlink_free(dl); + return err; } void rvu_unregister_dl(struct rvu *rvu) @@ -1449,6 +1556,8 @@ void rvu_unregister_dl(struct rvu *rvu) if (!dl) return; + devlink_params_unregister(dl, rvu_af_dl_params, + ARRAY_SIZE(rvu_af_dl_params)); rvu_health_reporters_destroy(rvu); devlink_unregister(dl); devlink_free(dl); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 0933699a0d2d..53db8ebddb5e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -192,15 +192,67 @@ struct nix_hw *get_nix_hw(struct rvu_hwinfo *hw, int blkaddr) return NULL; } +u32 convert_dwrr_mtu_to_bytes(u8 dwrr_mtu) +{ + dwrr_mtu &= 0x1FULL; + + /* MTU used for DWRR calculation is in power of 2 up until 64K bytes. + * Value of 4 is reserved for MTU value of 9728 bytes. + * Value of 5 is reserved for MTU value of 10240 bytes. + */ + switch (dwrr_mtu) { + case 4: + return 9728; + case 5: + return 10240; + default: + return BIT_ULL(dwrr_mtu); + } + + return 0; +} + +u32 convert_bytes_to_dwrr_mtu(u32 bytes) +{ + /* MTU used for DWRR calculation is in power of 2 up until 64K bytes. + * Value of 4 is reserved for MTU value of 9728 bytes. + * Value of 5 is reserved for MTU value of 10240 bytes. + */ + if (bytes > BIT_ULL(16)) + return 0; + + switch (bytes) { + case 9728: + return 4; + case 10240: + return 5; + default: + return ilog2(bytes); + } + + return 0; +} + static void nix_rx_sync(struct rvu *rvu, int blkaddr) { int err; - /*Sync all in flight RX packets to LLC/DRAM */ + /* Sync all in flight RX packets to LLC/DRAM */ + rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0)); + err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true); + if (err) + dev_err(rvu->dev, "SYNC1: NIX RX software sync failed\n"); + + /* SW_SYNC ensures all existing transactions are finished and pkts + * are written to LLC/DRAM, queues should be teared down after + * successful SW_SYNC. Due to a HW errata, in some rare scenarios + * an existing transaction might end after SW_SYNC operation. To + * ensure operation is fully done, do the SW_SYNC twice. + */ rvu_write64(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0)); err = rvu_poll_reg(rvu, blkaddr, NIX_AF_RX_SW_SYNC, BIT_ULL(0), true); if (err) - dev_err(rvu->dev, "NIX RX software sync failed\n"); + dev_err(rvu->dev, "SYNC2: NIX RX software sync failed\n"); } static bool is_valid_txschq(struct rvu *rvu, int blkaddr, @@ -298,6 +350,7 @@ static int nix_interface_init(struct rvu *rvu, u16 pcifunc, int type, int nixlf) rvu_nix_chan_lbk(rvu, lbkid, vf + 1); pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; + rvu_npc_set_pkind(rvu, NPC_RX_LBK_PKIND, pfvf); rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, pfvf->rx_chan_base, pfvf->rx_chan_cnt); @@ -1946,8 +1999,17 @@ static void nix_tl1_default_cfg(struct rvu *rvu, struct nix_hw *nix_hw, return; rvu_write64(rvu, blkaddr, NIX_AF_TL1X_TOPOLOGY(schq), (TXSCH_TL1_DFLT_RR_PRIO << 1)); - rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq), - TXSCH_TL1_DFLT_RR_QTM); + + /* On OcteonTx2 the config was in bytes and newer silcons + * it's changed to weight. + */ + if (!rvu->hw->cap.nix_common_dwrr_mtu) + rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq), + TXSCH_TL1_DFLT_RR_QTM); + else + rvu_write64(rvu, blkaddr, NIX_AF_TL1X_SCHEDULE(schq), + CN10K_MAX_DWRR_WEIGHT); + rvu_write64(rvu, blkaddr, NIX_AF_TL1X_CIR(schq), 0x00); pfvf_map[schq] = TXSCH_SET_FLAG(pfvf_map[schq], NIX_TXSCHQ_CFG_DONE); } @@ -2655,6 +2717,15 @@ static int nix_setup_txschq(struct rvu *rvu, struct nix_hw *nix_hw, int blkaddr) for (schq = 0; schq < txsch->schq.max; schq++) txsch->pfvf_map[schq] = TXSCH_MAP(0, NIX_TXSCHQ_FREE); } + + /* Setup a default value of 8192 as DWRR MTU */ + if (rvu->hw->cap.nix_common_dwrr_mtu) { + rvu_write64(rvu, blkaddr, NIX_AF_DWRR_RPM_MTU, + convert_bytes_to_dwrr_mtu(8192)); + rvu_write64(rvu, blkaddr, NIX_AF_DWRR_SDP_MTU, + convert_bytes_to_dwrr_mtu(8192)); + } + return 0; } @@ -2731,6 +2802,7 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, struct nix_hw_info *rsp) { u16 pcifunc = req->hdr.pcifunc; + u64 dwrr_mtu; int blkaddr; blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); @@ -2743,6 +2815,20 @@ int rvu_mbox_handler_nix_get_hw_info(struct rvu *rvu, struct msg_req *req, rvu_get_lmac_link_max_frs(rvu, &rsp->max_mtu); rsp->min_mtu = NIC_HW_MIN_FRS; + + if (!rvu->hw->cap.nix_common_dwrr_mtu) { + /* Return '1' on OTx2 */ + rsp->rpm_dwrr_mtu = 1; + rsp->sdp_dwrr_mtu = 1; + return 0; + } + + dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_RPM_MTU); + rsp->rpm_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + + dwrr_mtu = rvu_read64(rvu, BLKADDR_NIX0, NIX_AF_DWRR_SDP_MTU); + rsp->sdp_dwrr_mtu = convert_dwrr_mtu_to_bytes(dwrr_mtu); + return 0; } @@ -3635,6 +3721,28 @@ static int nix_aq_init(struct rvu *rvu, struct rvu_block *block) return 0; } +static void rvu_nix_setup_capabilities(struct rvu *rvu, int blkaddr) +{ + struct rvu_hwinfo *hw = rvu->hw; + u64 hw_const; + + hw_const = rvu_read64(rvu, blkaddr, NIX_AF_CONST1); + + /* On OcteonTx2 DWRR quantum is directly configured into each of + * the transmit scheduler queues. And PF/VF drivers were free to + * config any value upto 2^24. + * On CN10K, HW is modified, the quantum configuration at scheduler + * queues is in terms of weight. And SW needs to setup a base DWRR MTU + * at NIX_AF_DWRR_RPM_MTU / NIX_AF_DWRR_SDP_MTU. HW will do + * 'DWRR MTU * weight' to get the quantum. + * + * Check if HW uses a common MTU for all DWRR quantum configs. + * On OcteonTx2 this register field is '0'. + */ + if (((hw_const >> 56) & 0x10) == 0x10) + hw->cap.nix_common_dwrr_mtu = true; +} + static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) { const struct npc_lt_def_cfg *ltdefs; @@ -3672,6 +3780,9 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw) if (err) return err; + /* Setup capabilities of the NIX block */ + rvu_nix_setup_capabilities(rvu, blkaddr); + /* Initialize admin queue */ err = nix_aq_init(rvu, block); if (err) @@ -3842,7 +3953,6 @@ static void rvu_nix_block_freemem(struct rvu *rvu, int blkaddr, vlan = &nix_hw->txvlan; kfree(vlan->rsrc.bmap); mutex_destroy(&vlan->rsrc_lock); - devm_kfree(rvu->dev, vlan->entry2pfvf_map); mcast = &nix_hw->mcast; qmem_free(rvu->dev, mcast->mce_ctx); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 1097291aaa45..52b255426c22 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -1721,7 +1721,6 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) { struct rvu_hwinfo *hw = rvu->hw; int num_pkinds, num_kpus, idx; - struct npc_pkind *pkind; /* Disable all KPUs and their entries */ for (idx = 0; idx < hw->npc_kpus; idx++) { @@ -1739,9 +1738,8 @@ static void npc_parser_profile_init(struct rvu *rvu, int blkaddr) * Check HW max count to avoid configuring junk or * writing to unsupported CSR addresses. */ - pkind = &hw->pkind; num_pkinds = rvu->kpu.pkinds; - num_pkinds = min_t(int, pkind->rsrc.max, num_pkinds); + num_pkinds = min_t(int, hw->npc_pkinds, num_pkinds); for (idx = 0; idx < num_pkinds; idx++) npc_config_kpuaction(rvu, blkaddr, &rvu->kpu.ikpu[idx], 0, idx, true); @@ -1891,7 +1889,8 @@ static void rvu_npc_hw_init(struct rvu *rvu, int blkaddr) if (npc_const1 & BIT_ULL(63)) npc_const2 = rvu_read64(rvu, blkaddr, NPC_AF_CONST2); - pkind->rsrc.max = (npc_const1 >> 12) & 0xFFULL; + pkind->rsrc.max = NPC_UNRESERVED_PKIND_COUNT; + hw->npc_pkinds = (npc_const1 >> 12) & 0xFFULL; hw->npc_kpu_entries = npc_const1 & 0xFFFULL; hw->npc_kpus = (npc_const >> 8) & 0x1FULL; hw->npc_intfs = npc_const & 0xFULL; @@ -2002,6 +2001,10 @@ int rvu_npc_init(struct rvu *rvu) err = rvu_alloc_bitmap(&pkind->rsrc); if (err) return err; + /* Reserve PKIND#0 for LBKs. Power reset value of LBK_CH_PKIND is '0', + * no need to configure PKIND for all LBKs separately. + */ + rvu_alloc_rsrc(&pkind->rsrc); /* Allocate mem for pkind to PF and channel mapping info */ pkind->pfchan_map = devm_kcalloc(rvu->dev, pkind->rsrc.max, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 8b01ef6e2c99..6efcf3afff40 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -269,6 +269,8 @@ #define NIX_AF_DEBUG_NPC_RESP_DATAX(a) (0x680 | (a) << 3) #define NIX_AF_SMQX_CFG(a) (0x700 | (a) << 16) #define NIX_AF_SQM_DBG_CTL_STATUS (0x750) +#define NIX_AF_DWRR_SDP_MTU (0x790) +#define NIX_AF_DWRR_RPM_MTU (0x7A0) #define NIX_AF_PSE_CHANNEL_LEVEL (0x800) #define NIX_AF_PSE_SHAPER_CFG (0x810) #define NIX_AF_TX_EXPR_CREDIT (0x830) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c index 2e5379710aa5..820adf390b8e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_switch.c @@ -71,8 +71,8 @@ static int rvu_switch_install_rules(struct rvu *rvu) struct rvu_switch *rswitch = &rvu->rswitch; u16 start = rswitch->start_entry; struct rvu_hwinfo *hw = rvu->hw; - int pf, vf, numvfs, hwvf; u16 pcifunc, entry = 0; + int pf, vf, numvfs; int err; for (pf = 1; pf < hw->total_pfs; pf++) { @@ -110,8 +110,8 @@ static int rvu_switch_install_rules(struct rvu *rvu) rswitch->entry2pcifunc[entry++] = pcifunc; - rvu_get_pf_numvfs(rvu, pf, &numvfs, &hwvf); - for (vf = 0; vf < numvfs; vf++, hwvf++) { + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); + for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); rvu_get_nix_blkaddr(rvu, pcifunc); @@ -198,7 +198,7 @@ void rvu_switch_disable(struct rvu *rvu) struct npc_mcam_free_entry_req free_req = { 0 }; struct rvu_switch *rswitch = &rvu->rswitch; struct rvu_hwinfo *hw = rvu->hw; - int pf, vf, numvfs, hwvf; + int pf, vf, numvfs; struct msg_rsp rsp; u16 pcifunc; int err; @@ -217,7 +217,8 @@ void rvu_switch_disable(struct rvu *rvu) "Reverting RX rule for PF%d failed(%d)\n", pf, err); - for (vf = 0; vf < numvfs; vf++, hwvf++) { + rvu_get_pf_numvfs(rvu, pf, &numvfs, NULL); + for (vf = 0; vf < numvfs; vf++) { pcifunc = pf << 10 | ((vf + 1) & 0x3FF); err = rvu_switch_install_rx_rule(rvu, pcifunc, 0xFFF); if (err) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index 184de9466286..ccffddad1233 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -92,8 +92,7 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) aq->sq.ena = 1; /* Only one SMQ is allocated, map all SQ's to that SMQ */ aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/ - aq->sq.smq_rr_weight = pfvf->netdev->mtu; + aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); aq->sq.default_chan = pfvf->hw.tx_chan_base; aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ aq->sq.sqb_aura = sqb_aura; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h index 1a1ae334477d..e07723d71a26 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h @@ -9,6 +9,20 @@ #include "otx2_common.h" +static inline int mtu_to_dwrr_weight(struct otx2_nic *pfvf, int mtu) +{ + u32 weight; + + /* On OTx2, since AF returns DWRR_MTU as '1', this logic + * will work on those silicons as well. + */ + weight = mtu / pfvf->hw.dwrr_mtu; + if (mtu % pfvf->hw.dwrr_mtu) + weight += 1; + + return weight; +} + void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx); int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index 7cccd802c4ed..ce799b7a8449 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -596,6 +596,9 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) struct otx2_hw *hw = &pfvf->hw; struct nix_txschq_config *req; u64 schq, parent; + u64 dwrr_val; + + dwrr_val = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox); if (!req) @@ -621,21 +624,21 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) req->num_regs++; /* Set DWRR quantum */ req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq); - req->regval[2] = DFLT_RR_QTM; + req->regval[2] = dwrr_val; } else if (lvl == NIX_TXSCH_LVL_TL4) { parent = hw->txschq_list[NIX_TXSCH_LVL_TL3][0]; req->reg[0] = NIX_AF_TL4X_PARENT(schq); req->regval[0] = parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq); - req->regval[1] = DFLT_RR_QTM; + req->regval[1] = dwrr_val; } else if (lvl == NIX_TXSCH_LVL_TL3) { parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0]; req->reg[0] = NIX_AF_TL3X_PARENT(schq); req->regval[0] = parent << 16; req->num_regs++; req->reg[1] = NIX_AF_TL3X_SCHEDULE(schq); - req->regval[1] = DFLT_RR_QTM; + req->regval[1] = dwrr_val; } else if (lvl == NIX_TXSCH_LVL_TL2) { parent = hw->txschq_list[NIX_TXSCH_LVL_TL1][0]; req->reg[0] = NIX_AF_TL2X_PARENT(schq); @@ -643,7 +646,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) req->num_regs++; req->reg[1] = NIX_AF_TL2X_SCHEDULE(schq); - req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | DFLT_RR_QTM; + req->regval[1] = TXSCH_TL1_DFLT_RR_PRIO << 24 | dwrr_val; req->num_regs++; req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, @@ -656,7 +659,10 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl) * For VF this is always ignored. */ - /* Set DWRR quantum */ + /* On CN10K, if RR_WEIGHT is greater than 16384, HW will + * clip it to 16384, so configuring a 24bit max value + * will work on both OTx2 and CN10K. + */ req->reg[0] = NIX_AF_TL1X_SCHEDULE(schq); req->regval[0] = TXSCH_TL1_DFLT_RR_QTM; @@ -803,7 +809,7 @@ int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) aq->sq.ena = 1; /* Only one SMQ is allocated, map all SQ's to that SMQ */ aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - aq->sq.smq_rr_quantum = DFLT_RR_QTM; + aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->max_frs); aq->sq.default_chan = pfvf->hw.tx_chan_base; aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ aq->sq.sqb_aura = sqb_aura; @@ -924,12 +930,14 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx) aq->cq.drop = RQ_DROP_LVL_CQ(pfvf->hw.rq_skid, cq->cqe_cnt); aq->cq.drop_ena = 1; - /* Enable receive CQ backpressure */ - aq->cq.bp_ena = 1; - aq->cq.bpid = pfvf->bpid[0]; + if (!is_otx2_lbkvf(pfvf->pdev)) { + /* Enable receive CQ backpressure */ + aq->cq.bp_ena = 1; + aq->cq.bpid = pfvf->bpid[0]; - /* Set backpressure level is same as cq pass level */ - aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + /* Set backpressure level is same as cq pass level */ + aq->cq.bp = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt); + } } /* Fill AQ info */ @@ -1186,7 +1194,7 @@ static int otx2_aura_init(struct otx2_nic *pfvf, int aura_id, aq->aura.fc_hyst_bits = 0; /* Store count on all updates */ /* Enable backpressure for RQ aura */ - if (aura_id < pfvf->hw.rqpool_cnt) { + if (aura_id < pfvf->hw.rqpool_cnt && !is_otx2_lbkvf(pfvf->pdev)) { aq->aura.bp_ena = 0; aq->aura.nix0_bpid = pfvf->bpid[0]; /* Set backpressure level for RQ's Aura */ @@ -1666,6 +1674,11 @@ u16 otx2_get_max_mtu(struct otx2_nic *pfvf) * SMQ errors */ max_mtu = rsp->max_mtu - 8 - OTX2_ETH_HLEN; + + /* Also save DWRR MTU, needed for DWRR weight calculation */ + pfvf->hw.dwrr_mtu = rsp->rpm_dwrr_mtu; + if (!pfvf->hw.dwrr_mtu) + pfvf->hw.dwrr_mtu = 1; } out: diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 8fd58cd07f50..2a80cdc848e5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -181,6 +181,7 @@ struct otx2_hw { /* NIX */ u16 txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC]; u16 matchall_ipolicer; + u32 dwrr_mtu; /* HW settings, coalescing etc */ u16 rx_chan_base; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 8df748e0677b..b906a0eb6e0d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -298,15 +298,14 @@ static int otx2_set_channels(struct net_device *dev, err = otx2_set_real_num_queues(dev, channel->tx_count, channel->rx_count); if (err) - goto fail; + return err; pfvf->hw.rx_queues = channel->rx_count; pfvf->hw.tx_queues = channel->tx_count; pfvf->qset.cq_cnt = pfvf->hw.tx_queues + pfvf->hw.rx_queues; -fail: if (if_up) - dev->netdev_ops->ndo_open(dev); + err = dev->netdev_ops->ndo_open(dev); netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", pfvf->hw.tx_queues, pfvf->hw.rx_queues); @@ -410,7 +409,7 @@ static int otx2_set_ringparam(struct net_device *netdev, qs->rqe_cnt = rx_count; if (if_up) - netdev->netdev_ops->ndo_open(netdev); + return netdev->netdev_ops->ndo_open(netdev); return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 3f03bbdd8d04..22b7af029ebf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1662,6 +1662,7 @@ int otx2_open(struct net_device *netdev) err_tx_stop_queues: netif_tx_stop_all_queues(netdev); netif_carrier_off(netdev); + pf->flags |= OTX2_FLAG_INTF_DOWN; err_free_cints: otx2_free_cints(pf, qidx); vec = pci_irq_vector(pf->pdev, @@ -1689,6 +1690,10 @@ int otx2_stop(struct net_device *netdev) struct otx2_rss_info *rss; int qidx, vec, wrk; + /* If the DOWN flag is set resources are already freed */ + if (pf->flags & OTX2_FLAG_INTF_DOWN) + return 0; + netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c index d12e21db9fd6..68b442eb6d69 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.c @@ -390,11 +390,12 @@ static const struct devlink_ops prestera_dl_ops = { .trap_drop_counter_get = prestera_drop_counter_get, }; -struct prestera_switch *prestera_devlink_alloc(void) +struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev) { struct devlink *dl; - dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch)); + dl = devlink_alloc(&prestera_dl_ops, sizeof(struct prestera_switch), + dev->dev); return devlink_priv(dl); } @@ -411,7 +412,7 @@ int prestera_devlink_register(struct prestera_switch *sw) struct devlink *dl = priv_to_devlink(sw); int err; - err = devlink_register(dl, sw->dev->dev); + err = devlink_register(dl); if (err) { dev_err(prestera_dev(sw), "devlink_register failed: %d\n", err); return err; @@ -530,6 +531,8 @@ err_trap_register: prestera_trap = &prestera_trap_items_arr[i]; devlink_traps_unregister(devlink, &prestera_trap->trap, 1); } + devlink_trap_groups_unregister(devlink, prestera_trap_groups_arr, + groups_count); err_groups_register: kfree(trap_data->trap_items_arr); err_trap_items_alloc: diff --git a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h index 5d73aa9db897..cc34c3db13a2 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_devlink.h +++ b/drivers/net/ethernet/marvell/prestera/prestera_devlink.h @@ -6,7 +6,7 @@ #include "prestera.h" -struct prestera_switch *prestera_devlink_alloc(void); +struct prestera_switch *prestera_devlink_alloc(struct prestera_device *dev); void prestera_devlink_free(struct prestera_switch *sw); int prestera_devlink_register(struct prestera_switch *sw); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 7c569c1abefc..44c670807fb3 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -905,7 +905,7 @@ int prestera_device_register(struct prestera_device *dev) struct prestera_switch *sw; int err; - sw = prestera_devlink_alloc(); + sw = prestera_devlink_alloc(dev); if (!sw) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 743ca96527fa..dc9dd77d1ea0 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -4884,7 +4884,7 @@ static int sky2_test_msi(struct sky2_hw *hw) /* This driver supports yukon2 chipset only */ static const char *sky2_name(u8 chipid, char *buf, int sz) { - const char *name[] = { + static const char *const name[] = { "XL", /* 0xb3 */ "EC Ultra", /* 0xb4 */ "Extreme", /* 0xb5 */ diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 00c84656b2e7..7267c6c6d2e2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3535,6 +3535,7 @@ slave_start: if (!SRIOV_VALID_STATE(dev->flags)) { mlx4_err(dev, "Invalid SRIOV state\n"); + err = -EINVAL; goto err_close; } } @@ -4004,7 +4005,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) printk_once(KERN_INFO "%s", mlx4_version); - devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv)); + devlink = devlink_alloc(&mlx4_devlink_ops, sizeof(*priv), &pdev->dev); if (!devlink) return -ENOMEM; priv = devlink_priv(devlink); @@ -4023,7 +4024,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mutex_init(&dev->persist->interface_state_mutex); mutex_init(&dev->persist->pci_status_mutex); - ret = devlink_register(devlink, &pdev->dev); + ret = devlink_register(devlink); if (ret) goto err_persist_free; ret = devlink_params_register(devlink, mlx4_devlink_params, diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 427e7a31862c..2584bc038f94 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -917,7 +917,7 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, { int err; int i; - enum mlx4_qp_state states[] = { + static const enum mlx4_qp_state states[] = { MLX4_QP_STATE_RST, MLX4_QP_STATE_INIT, MLX4_QP_STATE_RTR, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 6378dc815df7..33e550d77fa6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o alloc.o port.o mr.o pd.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \ - lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \ + lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ fw_reset.o qos.o @@ -28,7 +28,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \ en/reporter_tx.o en/reporter_rx.o en/params.o en/xsk/pool.o \ en/xsk/setup.o en/xsk/rx.o en/xsk/tx.o en/devlink.o en/ptp.o \ en/qos.o en/trap.o en/fs_tt_redirect.o en/rqt.o en/tir.o \ - en/rx_res.o + en/rx_res.o en/channels.o # # Netdev extra diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index df3e4938ecdd..99ec278d0370 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -89,7 +89,8 @@ static void mlx5_add_cq_to_tasklet(struct mlx5_core_cq *cq, int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u32 *in, int inlen, u32 *out, int outlen) { - int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); + int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), + c_eqn_or_apu_element); u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {}; struct mlx5_eq_comp *eq; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index ceebfc20f65e..def2156e50ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -500,10 +500,7 @@ static int next_phys_dev(struct device *dev, const void *data) return 1; } -/* This function is called with two flows: - * 1. During initialization of mlx5_core_dev and we don't need to lock it. - * 2. During LAG configure stage and caller holds &mlx5_intf_mutex. - */ +/* Must be called with intf_mutex held */ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) { struct auxiliary_device *adev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index d791d351b489..f38553ff538b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -359,9 +359,10 @@ int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, return 0; } -struct devlink *mlx5_devlink_alloc(void) +struct devlink *mlx5_devlink_alloc(struct device *dev) { - return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev)); + return devlink_alloc(&mlx5_devlink_ops, sizeof(struct mlx5_core_dev), + dev); } void mlx5_devlink_free(struct devlink *devlink) @@ -638,11 +639,11 @@ static void mlx5_devlink_traps_unregister(struct devlink *devlink) ARRAY_SIZE(mlx5_trap_groups_arr)); } -int mlx5_devlink_register(struct devlink *devlink, struct device *dev) +int mlx5_devlink_register(struct devlink *devlink) { int err; - err = devlink_register(devlink, dev); + err = devlink_register(devlink); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 7318d44b774b..30bf4882779b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -31,9 +31,9 @@ int mlx5_devlink_trap_get_num_active(struct mlx5_core_dev *dev); int mlx5_devlink_traps_get_action(struct mlx5_core_dev *dev, int trap_id, enum devlink_trap_action *action); -struct devlink *mlx5_devlink_alloc(void); +struct devlink *mlx5_devlink_alloc(struct device *dev); void mlx5_devlink_free(struct devlink *devlink); -int mlx5_devlink_register(struct devlink *devlink, struct device *dev); +int mlx5_devlink_register(struct devlink *devlink); void mlx5_devlink_unregister(struct devlink *devlink); #endif /* __MLX5_DEVLINK_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 35668986878a..4f6897c1ea8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -66,8 +66,6 @@ struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) #define MLX5E_METADATA_ETHER_LEN 8 -#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) - #define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) @@ -140,6 +138,7 @@ struct page_pool; #define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2 #define MLX5E_MIN_NUM_CHANNELS 0x1 +#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) #define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_TX_XSK_POLL_BUDGET 64 @@ -921,8 +920,6 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, u16 vid); void mlx5e_timestamp_init(struct mlx5e_priv *priv); -int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv); - struct mlx5e_xsk_param; struct mlx5e_rq_param; @@ -984,9 +981,6 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx); -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, - int num_channels); - int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state); void mlx5e_activate_rq(struct mlx5e_rq *rq); void mlx5e_deactivate_rq(struct mlx5e_rq *rq); @@ -1036,16 +1030,6 @@ void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq); int mlx5e_init_di_list(struct mlx5e_rq *rq, int wq_sz, int node); void mlx5e_free_di_list(struct mlx5e_rq *rq); -int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv); - -int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc); -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv); - -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv); -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); - int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn); void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn); @@ -1133,8 +1117,6 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); -void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, - u16 num_channels); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c new file mode 100644 index 000000000000..e7c14c0de0a7 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#include "channels.h" +#include "en.h" +#include "en/ptp.h" + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs) +{ + return chs->num; +} + +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c; + + WARN_ON(ix >= mlx5e_channels_get_num(chs)); + c = chs->c[ix]; + + *rqn = c->rq.rqn; +} + +bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn) +{ + struct mlx5e_channel *c; + + WARN_ON(ix >= mlx5e_channels_get_num(chs)); + c = chs->c[ix]; + + if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + return false; + + *rqn = c->xskrq.rqn; + return true; +} + +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn) +{ + struct mlx5e_ptp *c = chs->ptp; + + if (!c || !test_bit(MLX5E_PTP_STATE_RX, c->state)) + return false; + + *rqn = c->rq.rqn; + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h new file mode 100644 index 000000000000..ca00cbc827cb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/channels.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ + +#ifndef __MLX5_EN_CHANNELS_H__ +#define __MLX5_EN_CHANNELS_H__ + +#include <linux/kernel.h> + +struct mlx5e_channels; + +unsigned int mlx5e_channels_get_num(struct mlx5e_channels *chs); +void mlx5e_channels_get_regular_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +bool mlx5e_channels_get_xsk_rqn(struct mlx5e_channels *chs, unsigned int ix, u32 *rqn); +bool mlx5e_channels_get_ptp_rqn(struct mlx5e_channels *chs, u32 *rqn); + +#endif /* __MLX5_EN_CHANNELS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 0e053aab12b5..e348c276eaa1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -5,6 +5,7 @@ #define __MLX5E_FLOW_STEER_H__ #include "mod_hdr.h" +#include "lib/fs_ttc.h" enum { MLX5E_TC_FT_LEVEL = 0, @@ -67,21 +68,7 @@ struct mlx5e_l2_table { bool promisc_enabled; }; -enum mlx5e_traffic_types { - MLX5E_TT_IPV4_TCP, - MLX5E_TT_IPV6_TCP, - MLX5E_TT_IPV4_UDP, - MLX5E_TT_IPV6_UDP, - MLX5E_TT_IPV4_IPSEC_AH, - MLX5E_TT_IPV6_IPSEC_AH, - MLX5E_TT_IPV4_IPSEC_ESP, - MLX5E_TT_IPV6_IPSEC_ESP, - MLX5E_TT_IPV4, - MLX5E_TT_IPV6, - MLX5E_TT_ANY, - MLX5E_NUM_TT, - MLX5E_NUM_INDIR_TIRS = MLX5E_TT_ANY, -}; +#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1) #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -93,30 +80,6 @@ enum mlx5e_traffic_types { MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) -enum mlx5e_tunnel_types { - MLX5E_TT_IPV4_GRE, - MLX5E_TT_IPV6_GRE, - MLX5E_TT_IPV4_IPIP, - MLX5E_TT_IPV6_IPIP, - MLX5E_TT_IPV4_IPV6, - MLX5E_TT_IPV6_IPV6, - MLX5E_NUM_TUNNEL_TT, -}; - -bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); - -struct mlx5e_ttc_rule { - struct mlx5_flow_handle *rule; - struct mlx5_flow_destination default_dest; -}; - -/* L3/L4 traffic type classifier */ -struct mlx5e_ttc_table { - struct mlx5e_flow_table ft; - struct mlx5e_ttc_rule rules[MLX5E_NUM_TT]; - struct mlx5_flow_handle *tunnel_rules[MLX5E_NUM_TUNNEL_TT]; -}; - /* NIC prio FTS */ enum { MLX5E_PROMISC_FT_LEVEL, @@ -138,22 +101,6 @@ enum { #endif }; -#define MLX5E_TTC_NUM_GROUPS 3 -#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT) -#define MLX5E_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_TTC_GROUP3_SIZE BIT(0) -#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\ - MLX5E_TTC_GROUP2_SIZE +\ - MLX5E_TTC_GROUP3_SIZE) - -#define MLX5E_INNER_TTC_NUM_GROUPS 3 -#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3) -#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1) -#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0) -#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\ - MLX5E_INNER_TTC_GROUP2_SIZE +\ - MLX5E_INNER_TTC_GROUP3_SIZE) - struct mlx5e_priv; #ifdef CONFIG_MLX5_EN_RXNFC @@ -222,8 +169,8 @@ struct mlx5e_flow_steering { struct mlx5e_promisc_table promisc; struct mlx5e_vlan_table *vlan; struct mlx5e_l2_table l2; - struct mlx5e_ttc_table ttc; - struct mlx5e_ttc_table inner_ttc; + struct mlx5_ttc_table *ttc; + struct mlx5_ttc_table *inner_ttc; #ifdef CONFIG_MLX5_EN_ARFS struct mlx5e_arfs_tables *arfs; #endif @@ -235,27 +182,13 @@ struct mlx5e_flow_steering { struct mlx5e_ptp_fs *ptp_fs; }; -struct ttc_params { - struct mlx5_flow_table_attr ft_attr; - u32 any_tt_tirn; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_ttc_table *inner_ttc; -}; - -void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, struct ttc_params *ttc_params); -void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params); +void mlx5e_set_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params, bool tunnel); -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc); -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc); +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); +int mlx5e_create_ttc_table(struct mlx5e_priv *priv); void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft); -int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type, - struct mlx5_flow_destination *new_dest); -struct mlx5_flow_destination -mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type); -int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type); void mlx5e_enable_cvlan_filter(struct mlx5e_priv *priv); void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); @@ -263,7 +196,6 @@ void mlx5e_disable_cvlan_filter(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv); -u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt); int mlx5e_add_vlan_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); void mlx5e_remove_vlan_trap(struct mlx5e_priv *priv); int mlx5e_add_mac_trap(struct mlx5e_priv *priv, int trap_id, int tir_num); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c index 909faa6c89d7..7aa25a5e29d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.c @@ -33,22 +33,22 @@ static char *fs_udp_type2str(enum fs_udp_type i) } } -static enum mlx5e_traffic_types fs_udp2tt(enum fs_udp_type i) +static enum mlx5_traffic_types fs_udp2tt(enum fs_udp_type i) { switch (i) { case FS_IPV4_UDP: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; default: /* FS_IPV6_UDP */ - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; } } -static enum fs_udp_type tt2fs_udp(enum mlx5e_traffic_types i) +static enum fs_udp_type tt2fs_udp(enum mlx5_traffic_types i) { switch (i) { - case MLX5E_TT_IPV4_UDP: + case MLX5_TT_IPV4_UDP: return FS_IPV4_UDP; - case MLX5E_TT_IPV6_UDP: + case MLX5_TT_IPV6_UDP: return FS_IPV6_UDP; default: return FS_UDP_NUM_TYPES; @@ -75,7 +75,7 @@ static void fs_udp_set_dport_flow(struct mlx5_flow_spec *spec, enum fs_udp_type struct mlx5_flow_handle * mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, - enum mlx5e_traffic_types ttc_type, + enum mlx5_traffic_types ttc_type, u32 tir_num, u16 d_port) { enum fs_udp_type type = tt2fs_udp(ttc_type); @@ -124,7 +124,7 @@ static int fs_udp_add_default_rule(struct mlx5e_priv *priv, enum fs_udp_type typ fs_udp = priv->fs.udp; fs_udp_t = &fs_udp->tables[type]; - dest = mlx5e_ttc_get_default_dest(priv, fs_udp2tt(type)); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_udp2tt(type)); rule = mlx5_add_flow_rules(fs_udp_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -259,7 +259,7 @@ static int fs_udp_disable(struct mlx5e_priv *priv) for (i = 0; i < FS_UDP_NUM_TYPES; i++) { /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, fs_udp2tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_udp2tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -281,7 +281,7 @@ static int fs_udp_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.udp->tables[i].t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, fs_udp2tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_udp2tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", @@ -401,7 +401,7 @@ static int fs_any_add_default_rule(struct mlx5e_priv *priv) fs_any = priv->fs.any; fs_any_t = &fs_any->table; - dest = mlx5e_ttc_get_default_dest(priv, MLX5E_TT_ANY); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, MLX5_TT_ANY); rule = mlx5_add_flow_rules(fs_any_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -514,11 +514,11 @@ static int fs_any_disable(struct mlx5e_priv *priv) int err; /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, MLX5E_TT_ANY); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, MLX5_TT_ANY); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", - __func__, MLX5E_TT_ANY, err); + __func__, MLX5_TT_ANY, err); return err; } return 0; @@ -533,11 +533,11 @@ static int fs_any_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.any->table.t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, MLX5E_TT_ANY, &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, MLX5_TT_ANY, &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", - __func__, MLX5E_TT_ANY, err); + __func__, MLX5_TT_ANY, err); return err; } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h index 8385df24eb99..7a70c4f38fda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs_tt_redirect.h @@ -12,7 +12,7 @@ void mlx5e_fs_tt_redirect_del_rule(struct mlx5_flow_handle *rule); /* UDP traffic type redirect */ struct mlx5_flow_handle * mlx5e_fs_tt_redirect_udp_add_rule(struct mlx5e_priv *priv, - enum mlx5e_traffic_types ttc_type, + enum mlx5_traffic_types ttc_type, u32 tir_num, u16 d_port); void mlx5e_fs_tt_redirect_udp_destroy(struct mlx5e_priv *priv); int mlx5e_fs_tt_redirect_udp_create(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c index ea321e528749..4e72ca8070e2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.c @@ -5,11 +5,15 @@ #include <linux/slab.h> #include <linux/xarray.h> #include <linux/hashtable.h> +#include <linux/refcount.h> #include "mapping.h" #define MAPPING_GRACE_PERIOD 2000 +static LIST_HEAD(shared_ctx_list); +static DEFINE_MUTEX(shared_ctx_lock); + struct mapping_ctx { struct xarray xarray; DECLARE_HASHTABLE(ht, 8); @@ -20,6 +24,10 @@ struct mapping_ctx { struct delayed_work dwork; struct list_head pending_list; spinlock_t pending_list_lock; /* Guards pending list */ + u64 id; + u8 type; + struct list_head list; + refcount_t refcount; }; struct mapping_item { @@ -205,11 +213,48 @@ mapping_create(size_t data_size, u32 max_id, bool delayed_removal) mutex_init(&ctx->lock); xa_init_flags(&ctx->xarray, XA_FLAGS_ALLOC1); + refcount_set(&ctx->refcount, 1); + INIT_LIST_HEAD(&ctx->list); + + return ctx; +} + +struct mapping_ctx * +mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal) +{ + struct mapping_ctx *ctx; + + mutex_lock(&shared_ctx_lock); + list_for_each_entry(ctx, &shared_ctx_list, list) { + if (ctx->id == id && ctx->type == type) { + if (refcount_inc_not_zero(&ctx->refcount)) + goto unlock; + break; + } + } + + ctx = mapping_create(data_size, max_id, delayed_removal); + if (IS_ERR(ctx)) + goto unlock; + + ctx->id = id; + ctx->type = type; + list_add(&ctx->list, &shared_ctx_list); + +unlock: + mutex_unlock(&shared_ctx_lock); return ctx; } void mapping_destroy(struct mapping_ctx *ctx) { + if (!refcount_dec_and_test(&ctx->refcount)) + return; + + mutex_lock(&shared_ctx_lock); + list_del(&ctx->list); + mutex_unlock(&shared_ctx_lock); + mapping_flush_work(ctx); xa_destroy(&ctx->xarray); mutex_destroy(&ctx->lock); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h index 285525cc5470..4e2119f0f4c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/mapping.h @@ -24,4 +24,9 @@ struct mapping_ctx *mapping_create(size_t data_size, u32 max_id, bool delayed_removal); void mapping_destroy(struct mapping_ctx *ctx); +/* adds mapping with an id or get an existing mapping with the same id + */ +struct mapping_ctx * +mapping_create_for_id(u64 id, u8 type, size_t data_size, u32 max_id, bool delayed_removal); + #endif /* __MLX5_MAPPING_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index fc602d85ca48..3cbb596821e8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -483,6 +483,15 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev, param->cq_period_mode = params->rx_cq_moderation.cq_period_mode; } +static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params) +{ + bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) && + MLX5_CAP_GEN(mdev, relaxed_ordering_write); + + return ro && params->lro_en ? + MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN; +} + int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, @@ -520,7 +529,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, } MLX5_SET(wq, wq, wq_type, params->rq_wq_type); - MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); + MLX5_SET(wq, wq, end_padding_mode, rq_end_pad_mode(mdev, params)); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 849ee3e147c4..f479ef31ca40 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -482,8 +482,11 @@ static void mlx5e_ptp_build_params(struct mlx5e_ptp *c, params->log_sq_size = orig->log_sq_size; mlx5e_ptp_build_sq_param(c->mdev, params, &cparams->txq_sq_param); } - if (test_bit(MLX5E_PTP_STATE_RX, c->state)) + /* RQ */ + if (test_bit(MLX5E_PTP_STATE_RX, c->state)) { + params->vlan_strip_disable = orig->vlan_strip_disable; mlx5e_ptp_build_rq_param(c->mdev, c->netdev, c->priv->q_counter, cparams); + } } static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, @@ -494,7 +497,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, int err; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; @@ -602,8 +605,8 @@ static void mlx5e_ptp_rx_unset_fs(struct mlx5e_priv *priv) static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) { + u32 tirn = mlx5e_rx_res_get_tirn_ptp(priv->rx_res); struct mlx5e_ptp_fs *ptp_fs = priv->fs.ptp_fs; - u32 tirn = priv->rx_res->ptp.tir.tirn; struct mlx5_flow_handle *rule; int err; @@ -614,7 +617,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) if (err) goto out_free; - rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV4_UDP, + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV4_UDP, tirn, PTP_EV_PORT); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -622,7 +625,7 @@ static int mlx5e_ptp_rx_set_fs(struct mlx5e_priv *priv) } ptp_fs->udp_v4_rule = rule; - rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5E_TT_IPV6_UDP, + rule = mlx5e_fs_tt_redirect_udp_add_rule(priv, MLX5_TT_IPV6_UDP, tirn, PTP_EV_PORT); if (IS_ERR(rule)) { err = PTR_ERR(rule); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c index 38d0e9dbd6bd..b915fb29dd2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c @@ -4,6 +4,15 @@ #include "rqt.h" #include <linux/mlx5/transobj.h> +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels) +{ + unsigned int i; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir->table[i] = i % num_channels; +} + static int mlx5e_rqt_init(struct mlx5e_rqt *rqt, struct mlx5_core_dev *mdev, u16 max_size, u32 *init_rqns, u16 init_size) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h index d2c76649efb0..60c985a12f24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h @@ -14,6 +14,9 @@ struct mlx5e_rss_params_indir { u32 table[MLX5E_INDIR_RQT_SIZE]; }; +void mlx5e_rss_params_indir_init_uniform(struct mlx5e_rss_params_indir *indir, + unsigned int num_channels); + struct mlx5e_rqt { struct mlx5_core_dev *mdev; u32 rqtn; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index 8fc1dfc4e830..e2a8fe13f29d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -2,54 +2,56 @@ /* Copyright (c) 2021, Mellanox Technologies inc. All rights reserved. */ #include "rx_res.h" +#include "channels.h" +#include "params.h" static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_INDIR_TIRS] = { - [MLX5E_TT_IPV4_TCP] = { + [MLX5_TT_IPV4_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, .rx_hash_fields = MLX5_HASH_IP_L4PORTS, }, - [MLX5E_TT_IPV6_TCP] = { + [MLX5_TT_IPV6_TCP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = MLX5_L4_PROT_TYPE_TCP, .rx_hash_fields = MLX5_HASH_IP_L4PORTS, }, - [MLX5E_TT_IPV4_UDP] = { + [MLX5_TT_IPV4_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, .rx_hash_fields = MLX5_HASH_IP_L4PORTS, }, - [MLX5E_TT_IPV6_UDP] = { + [MLX5_TT_IPV6_UDP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = MLX5_L4_PROT_TYPE_UDP, .rx_hash_fields = MLX5_HASH_IP_L4PORTS, }, - [MLX5E_TT_IPV4_IPSEC_AH] = { + [MLX5_TT_IPV4_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, }, - [MLX5E_TT_IPV6_IPSEC_AH] = { + [MLX5_TT_IPV6_IPSEC_AH] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, }, - [MLX5E_TT_IPV4_IPSEC_ESP] = { + [MLX5_TT_IPV4_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, }, - [MLX5E_TT_IPV6_IPSEC_ESP] = { + [MLX5_TT_IPV6_IPSEC_ESP] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP_IPSEC_SPI, }, - [MLX5E_TT_IPV4] = { + [MLX5_TT_IPV4] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV4, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP, }, - [MLX5E_TT_IPV6] = { + [MLX5_TT_IPV6] = { .l3_prot_type = MLX5_L3_PROT_TYPE_IPV6, .l4_prot_type = 0, .rx_hash_fields = MLX5_HASH_IP, @@ -57,13 +59,556 @@ static const struct mlx5e_rss_params_traffic_type rss_default_config[MLX5E_NUM_I }; struct mlx5e_rss_params_traffic_type -mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt) +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt) { return rss_default_config[tt]; } +struct mlx5e_rx_res { + struct mlx5_core_dev *mdev; + enum mlx5e_rx_res_features features; + unsigned int max_nch; + u32 drop_rqn; + + struct { + struct mlx5e_rss_params_hash hash; + struct mlx5e_rss_params_indir indir; + u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; + } rss_params; + + struct mlx5e_rqt indir_rqt; + struct { + struct mlx5e_tir indir_tir; + struct mlx5e_tir inner_indir_tir; + } rss[MLX5E_NUM_INDIR_TIRS]; + + bool rss_active; + u32 rss_rqns[MLX5E_INDIR_RQT_SIZE]; + unsigned int rss_nch; + + struct { + struct mlx5e_rqt direct_rqt; + struct mlx5e_tir direct_tir; + struct mlx5e_rqt xsk_rqt; + struct mlx5e_tir xsk_tir; + } *channels; + + struct { + struct mlx5e_rqt rqt; + struct mlx5e_tir tir; + } ptp; +}; + +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void) +{ + return kvzalloc(sizeof(struct mlx5e_rx_res), GFP_KERNEL); +} + +static void mlx5e_rx_res_rss_params_init(struct mlx5e_rx_res *res, unsigned int init_nch) +{ + enum mlx5_traffic_types tt; + + res->rss_params.hash.hfunc = ETH_RSS_HASH_TOP; + netdev_rss_key_fill(res->rss_params.hash.toeplitz_hash_key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, init_nch); + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + res->rss_params.rx_hash_fields[tt] = + mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; +} + +static int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, + const struct mlx5e_lro_param *init_lro_param) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + enum mlx5_traffic_types tt, max_tt; + struct mlx5e_tir_builder *builder; + u32 indir_rqtn; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5e_rqt_init_direct(&res->indir_rqt, res->mdev, true, res->drop_rqn); + if (err) + goto out; + + indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt); + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_rss_params_traffic_type rss_tt; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + indir_rqtn, inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, false); + + err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an indirect TIR: err = %d, tt = %d\n", + err, tt); + goto err_destroy_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + if (!inner_ft_support) + goto out; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + struct mlx5e_rss_params_traffic_type rss_tt; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + indir_rqtn, inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, true); + + err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an inner indirect TIR: err = %d, tt = %d\n", + err, tt); + goto err_destroy_inner_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + goto out; + +err_destroy_inner_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); + + tt = MLX5E_NUM_INDIR_TIRS; +err_destroy_tirs: + max_tt = tt; + for (tt = 0; tt < max_tt; tt++) + mlx5e_tir_destroy(&res->rss[tt].indir_tir); + + mlx5e_rqt_destroy(&res->indir_rqt); + +out: + mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_rx_res_channels_init(struct mlx5e_rx_res *res, + const struct mlx5e_lro_param *init_lro_param) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err = 0; + int ix; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + res->channels = kvcalloc(res->max_nch, sizeof(*res->channels), GFP_KERNEL); + if (!res->channels) { + err = -ENOMEM; + goto out; + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&res->channels[ix].direct_rqt, + res->mdev, false, res->drop_rqn); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct RQT: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_rqts; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->channels[ix].direct_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create a direct TIR: err = %d, ix = %u\n", + err, ix); + goto err_destroy_direct_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + goto out; + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_init_direct(&res->channels[ix].xsk_rqt, + res->mdev, false, res->drop_rqn); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an XSK RQT: err = %d, ix = %u\n", + err, ix); + goto err_destroy_xsk_rqts; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + inner_ft_support); + mlx5e_tir_builder_build_lro(builder, init_lro_param); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->channels[ix].xsk_tir, builder, res->mdev, true); + if (err) { + mlx5_core_warn(res->mdev, "Failed to create an XSK TIR: err = %d, ix = %u\n", + err, ix); + goto err_destroy_xsk_tirs; + } + + mlx5e_tir_builder_clear(builder); + } + + goto out; + +err_destroy_xsk_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].xsk_tir); + + ix = res->max_nch; +err_destroy_xsk_rqts: + while (--ix >= 0) + mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt); + + ix = res->max_nch; +err_destroy_direct_tirs: + while (--ix >= 0) + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + + ix = res->max_nch; +err_destroy_direct_rqts: + while (--ix >= 0) + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + + kvfree(res->channels); + +out: + mlx5e_tir_builder_free(builder); + + return err; +} + +static int mlx5e_rx_res_ptp_init(struct mlx5e_rx_res *res) +{ + bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_tir_builder *builder; + int err; + + builder = mlx5e_tir_builder_alloc(false); + if (!builder) + return -ENOMEM; + + err = mlx5e_rqt_init_direct(&res->ptp.rqt, res->mdev, false, res->drop_rqn); + if (err) + goto out; + + mlx5e_tir_builder_build_rqt(builder, res->mdev->mlx5e_res.hw_objs.td.tdn, + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + inner_ft_support); + mlx5e_tir_builder_build_direct(builder); + + err = mlx5e_tir_init(&res->ptp.tir, builder, res->mdev, true); + if (err) + goto err_destroy_ptp_rqt; + + goto out; + +err_destroy_ptp_rqt: + mlx5e_rqt_destroy(&res->ptp.rqt); + +out: + mlx5e_tir_builder_free(builder); + return err; +} + +static void mlx5e_rx_res_rss_destroy(struct mlx5e_rx_res *res) +{ + enum mlx5_traffic_types tt; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&res->rss[tt].indir_tir); + + if (res->features & MLX5E_RX_RES_FEATURE_INNER_FT) + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) + mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); + + mlx5e_rqt_destroy(&res->indir_rqt); +} + +static void mlx5e_rx_res_channels_destroy(struct mlx5e_rx_res *res) +{ + unsigned int ix; + + for (ix = 0; ix < res->max_nch; ix++) { + mlx5e_tir_destroy(&res->channels[ix].direct_tir); + mlx5e_rqt_destroy(&res->channels[ix].direct_rqt); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + mlx5e_tir_destroy(&res->channels[ix].xsk_tir); + mlx5e_rqt_destroy(&res->channels[ix].xsk_rqt); + } + + kvfree(res->channels); +} + +static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_tir_destroy(&res->ptp.tir); + mlx5e_rqt_destroy(&res->ptp.rqt); +} + +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param, + unsigned int init_nch) +{ + int err; + + res->mdev = mdev; + res->features = features; + res->max_nch = max_nch; + res->drop_rqn = drop_rqn; + + mlx5e_rx_res_rss_params_init(res, init_nch); + + err = mlx5e_rx_res_rss_init(res, init_lro_param); + if (err) + return err; + + err = mlx5e_rx_res_channels_init(res, init_lro_param); + if (err) + goto err_rss_destroy; + + err = mlx5e_rx_res_ptp_init(res); + if (err) + goto err_channels_destroy; + + return 0; + +err_channels_destroy: + mlx5e_rx_res_channels_destroy(res); +err_rss_destroy: + mlx5e_rx_res_rss_destroy(res); + return err; +} + +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res) +{ + mlx5e_rx_res_ptp_destroy(res); + mlx5e_rx_res_channels_destroy(res); + mlx5e_rx_res_rss_destroy(res); +} + +void mlx5e_rx_res_free(struct mlx5e_rx_res *res) +{ + kvfree(res); +} + +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_tir_get_tirn(&res->channels[ix].direct_tir); +} + +u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_XSK)); + + return mlx5e_tir_get_tirn(&res->channels[ix].xsk_tir); +} + +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + return mlx5e_tir_get_tirn(&res->rss[tt].indir_tir); +} + +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)); + return mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir); +} + +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res) +{ + WARN_ON(!(res->features & MLX5E_RX_RES_FEATURE_PTP)); + return mlx5e_tir_get_tirn(&res->ptp.tir); +} + +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix) +{ + return mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt); +} + +static void mlx5e_rx_res_rss_enable(struct mlx5e_rx_res *res) +{ + int err; + + res->rss_active = true; + + err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch, + res->rss_params.hash.hfunc, + &res->rss_params.indir); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), err); +} + +static void mlx5e_rx_res_rss_disable(struct mlx5e_rx_res *res) +{ + int err; + + res->rss_active = false; + + err = mlx5e_rqt_redirect_direct(&res->indir_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to drop RQ %#x: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), res->drop_rqn, err); +} + +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs) +{ + unsigned int nch, ix; + int err; + + nch = mlx5e_channels_get_num(chs); + + for (ix = 0; ix < chs->num; ix++) + mlx5e_channels_get_regular_rqn(chs, ix, &res->rss_rqns[ix]); + res->rss_nch = chs->num; + + mlx5e_rx_res_rss_enable(res); + + for (ix = 0; ix < nch; ix++) { + u32 rqn; + + mlx5e_channels_get_regular_rqn(chs, ix, &rqn); + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn)) + rqn = res->drop_rqn; + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + rqn, ix, err); + } + for (ix = nch; ix < res->max_nch; ix++) { + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + } + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + u32 rqn; + + if (mlx5e_channels_get_ptp_rqn(chs, &rqn)) + rqn = res->drop_rqn; + + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + rqn, err); + } +} + +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res) +{ + unsigned int ix; + int err; + + mlx5e_rx_res_rss_disable(res); + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].direct_rqt), + res->drop_rqn, ix, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_XSK)) + continue; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + } + + if (res->features & MLX5E_RX_RES_FEATURE_PTP) { + err = mlx5e_rqt_redirect_direct(&res->ptp.rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect direct RQT %#x to drop RQ %#x (PTP): err = %d\n", + mlx5e_rqt_get_rqtn(&res->ptp.rqt), + res->drop_rqn, err); + } +} + +int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix) +{ + u32 rqn; + int err; + + if (!mlx5e_channels_get_xsk_rqn(chs, ix, &rqn)) + return -EINVAL; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to XSK RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + rqn, ix, err); + return err; +} + +int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix) +{ + int err; + + err = mlx5e_rqt_redirect_direct(&res->channels[ix].xsk_rqt, res->drop_rqn); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect XSK RQT %#x to drop RQ %#x (channel %u): err = %d\n", + mlx5e_rqt_get_rqtn(&res->channels[ix].xsk_rqt), + res->drop_rqn, ix, err); + return err; +} + struct mlx5e_rss_params_traffic_type -mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt) +mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) { struct mlx5e_rss_params_traffic_type rss_tt; @@ -71,3 +616,216 @@ mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traf rss_tt.rx_hash_fields = res->rss_params.rx_hash_fields[tt]; return rss_tt; } + +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch) +{ + mlx5e_rss_params_indir_init_uniform(&res->rss_params.indir, nch); + + if (!res->rss_active) + return; + + mlx5e_rx_res_rss_enable(res); +} + +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc) +{ + unsigned int i; + + if (indir) + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + indir[i] = res->rss_params.indir.table[i]; + + if (key) + memcpy(key, res->rss_params.hash.toeplitz_hash_key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + + if (hfunc) + *hfunc = res->rss_params.hash.hfunc; +} + +static int mlx5e_rx_res_rss_update_tir(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + bool inner) +{ + struct mlx5e_rss_params_traffic_type rss_tt; + struct mlx5e_tir_builder *builder; + struct mlx5e_tir *tir; + int err; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); + + mlx5e_tir_builder_build_rss(builder, &res->rss_params.hash, &rss_tt, inner); + tir = inner ? &res->rss[tt].inner_indir_tir : &res->rss[tt].indir_tir; + err = mlx5e_tir_modify(tir, builder); + + mlx5e_tir_builder_free(builder); + return err; +} + +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir, + const u8 *key, const u8 *hfunc) +{ + enum mlx5_traffic_types tt; + bool changed_indir = false; + bool changed_hash = false; + int err; + + if (hfunc && *hfunc != res->rss_params.hash.hfunc) { + switch (*hfunc) { + case ETH_RSS_HASH_XOR: + case ETH_RSS_HASH_TOP: + break; + default: + return -EINVAL; + } + changed_hash = true; + changed_indir = true; + res->rss_params.hash.hfunc = *hfunc; + } + + if (key) { + if (res->rss_params.hash.hfunc == ETH_RSS_HASH_TOP) + changed_hash = true; + memcpy(res->rss_params.hash.toeplitz_hash_key, key, + sizeof(res->rss_params.hash.toeplitz_hash_key)); + } + + if (indir) { + unsigned int i; + + changed_indir = true; + + for (i = 0; i < MLX5E_INDIR_RQT_SIZE; i++) + res->rss_params.indir.table[i] = indir[i]; + } + + if (changed_indir && res->rss_active) { + err = mlx5e_rqt_redirect_indir(&res->indir_rqt, res->rss_rqns, res->rss_nch, + res->rss_params.hash.hfunc, + &res->rss_params.indir); + if (err) + mlx5_core_warn(res->mdev, "Failed to redirect indirect RQT %#x to channels: err = %d\n", + mlx5e_rqt_get_rqtn(&res->indir_rqt), err); + } + + if (changed_hash) + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_rx_res_rss_update_tir(res, tt, false); + if (err) + mlx5_core_warn(res->mdev, "Failed to update RSS hash of indirect TIR for traffic type %d: err = %d\n", + tt, err); + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + continue; + + err = mlx5e_rx_res_rss_update_tir(res, tt, true); + if (err) + mlx5_core_warn(res->mdev, "Failed to update RSS hash of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + } + + return 0; +} + +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt) +{ + return res->rss_params.rx_hash_fields[tt]; +} + +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields) +{ + u8 old_rx_hash_fields; + int err; + + old_rx_hash_fields = res->rss_params.rx_hash_fields[tt]; + + if (old_rx_hash_fields == rx_hash_fields) + return 0; + + res->rss_params.rx_hash_fields[tt] = rx_hash_fields; + + err = mlx5e_rx_res_rss_update_tir(res, tt, false); + if (err) { + res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields; + mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of indirect TIR for traffic type %d: err = %d\n", + tt, err); + return err; + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + return 0; + + err = mlx5e_rx_res_rss_update_tir(res, tt, true); + if (err) { + /* Partial update happened. Try to revert - it may fail too, but + * there is nothing more we can do. + */ + res->rss_params.rx_hash_fields[tt] = old_rx_hash_fields; + mlx5_core_warn(res->mdev, "Failed to update RSS hash fields of inner indirect TIR for traffic type %d: err = %d\n", + tt, err); + if (mlx5e_rx_res_rss_update_tir(res, tt, false)) + mlx5_core_warn(res->mdev, "Partial update of RSS hash fields happened: failed to revert indirect TIR for traffic type %d to the old values\n", + tt); + } + + return err; +} + +int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param) +{ + struct mlx5e_tir_builder *builder; + enum mlx5_traffic_types tt; + int err, final_err; + unsigned int ix; + + builder = mlx5e_tir_builder_alloc(true); + if (!builder) + return -ENOMEM; + + mlx5e_tir_builder_build_lro(builder, lro_param); + + final_err = 0; + + for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { + err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(&res->rss[tt].indir_tir), tt, err); + if (!final_err) + final_err = err; + } + + if (!(res->features & MLX5E_RX_RES_FEATURE_INNER_FT)) + continue; + + err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of inner indirect TIR %#x for traffic type %d: err = %d\n", + mlx5e_tir_get_tirn(&res->rss[tt].inner_indir_tir), tt, err); + if (!final_err) + final_err = err; + } + } + + for (ix = 0; ix < res->max_nch; ix++) { + err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); + if (err) { + mlx5_core_warn(res->mdev, "Failed to update LRO state of direct TIR %#x for channel %u: err = %d\n", + mlx5e_tir_get_tirn(&res->channels[ix].direct_tir), ix, err); + if (!final_err) + final_err = err; + } + } + + mlx5e_tir_builder_free(builder); + return final_err; +} + +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res) +{ + return res->rss_params.hash; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 068e48140a6f..1baeec5158a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -9,39 +9,59 @@ #include "tir.h" #include "fs.h" -#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE / 2) +struct mlx5e_rx_res; -struct mlx5e_rss_params { - struct mlx5e_rss_params_hash hash; - struct mlx5e_rss_params_indir indir; - u32 rx_hash_fields[MLX5E_NUM_INDIR_TIRS]; -}; +struct mlx5e_channels; +struct mlx5e_rss_params_hash; -struct mlx5e_rx_res { - struct mlx5e_rss_params rss_params; - - struct mlx5e_rqt indir_rqt; - struct { - struct mlx5e_tir indir_tir; - struct mlx5e_tir inner_indir_tir; - } rss[MLX5E_NUM_INDIR_TIRS]; - - struct { - struct mlx5e_rqt direct_rqt; - struct mlx5e_tir direct_tir; - struct mlx5e_rqt xsk_rqt; - struct mlx5e_tir xsk_tir; - } channels[MLX5E_MAX_NUM_CHANNELS]; - - struct { - struct mlx5e_rqt rqt; - struct mlx5e_tir tir; - } ptp; +enum mlx5e_rx_res_features { + MLX5E_RX_RES_FEATURE_INNER_FT = BIT(0), + MLX5E_RX_RES_FEATURE_XSK = BIT(1), + MLX5E_RX_RES_FEATURE_PTP = BIT(2), }; struct mlx5e_rss_params_traffic_type -mlx5e_rss_get_default_tt_config(enum mlx5e_traffic_types tt); +mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt); + +/* Setup */ +struct mlx5e_rx_res *mlx5e_rx_res_alloc(void); +int mlx5e_rx_res_init(struct mlx5e_rx_res *res, struct mlx5_core_dev *mdev, + enum mlx5e_rx_res_features features, unsigned int max_nch, + u32 drop_rqn, const struct mlx5e_lro_param *init_lro_param, + unsigned int init_nch); +void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res); +void mlx5e_rx_res_free(struct mlx5e_rx_res *res); + +/* TIRN getters for flow steering */ +u32 mlx5e_rx_res_get_tirn_direct(struct mlx5e_rx_res *res, unsigned int ix); +u32 mlx5e_rx_res_get_tirn_xsk(struct mlx5e_rx_res *res, unsigned int ix); +u32 mlx5e_rx_res_get_tirn_rss(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_rss_inner(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +u32 mlx5e_rx_res_get_tirn_ptp(struct mlx5e_rx_res *res); + +/* RQTN getters for modules that create their own TIRs */ +u32 mlx5e_rx_res_get_rqtn_direct(struct mlx5e_rx_res *res, unsigned int ix); + +/* Activate/deactivate API */ +void mlx5e_rx_res_channels_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs); +void mlx5e_rx_res_channels_deactivate(struct mlx5e_rx_res *res); +int mlx5e_rx_res_xsk_activate(struct mlx5e_rx_res *res, struct mlx5e_channels *chs, + unsigned int ix); +int mlx5e_rx_res_xsk_deactivate(struct mlx5e_rx_res *res, unsigned int ix); + +/* Configuration API */ struct mlx5e_rss_params_traffic_type -mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5e_traffic_types tt); +mlx5e_rx_res_rss_get_current_tt_config(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +void mlx5e_rx_res_rss_set_indir_uniform(struct mlx5e_rx_res *res, unsigned int nch); +void mlx5e_rx_res_rss_get_rxfh(struct mlx5e_rx_res *res, u32 *indir, u8 *key, u8 *hfunc); +int mlx5e_rx_res_rss_set_rxfh(struct mlx5e_rx_res *res, const u32 *indir, + const u8 *key, const u8 *hfunc); +u8 mlx5e_rx_res_rss_get_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt); +int mlx5e_rx_res_rss_set_hash_fields(struct mlx5e_rx_res *res, enum mlx5_traffic_types tt, + u8 rx_hash_fields); +int mlx5e_rx_res_lro_set_param(struct mlx5e_rx_res *res, struct mlx5e_lro_param *lro_param); + +/* Workaround for hairpin */ +struct mlx5e_rss_params_hash mlx5e_rx_res_get_current_hash(struct mlx5e_rx_res *res); #endif /* __MLX5_EN_RX_RES_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 91e7a01e32be..b1707b86aa16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2138,6 +2138,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, struct mlx5_tc_ct_priv *ct_priv; struct mlx5_core_dev *dev; const char *msg; + u64 mapping_id; int err; dev = priv->mdev; @@ -2153,13 +2154,17 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, if (!ct_priv) goto err_alloc; - ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true); + mapping_id = mlx5_query_nic_system_image_guid(dev); + + ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE, + sizeof(u16), 0, true); if (IS_ERR(ct_priv->zone_mapping)) { err = PTR_ERR(ct_priv->zone_mapping); goto err_mapping_zone; } - ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true); + ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS, + sizeof(u32) * 4, 0, true); if (IS_ERR(ct_priv->labels_mapping)) { err = PTR_ERR(ct_priv->labels_mapping); goto err_mapping_labels; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index afaf5b413066..d54607a42740 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -37,7 +37,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params struct mlx5e_priv *priv = t->priv; rq->wq_type = params->rq_wq_type; - rq->pdev = mdev->device; + rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; rq->clock = &mdev->clock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 71e8d66fa150..7b562d2c8a19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -122,7 +122,7 @@ static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv, * any Fill Ring entries at the setup stage. */ - err = mlx5e_xsk_redirect_rqt_to_channel(priv, priv->channels.c[ix]); + err = mlx5e_rx_res_xsk_activate(priv->rx_res, &priv->channels, ix); if (unlikely(err)) goto err_deactivate; @@ -169,7 +169,7 @@ static int mlx5e_xsk_disable_locked(struct mlx5e_priv *priv, u16 ix) goto remove_pool; c = priv->channels.c[ix]; - mlx5e_xsk_redirect_rqt_to_drop(priv, ix); + mlx5e_rx_res_xsk_deactivate(priv->rx_res, ix); mlx5e_deactivate_xsk(c); mlx5e_close_xsk(c); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index ab485d082729..c06267477b27 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -183,59 +183,3 @@ void mlx5e_deactivate_xsk(struct mlx5e_channel *c) mlx5e_deactivate_rq(&c->xskrq); /* TX queue is disabled on close. */ } - -int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c) -{ - return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[c->ix].xsk_rqt, c->xskrq.rqn); -} - -int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix) -{ - return mlx5e_rqt_redirect_direct(&priv->rx_res->channels[ix].xsk_rqt, priv->drop_rq.rqn); -} - -int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs) -{ - int err, i; - - if (!priv->xsk.refcnt) - return 0; - - for (i = 0; i < chs->num; i++) { - struct mlx5e_channel *c = chs->c[i]; - - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) - continue; - - err = mlx5e_xsk_redirect_rqt_to_channel(priv, c); - if (unlikely(err)) - goto err_stop; - } - - return 0; - -err_stop: - for (i--; i >= 0; i--) { - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) - continue; - - mlx5e_xsk_redirect_rqt_to_drop(priv, i); - } - - return err; -} - -void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs) -{ - int i; - - if (!priv->xsk.refcnt) - return; - - for (i = 0; i < chs->num; i++) { - if (!test_bit(MLX5E_CHANNEL_STATE_XSK, chs->c[i]->state)) - continue; - - mlx5e_xsk_redirect_rqt_to_drop(priv, i); - } -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h index ca20f1ff5e39..50e111b85efd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.h @@ -17,9 +17,5 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, void mlx5e_close_xsk(struct mlx5e_channel *c); void mlx5e_activate_xsk(struct mlx5e_channel *c); void mlx5e_deactivate_xsk(struct mlx5e_channel *c); -int mlx5e_xsk_redirect_rqt_to_channel(struct mlx5e_priv *priv, struct mlx5e_channel *c); -int mlx5e_xsk_redirect_rqt_to_drop(struct mlx5e_priv *priv, u16 ix); -int mlx5e_xsk_redirect_rqts_to_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs); -void mlx5e_xsk_redirect_rqts_to_drop(struct mlx5e_priv *priv, struct mlx5e_channels *chs); #endif /* __MLX5_EN_XSK_SETUP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index e51f60b55daa..4c4ee524176c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -16,13 +16,13 @@ struct mlx5e_accel_fs_tcp { struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES]; }; -static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i) +static enum mlx5_traffic_types fs_accel2tt(enum accel_fs_tcp_type i) { switch (i) { case ACCEL_FS_IPV4_TCP: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; default: /* ACCEL_FS_IPV6_TCP */ - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; } } @@ -161,7 +161,7 @@ static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv, fs_tcp = priv->fs.accel_tcp; accel_fs_t = &fs_tcp->tables[type]; - dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type)); + dest = mlx5_ttc_get_default_dest(priv->fs.ttc, fs_accel2tt(type)); rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -307,7 +307,7 @@ static int accel_fs_tcp_disable(struct mlx5e_priv *priv) for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) { /* Modify ttc rules destination to point back to the indir TIRs */ - err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_accel2tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -329,7 +329,7 @@ static int accel_fs_tcp_enable(struct mlx5e_priv *priv) dest.ft = priv->fs.accel_tcp->tables[i].t; /* Modify ttc rules destination to point on the accel_fs FTs */ - err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, fs_accel2tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] destination to accel failed, err(%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 34119ce92031..17da23dff0ed 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -41,11 +41,11 @@ struct mlx5e_ipsec_tx { }; /* IPsec RX flow steering */ -static enum mlx5e_traffic_types fs_esp2tt(enum accel_fs_esp_type i) +static enum mlx5_traffic_types fs_esp2tt(enum accel_fs_esp_type i) { if (i == ACCEL_FS_ESP4) - return MLX5E_TT_IPV4_IPSEC_ESP; - return MLX5E_TT_IPV6_IPSEC_ESP; + return MLX5_TT_IPV4_IPSEC_ESP; + return MLX5_TT_IPV6_IPSEC_ESP; } static int rx_err_add_rule(struct mlx5e_priv *priv, @@ -265,7 +265,8 @@ static int rx_create(struct mlx5e_priv *priv, enum accel_fs_esp_type type) accel_esp = priv->ipsec->rx_fs; fs_prot = &accel_esp->fs_prot[type]; - fs_prot->default_dest = mlx5e_ttc_get_default_dest(priv, fs_esp2tt(type)); + fs_prot->default_dest = + mlx5_ttc_get_default_dest(priv->fs.ttc, fs_esp2tt(type)); err = rx_err_create_ft(priv, fs_prot, &fs_prot->rx_err); if (err) @@ -301,7 +302,7 @@ static int rx_ft_get(struct mlx5e_priv *priv, enum accel_fs_esp_type type) /* connect */ dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = fs_prot->ft; - mlx5e_ttc_fwd_dest(priv, fs_esp2tt(type), &dest); + mlx5_ttc_fwd_dest(priv->fs.ttc, fs_esp2tt(type), &dest); out: mutex_unlock(&fs_prot->prot_mutex); @@ -320,7 +321,7 @@ static void rx_ft_put(struct mlx5e_priv *priv, enum accel_fs_esp_type type) goto out; /* disconnect */ - mlx5e_ttc_fwd_default_dest(priv, fs_esp2tt(type)); + mlx5_ttc_fwd_default_dest(priv->fs.ttc, fs_esp2tt(type)); /* remove FT */ rx_destroy(priv, type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index bfdbc3060755..62abce008c7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -628,7 +628,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, priv_rx->sw_stats = &priv->tls->sw_stats; mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx); - rqtn = mlx5e_rqt_get_rqtn(&priv->rx_res->channels[rxq].direct_rqt); + rqtn = mlx5e_rx_res_get_rqtn_direct(priv->rx_res, rxq); err = mlx5e_ktls_create_tir(mdev, &priv_rx->tir, rqtn); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index db6c6a96a6c9..fe5d82fa6e92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -98,17 +98,17 @@ struct arfs_rule { for (j = 0; j < ARFS_HASH_SIZE; j++) \ hlist_for_each_entry_safe(hn, tmp, &hash[j], hlist) -static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) +static enum mlx5_traffic_types arfs_get_tt(enum arfs_type type) { switch (type) { case ARFS_IPV4_TCP: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; case ARFS_IPV4_UDP: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; case ARFS_IPV6_TCP: - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; case ARFS_IPV6_UDP: - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; default: return -EINVAL; } @@ -120,7 +120,7 @@ static int arfs_disable(struct mlx5e_priv *priv) for (i = 0; i < ARFS_NUM_TYPES; i++) { /* Modify ttc rules destination back to their default */ - err = mlx5e_ttc_fwd_default_dest(priv, arfs_get_tt(i)); + err = mlx5_ttc_fwd_default_dest(priv->fs.ttc, arfs_get_tt(i)); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] default destination failed, err(%d)\n", @@ -149,7 +149,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv) for (i = 0; i < ARFS_NUM_TYPES; i++) { dest.ft = priv->fs.arfs->arfs_tables[i].ft.t; /* Modify ttc rules destination to point on the aRFS FTs */ - err = mlx5e_ttc_fwd_dest(priv, arfs_get_tt(i), &dest); + err = mlx5_ttc_fwd_dest(priv->fs.ttc, arfs_get_tt(i), &dest); if (err) { netdev_err(priv->netdev, "%s: modify ttc[%d] dest to arfs, failed err(%d)\n", @@ -194,7 +194,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct arfs_table *arfs_t = &priv->fs.arfs->arfs_tables[type]; struct mlx5_flow_destination dest = {}; MLX5_DECLARE_FLOW_ACT(flow_act); - enum mlx5e_traffic_types tt; + enum mlx5_traffic_types tt; int err = 0; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; @@ -205,10 +205,10 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, return -EINVAL; } - /* FIXME: Must use mlx5e_ttc_get_default_dest(), + /* FIXME: Must use mlx5_ttc_get_default_dest(), * but can't since TTC default is not setup yet ! */ - dest.tir_num = priv->rx_res->rss[tt].indir_tir.tirn; + dest.tir_num = mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt); arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, NULL, &flow_act, &dest, 1); @@ -552,7 +552,7 @@ static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv, 16); } dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dest.tir_num = priv->rx_res->channels[arfs_rule->rxq].direct_tir.tirn; + dest.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, arfs_rule->rxq); rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -575,7 +575,7 @@ static void arfs_modify_rule_rq(struct mlx5e_priv *priv, int err = 0; dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst.tir_num = priv->rx_res->channels[rxq].direct_tir.tirn; + dst.tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, rxq); err = mlx5_modify_rule_destination(rule, &dst, NULL); if (err) netdev_warn(priv->netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 9264d18b0964..2cf59bb5f898 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1172,7 +1172,7 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, u32 mlx5e_ethtool_get_rxfh_key_size(struct mlx5e_priv *priv) { - return sizeof(priv->rx_res->rss_params.hash.toeplitz_hash_key); + return sizeof_field(struct mlx5e_rss_params_hash, toeplitz_hash_key); } static u32 mlx5e_get_rxfh_key_size(struct net_device *netdev) @@ -1198,18 +1198,10 @@ int mlx5e_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, u8 *hfunc) { struct mlx5e_priv *priv = netdev_priv(netdev); - struct mlx5e_rss_params *rss; - rss = &priv->rx_res->rss_params; - - if (indir) - memcpy(indir, rss->indir.table, sizeof(rss->indir.table)); - - if (key) - memcpy(key, rss->hash.toeplitz_hash_key, sizeof(rss->hash.toeplitz_hash_key)); - - if (hfunc) - *hfunc = rss->hash.hfunc; + mutex_lock(&priv->state_lock); + mlx5e_rx_res_rss_get_rxfh(priv->rx_res, indir, key, hfunc); + mutex_unlock(&priv->state_lock); return 0; } @@ -1218,58 +1210,13 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_rss_params *rss; - bool refresh_tirs = false; - bool refresh_rqt = false; - - if ((hfunc != ETH_RSS_HASH_NO_CHANGE) && - (hfunc != ETH_RSS_HASH_XOR) && - (hfunc != ETH_RSS_HASH_TOP)) - return -EINVAL; + int err; mutex_lock(&priv->state_lock); - - rss = &priv->rx_res->rss_params; - - if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != rss->hash.hfunc) { - rss->hash.hfunc = hfunc; - refresh_rqt = true; - refresh_tirs = true; - } - - if (indir) { - memcpy(rss->indir.table, indir, sizeof(rss->indir.table)); - refresh_rqt = true; - } - - if (key) { - memcpy(rss->hash.toeplitz_hash_key, key, sizeof(rss->hash.toeplitz_hash_key)); - refresh_tirs = refresh_tirs || rss->hash.hfunc == ETH_RSS_HASH_TOP; - } - - if (refresh_rqt && test_bit(MLX5E_STATE_OPENED, &priv->state)) { - u32 *rqns; - - rqns = kvmalloc_array(priv->channels.num, sizeof(*rqns), GFP_KERNEL); - if (rqns) { - unsigned int ix; - - for (ix = 0; ix < priv->channels.num; ix++) - rqns[ix] = priv->channels.c[ix]->rq.rqn; - - mlx5e_rqt_redirect_indir(&priv->rx_res->indir_rqt, rqns, - priv->channels.num, - rss->hash.hfunc, &rss->indir); - kvfree(rqns); - } - } - - if (refresh_tirs) - mlx5e_modify_tirs_hash(priv); - + err = mlx5e_rx_res_rss_set_rxfh(priv->rx_res, indir, key, + hfunc == ETH_RSS_HASH_NO_CHANGE ? NULL : &hfunc); mutex_unlock(&priv->state_lock); - - return 0; + return err; } #define MLX5E_PFC_PREVEN_AUTO_TOUT_MSEC 100 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index e79815763edf..5c754e9af669 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -718,7 +718,7 @@ static int mlx5e_add_promisc_rule(struct mlx5e_priv *priv) if (!spec) return -ENOMEM; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.ttc.ft.t; + dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc); rule_p = &priv->fs.promisc.rule; *rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); @@ -854,587 +854,59 @@ void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft) ft->t = NULL; } -static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc) -{ - int i; - - for (i = 0; i < MLX5E_NUM_TT; i++) { - if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) { - mlx5_del_flow_rules(ttc->rules[i].rule); - ttc->rules[i].rule = NULL; - } - } - - for (i = 0; i < MLX5E_NUM_TUNNEL_TT; i++) { - if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { - mlx5_del_flow_rules(ttc->tunnel_rules[i]); - ttc->tunnel_rules[i] = NULL; - } - } -} - -struct mlx5e_etype_proto { - u16 etype; - u8 proto; -}; - -static struct mlx5e_etype_proto ttc_rules[] = { - [MLX5E_TT_IPV4_TCP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_TCP, - }, - [MLX5E_TT_IPV6_TCP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_TCP, - }, - [MLX5E_TT_IPV4_UDP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_UDP, - }, - [MLX5E_TT_IPV6_UDP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_UDP, - }, - [MLX5E_TT_IPV4_IPSEC_AH] = { - .etype = ETH_P_IP, - .proto = IPPROTO_AH, - }, - [MLX5E_TT_IPV6_IPSEC_AH] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_AH, - }, - [MLX5E_TT_IPV4_IPSEC_ESP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_ESP, - }, - [MLX5E_TT_IPV6_IPSEC_ESP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_ESP, - }, - [MLX5E_TT_IPV4] = { - .etype = ETH_P_IP, - .proto = 0, - }, - [MLX5E_TT_IPV6] = { - .etype = ETH_P_IPV6, - .proto = 0, - }, - [MLX5E_TT_ANY] = { - .etype = 0, - .proto = 0, - }, -}; - -static struct mlx5e_etype_proto ttc_tunnel_rules[] = { - [MLX5E_TT_IPV4_GRE] = { - .etype = ETH_P_IP, - .proto = IPPROTO_GRE, - }, - [MLX5E_TT_IPV6_GRE] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_GRE, - }, - [MLX5E_TT_IPV4_IPIP] = { - .etype = ETH_P_IP, - .proto = IPPROTO_IPIP, - }, - [MLX5E_TT_IPV6_IPIP] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_IPIP, - }, - [MLX5E_TT_IPV4_IPV6] = { - .etype = ETH_P_IP, - .proto = IPPROTO_IPV6, - }, - [MLX5E_TT_IPV6_IPV6] = { - .etype = ETH_P_IPV6, - .proto = IPPROTO_IPV6, - }, - -}; - -u8 mlx5e_get_proto_by_tunnel_type(enum mlx5e_tunnel_types tt) -{ - return ttc_tunnel_rules[tt].proto; -} - -static bool mlx5e_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, u8 proto_type) -{ - switch (proto_type) { - case IPPROTO_GRE: - return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); - case IPPROTO_IPIP: - case IPPROTO_IPV6: - return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || - MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx)); - default: - return false; - } -} - -static bool mlx5e_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev) -{ - int tt; - - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (mlx5e_tunnel_proto_supported_rx(mdev, ttc_tunnel_rules[tt].proto)) - return true; - } - return false; -} - -bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) -{ - return (mlx5e_tunnel_any_rx_proto_supported(mdev) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ft_field_support.inner_ip_version)); -} - -static u8 mlx5e_etype_to_ipv(u16 ethertype) -{ - if (ethertype == ETH_P_IP) - return 4; - - if (ethertype == ETH_P_IPV6) - return 6; - - return 0; -} - -static struct mlx5_flow_handle * -mlx5e_generate_ttc_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, - u16 etype, - u8 proto) -{ - int match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); - MLX5_DECLARE_FLOW_ACT(flow_act); - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err = 0; - u8 ipv; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return ERR_PTR(-ENOMEM); - - if (proto) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); - } - - ipv = mlx5e_etype_to_ipv(etype); - if (match_ipv_outer && ipv) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); - } else if (etype) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); - } - - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add rule failed\n", __func__); - } - - kvfree(spec); - return err ? ERR_PTR(err) : rule; -} - -static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv, - struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_handle **trules; - struct mlx5e_ttc_rule *rules; - struct mlx5_flow_table *ft; - int tt; - int err; - - ft = ttc->ft.t; - rules = ttc->rules; - - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - struct mlx5e_ttc_rule *rule = &rules[tt]; - - if (tt == MLX5E_TT_ANY) - dest.tir_num = params->any_tt_tirn; - else - dest.tir_num = params->indir_tirn[tt]; - - rule->rule = mlx5e_generate_ttc_rule(priv, ft, &dest, - ttc_rules[tt].etype, - ttc_rules[tt].proto); - if (IS_ERR(rule->rule)) { - err = PTR_ERR(rule->rule); - rule->rule = NULL; - goto del_rules; - } - rule->default_dest = dest; - } - - if (!params->inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) - return 0; - - trules = ttc->tunnel_rules; - dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = params->inner_ttc->ft.t; - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (!mlx5e_tunnel_proto_supported_rx(priv->mdev, - ttc_tunnel_rules[tt].proto)) - continue; - trules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, - ttc_tunnel_rules[tt].etype, - ttc_tunnel_rules[tt].proto); - if (IS_ERR(trules[tt])) { - err = PTR_ERR(trules[tt]); - trules[tt] = NULL; - goto del_rules; - } - } - - return 0; - -del_rules: - mlx5e_cleanup_ttc_rules(ttc); - return err; -} - -static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc, - bool use_ipv) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5e_flow_table *ft = &ttc->ft; - int ix = 0; - u32 *in; - int err; - u8 *mc; - - ft->g = kcalloc(MLX5E_TTC_NUM_GROUPS, - sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) - return -ENOMEM; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - kfree(ft->g); - ft->g = NULL; - return -ENOMEM; - } - - /* L4 Group */ - mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); - if (use_ipv) - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); - else - MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP1_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* L3 Group */ - MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP2_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Any Group */ - memset(in, 0, inlen); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_TTC_GROUP3_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - kvfree(in); - return 0; - -err: - err = PTR_ERR(ft->g[ft->num_groups]); - ft->g[ft->num_groups] = NULL; - kvfree(in); - - return err; -} - -static struct mlx5_flow_handle * -mlx5e_generate_inner_ttc_rule(struct mlx5e_priv *priv, - struct mlx5_flow_table *ft, - struct mlx5_flow_destination *dest, - u16 etype, u8 proto) -{ - MLX5_DECLARE_FLOW_ACT(flow_act); - struct mlx5_flow_handle *rule; - struct mlx5_flow_spec *spec; - int err = 0; - u8 ipv; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - if (!spec) - return ERR_PTR(-ENOMEM); - - ipv = mlx5e_etype_to_ipv(etype); - if (etype && ipv) { - spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); - MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); - } - - if (proto) { - spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); - MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); - } - - rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); - if (IS_ERR(rule)) { - err = PTR_ERR(rule); - netdev_err(priv->netdev, "%s: add rule failed\n", __func__); - } - - kvfree(spec); - return err ? ERR_PTR(err) : rule; -} - -static int mlx5e_generate_inner_ttc_table_rules(struct mlx5e_priv *priv, - struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5_flow_destination dest = {}; - struct mlx5e_ttc_rule *rules; - struct mlx5_flow_table *ft; - int err; - int tt; - - ft = ttc->ft.t; - rules = ttc->rules; - dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; - - for (tt = 0; tt < MLX5E_NUM_TT; tt++) { - struct mlx5e_ttc_rule *rule = &rules[tt]; - - if (tt == MLX5E_TT_ANY) - dest.tir_num = params->any_tt_tirn; - else - dest.tir_num = params->indir_tirn[tt]; - - rule->rule = mlx5e_generate_inner_ttc_rule(priv, ft, &dest, - ttc_rules[tt].etype, - ttc_rules[tt].proto); - if (IS_ERR(rule->rule)) { - err = PTR_ERR(rule->rule); - rule->rule = NULL; - goto del_rules; - } - rule->default_dest = dest; - } - - return 0; - -del_rules: - - mlx5e_cleanup_ttc_rules(ttc); - return err; -} - -static int mlx5e_create_inner_ttc_table_groups(struct mlx5e_ttc_table *ttc) -{ - int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - struct mlx5e_flow_table *ft = &ttc->ft; - int ix = 0; - u32 *in; - int err; - u8 *mc; - - ft->g = kcalloc(MLX5E_INNER_TTC_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL); - if (!ft->g) - return -ENOMEM; - in = kvzalloc(inlen, GFP_KERNEL); - if (!in) { - kfree(ft->g); - ft->g = NULL; - return -ENOMEM; - } - - /* L4 Group */ - mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); - MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); - MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); - MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP1_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* L3 Group */ - MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP2_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - /* Any Group */ - memset(in, 0, inlen); - MLX5_SET_CFG(in, start_flow_index, ix); - ix += MLX5E_INNER_TTC_GROUP3_SIZE; - MLX5_SET_CFG(in, end_flow_index, ix - 1); - ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in); - if (IS_ERR(ft->g[ft->num_groups])) - goto err; - ft->num_groups++; - - kvfree(in); - return 0; - -err: - err = PTR_ERR(ft->g[ft->num_groups]); - ft->g[ft->num_groups] = NULL; - kvfree(in); - - return err; -} - -void mlx5e_set_ttc_basic_params(struct mlx5e_priv *priv, - struct ttc_params *ttc_params) -{ - ttc_params->any_tt_tirn = priv->rx_res->channels[0].direct_tir.tirn; - ttc_params->inner_ttc = &priv->fs.inner_ttc; -} - -static void mlx5e_set_inner_ttc_ft_params(struct ttc_params *ttc_params) +static void mlx5e_set_inner_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; - ft_attr->max_fte = MLX5E_INNER_TTC_TABLE_SIZE; + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); ft_attr->level = MLX5E_INNER_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) : + mlx5e_rx_res_get_tirn_rss_inner(priv->rx_res, + tt); + } } -void mlx5e_set_ttc_ft_params(struct ttc_params *ttc_params) +void mlx5e_set_ttc_params(struct mlx5e_priv *priv, + struct ttc_params *ttc_params, bool tunnel) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; + int tt; - ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; + memset(ttc_params, 0, sizeof(*ttc_params)); + ttc_params->ns = mlx5_get_flow_namespace(priv->mdev, + MLX5_FLOW_NAMESPACE_KERNEL); ft_attr->level = MLX5E_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; -} - -static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - struct mlx5e_flow_table *ft = &ttc->ft; - int err; - ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); - if (IS_ERR(ft->t)) { - err = PTR_ERR(ft->t); - ft->t = NULL; - return err; + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_rx_res_get_tirn_direct(priv->rx_res, 0) : + mlx5e_rx_res_get_tirn_rss(priv->rx_res, tt); } - err = mlx5e_create_inner_ttc_table_groups(ttc); - if (err) - goto err; - - err = mlx5e_generate_inner_ttc_table_rules(priv, params, ttc); - if (err) - goto err; - - return 0; - -err: - mlx5e_destroy_flow_table(ft); - return err; -} - -static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc) -{ - mlx5e_cleanup_ttc_rules(ttc); - mlx5e_destroy_flow_table(&ttc->ft); -} - -void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv, - struct mlx5e_ttc_table *ttc) -{ - mlx5e_cleanup_ttc_rules(ttc); - mlx5e_destroy_flow_table(&ttc->ft); -} - -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, struct ttc_params *params, - struct mlx5e_ttc_table *ttc) -{ - bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version); - struct mlx5e_flow_table *ft = &ttc->ft; - int err; + ttc_params->inner_ttc = tunnel; + if (!tunnel || !mlx5_tunnel_inner_ft_supported(priv->mdev)) + return; - ft->t = mlx5_create_flow_table(priv->fs.ns, ¶ms->ft_attr); - if (IS_ERR(ft->t)) { - err = PTR_ERR(ft->t); - ft->t = NULL; - return err; + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + ttc_params->tunnel_dests[tt].type = + MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + ttc_params->tunnel_dests[tt].ft = + mlx5_get_ttc_flow_table(priv->fs.inner_ttc); } - - err = mlx5e_create_ttc_table_groups(ttc, match_ipv_outer); - if (err) - goto err; - - err = mlx5e_generate_ttc_table_rules(priv, params, ttc); - if (err) - goto err; - - return 0; -err: - mlx5e_destroy_flow_table(ft); - return err; -} - -int mlx5e_ttc_fwd_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type, - struct mlx5_flow_destination *new_dest) -{ - return mlx5_modify_rule_destination(priv->fs.ttc.rules[type].rule, new_dest, NULL); -} - -struct mlx5_flow_destination -mlx5e_ttc_get_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type) -{ - struct mlx5_flow_destination *dest = &priv->fs.ttc.rules[type].default_dest; - - WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR, - "TTC[%d] default dest is not setup yet", type); - - return *dest; -} - -int mlx5e_ttc_fwd_default_dest(struct mlx5e_priv *priv, enum mlx5e_traffic_types type) -{ - struct mlx5_flow_destination dest = mlx5e_ttc_get_default_dest(priv, type); - - return mlx5e_ttc_fwd_dest(priv, type, &dest); } static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv, @@ -1467,7 +939,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv, outer_headers.dmac_47_16); dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest.ft = priv->fs.ttc.ft.t; + dest.ft = mlx5_get_ttc_flow_table(priv->fs.ttc); switch (type) { case MLX5E_FULLMATCH: @@ -1763,10 +1235,46 @@ static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) kvfree(priv->fs.vlan); } -int mlx5e_create_flow_steering(struct mlx5e_priv *priv) +static void mlx5e_destroy_inner_ttc_table(struct mlx5e_priv *priv) +{ + if (!mlx5_tunnel_inner_ft_supported(priv->mdev)) + return; + mlx5_destroy_ttc_table(priv->fs.inner_ttc); +} + +void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) +{ + mlx5_destroy_ttc_table(priv->fs.ttc); +} + +static int mlx5e_create_inner_ttc_table(struct mlx5e_priv *priv) { struct ttc_params ttc_params = {}; - int tt, err; + + if (!mlx5_tunnel_inner_ft_supported(priv->mdev)) + return 0; + + mlx5e_set_inner_ttc_params(priv, &ttc_params); + priv->fs.inner_ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(priv->fs.inner_ttc)) + return PTR_ERR(priv->fs.inner_ttc); + return 0; +} + +int mlx5e_create_ttc_table(struct mlx5e_priv *priv) +{ + struct ttc_params ttc_params = {}; + + mlx5e_set_ttc_params(priv, &ttc_params, true); + priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(priv->fs.ttc)) + return PTR_ERR(priv->fs.ttc); + return 0; +} + +int mlx5e_create_flow_steering(struct mlx5e_priv *priv) +{ + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -1781,26 +1289,15 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - mlx5e_set_ttc_basic_params(priv, &ttc_params); - - if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) { - mlx5e_set_inner_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].inner_indir_tir.tirn; - - err = mlx5e_create_inner_ttc_table(priv, &ttc_params, &priv->fs.inner_ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create inner ttc table, err=%d\n", - err); - goto err_destroy_arfs_tables; - } + err = mlx5e_create_inner_ttc_table(priv); + if (err) { + netdev_err(priv->netdev, + "Failed to create inner ttc table, err=%d\n", + err); + goto err_destroy_arfs_tables; } - mlx5e_set_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -1834,10 +1331,9 @@ err_destory_vlan_table: err_destroy_l2_table: mlx5e_destroy_l2_table(priv); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_ttc_table(priv); err_destroy_inner_ttc_table: - if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_destroy_inner_ttc_table(priv); err_destroy_arfs_tables: mlx5e_arfs_destroy_tables(priv); @@ -1849,9 +1345,8 @@ void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) mlx5e_ptp_free_rx_fs(priv); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - if (mlx5e_tunnel_inner_ft_supported(priv->mdev)) - mlx5e_destroy_inner_ttc_table(priv, &priv->fs.inner_ttc); + mlx5e_destroy_ttc_table(priv); + mlx5e_destroy_inner_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); mlx5e_ethtool_cleanup_steering(priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 494f6f832407..3d8918f9399e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -433,9 +433,9 @@ add_ethtool_flow_rule(struct mlx5e_priv *priv, dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; if (group == MLX5E_RQ_GROUP_XSK) - dst->tir_num = priv->rx_res->channels[ix].xsk_tir.tirn; + dst->tir_num = mlx5e_rx_res_get_tirn_xsk(priv->rx_res, ix); else - dst->tir_num = priv->rx_res->channels[ix].direct_tir.tirn; + dst->tir_num = mlx5e_rx_res_get_tirn_direct(priv->rx_res, ix); flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } @@ -786,43 +786,44 @@ void mlx5e_ethtool_init_steering(struct mlx5e_priv *priv) INIT_LIST_HEAD(&priv->fs.ethtool.rules); } -static enum mlx5e_traffic_types flow_type_to_traffic_type(u32 flow_type) +static int flow_type_to_traffic_type(u32 flow_type) { switch (flow_type) { case TCP_V4_FLOW: - return MLX5E_TT_IPV4_TCP; + return MLX5_TT_IPV4_TCP; case TCP_V6_FLOW: - return MLX5E_TT_IPV6_TCP; + return MLX5_TT_IPV6_TCP; case UDP_V4_FLOW: - return MLX5E_TT_IPV4_UDP; + return MLX5_TT_IPV4_UDP; case UDP_V6_FLOW: - return MLX5E_TT_IPV6_UDP; + return MLX5_TT_IPV6_UDP; case AH_V4_FLOW: - return MLX5E_TT_IPV4_IPSEC_AH; + return MLX5_TT_IPV4_IPSEC_AH; case AH_V6_FLOW: - return MLX5E_TT_IPV6_IPSEC_AH; + return MLX5_TT_IPV6_IPSEC_AH; case ESP_V4_FLOW: - return MLX5E_TT_IPV4_IPSEC_ESP; + return MLX5_TT_IPV4_IPSEC_ESP; case ESP_V6_FLOW: - return MLX5E_TT_IPV6_IPSEC_ESP; + return MLX5_TT_IPV6_IPSEC_ESP; case IPV4_FLOW: - return MLX5E_TT_IPV4; + return MLX5_TT_IPV4; case IPV6_FLOW: - return MLX5E_TT_IPV6; + return MLX5_TT_IPV6; default: - return MLX5E_NUM_INDIR_TIRS; + return -EINVAL; } } static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { - enum mlx5e_traffic_types tt; u8 rx_hash_field = 0; + int err; + int tt; tt = flow_type_to_traffic_type(nfc->flow_type); - if (tt == MLX5E_NUM_INDIR_TIRS) - return -EINVAL; + if (tt < 0) + return tt; /* RSS does not support anything other than hashing to queues * on src IP, dest IP, TCP/UDP src port and TCP/UDP dest @@ -848,29 +849,23 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv, rx_hash_field |= MLX5_HASH_FIELD_SEL_L4_DPORT; mutex_lock(&priv->state_lock); - - if (rx_hash_field == priv->rx_res->rss_params.rx_hash_fields[tt]) - goto out; - - priv->rx_res->rss_params.rx_hash_fields[tt] = rx_hash_field; - mlx5e_modify_tirs_hash(priv); - -out: + err = mlx5e_rx_res_rss_set_hash_fields(priv->rx_res, tt, rx_hash_field); mutex_unlock(&priv->state_lock); - return 0; + + return err; } static int mlx5e_get_rss_hash_opt(struct mlx5e_priv *priv, struct ethtool_rxnfc *nfc) { - enum mlx5e_traffic_types tt; u32 hash_field = 0; + int tt; tt = flow_type_to_traffic_type(nfc->flow_type); - if (tt == MLX5E_NUM_INDIR_TIRS) - return -EINVAL; + if (tt < 0) + return tt; - hash_field = priv->rx_res->rss_params.rx_hash_fields[tt]; + hash_field = mlx5e_rx_res_rss_get_hash_fields(priv->rx_res, tt); nfc->data = 0; if (hash_field & MLX5_HASH_FIELD_SEL_SRC_IP) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index c663811f210b..ccc569c4ee50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1627,7 +1627,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -2194,202 +2194,14 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) chs->num = 0; } -int mlx5e_create_indirect_rqt(struct mlx5e_priv *priv) -{ - int err; - - err = mlx5e_rqt_init_direct(&priv->rx_res->indir_rqt, priv->mdev, true, - priv->drop_rq.rqn); - if (err) - mlx5_core_warn(priv->mdev, "create indirect rqts failed, %d\n", err); - return err; -} - -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) -{ - int err; - int ix; - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].direct_rqt, - priv->mdev, false, priv->drop_rq.rqn); - if (unlikely(err)) - goto err_destroy_rqts; - } - - return 0; - -err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create direct rqts failed, %d\n", err); - while (--ix >= 0) - mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt); - - return err; -} - -static int mlx5e_create_xsk_rqts(struct mlx5e_priv *priv) -{ - int err; - int ix; - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_rqt_init_direct(&priv->rx_res->channels[ix].xsk_rqt, - priv->mdev, false, priv->drop_rq.rqn); - if (unlikely(err)) - goto err_destroy_rqts; - } - - return 0; - -err_destroy_rqts: - mlx5_core_warn(priv->mdev, "create xsk rqts failed, %d\n", err); - while (--ix >= 0) - mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt); - - return err; -} - -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv) -{ - unsigned int ix; - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_rqt_destroy(&priv->rx_res->channels[ix].direct_rqt); -} - -static void mlx5e_destroy_xsk_rqts(struct mlx5e_priv *priv) -{ - unsigned int ix; - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_rqt_destroy(&priv->rx_res->channels[ix].xsk_rqt); -} - -static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv, - struct mlx5e_channels *chs) -{ - struct mlx5e_rx_res *res = priv->rx_res; - unsigned int ix; - u32 *rqns; - - rqns = kvmalloc_array(chs->num, sizeof(*rqns), GFP_KERNEL); - if (rqns) { - for (ix = 0; ix < chs->num; ix++) - rqns[ix] = chs->c[ix]->rq.rqn; - - mlx5e_rqt_redirect_indir(&res->indir_rqt, rqns, chs->num, - res->rss_params.hash.hfunc, - &res->rss_params.indir); - kvfree(rqns); - } - - for (ix = 0; ix < priv->max_nch; ix++) { - u32 rqn = priv->drop_rq.rqn; - - if (ix < chs->num) - rqn = chs->c[ix]->rq.rqn; - - mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, rqn); - } - - if (priv->profile->rx_ptp_support) { - u32 rqn; - - if (mlx5e_ptp_get_rqn(priv->channels.ptp, &rqn)) - rqn = priv->drop_rq.rqn; - - mlx5e_rqt_redirect_direct(&res->ptp.rqt, rqn); - } -} - -static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv) -{ - struct mlx5e_rx_res *res = priv->rx_res; - unsigned int ix; - - mlx5e_rqt_redirect_direct(&res->indir_rqt, priv->drop_rq.rqn); - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_rqt_redirect_direct(&res->channels[ix].direct_rqt, priv->drop_rq.rqn); - - if (priv->profile->rx_ptp_support) - mlx5e_rqt_redirect_direct(&res->ptp.rqt, priv->drop_rq.rqn); -} - -int mlx5e_modify_tirs_hash(struct mlx5e_priv *priv) -{ - struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash; - struct mlx5e_rss_params_traffic_type rss_tt; - struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_tir_builder *builder; - enum mlx5e_traffic_types tt; - - builder = mlx5e_tir_builder_alloc(true); - if (!builder) - return -ENOMEM; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false); - mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); - mlx5e_tir_builder_clear(builder); - } - - /* Verify inner tirs resources allocated */ - if (!res->rss[0].inner_indir_tir.tirn) - goto out; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true); - mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); - mlx5e_tir_builder_clear(builder); - } - -out: - mlx5e_tir_builder_free(builder); - return 0; -} - static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) { struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_tir_builder *builder; struct mlx5e_lro_param lro_param; - enum mlx5e_traffic_types tt; - int err; - int ix; - - builder = mlx5e_tir_builder_alloc(true); - if (!builder) - return -ENOMEM; lro_param = mlx5e_get_lro_param(&priv->channels.params); - mlx5e_tir_builder_build_lro(builder, &lro_param); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5e_tir_modify(&res->rss[tt].indir_tir, builder); - if (err) - goto err_free_builder; - - /* Verify inner tirs resources allocated */ - if (!res->rss[0].inner_indir_tir.tirn) - continue; - - err = mlx5e_tir_modify(&res->rss[tt].inner_indir_tir, builder); - if (err) - goto err_free_builder; - } - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_tir_modify(&res->channels[ix].direct_tir, builder); - if (err) - goto err_free_builder; - } -err_free_builder: - mlx5e_tir_builder_free(builder); - return err; + return mlx5e_rx_res_lro_set_param(res, &lro_param); } static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_lro); @@ -2572,8 +2384,7 @@ int mlx5e_num_channels_changed(struct mlx5e_priv *priv) /* This function may be called on attach, before priv->rx_res is created. */ if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res) - mlx5e_build_default_indir_rqt(priv->rx_res->rss_params.indir.table, - MLX5E_INDIR_RQT_SIZE, count); + mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count); return 0; } @@ -2633,18 +2444,14 @@ void mlx5e_activate_priv_channels(struct mlx5e_priv *priv) mlx5e_wait_channels_min_rx_wqes(&priv->channels); - if (priv->rx_res) { - mlx5e_redirect_rqts_to_channels(priv, &priv->channels); - mlx5e_xsk_redirect_rqts_to_channels(priv, &priv->channels); - } + if (priv->rx_res) + mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels); } void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv) { - if (priv->rx_res) { - mlx5e_xsk_redirect_rqts_to_drop(priv, &priv->channels); - mlx5e_redirect_rqts_to_drop(priv); - } + if (priv->rx_res) + mlx5e_rx_res_channels_deactivate(priv->rx_res); if (mlx5e_is_vport_rep(priv)) mlx5e_remove_sqs_fwd_rules(priv); @@ -3019,194 +2826,6 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_destroy_tises(priv); } -int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc) -{ - struct mlx5e_rss_params_hash *rss_hash = &priv->rx_res->rss_params.hash; - bool inner_ft_support = priv->channels.params.tunneled_offload_en; - struct mlx5e_rss_params_traffic_type rss_tt; - struct mlx5e_rx_res *res = priv->rx_res; - enum mlx5e_traffic_types tt, max_tt; - struct mlx5e_tir_builder *builder; - struct mlx5e_lro_param lro_param; - u32 indir_rqtn; - int err = 0; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - lro_param = mlx5e_get_lro_param(&priv->channels.params); - indir_rqtn = mlx5e_rqt_get_rqtn(&res->indir_rqt); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, - indir_rqtn, inner_ft_support); - mlx5e_tir_builder_build_lro(builder, &lro_param); - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false); - - err = mlx5e_tir_init(&res->rss[tt].indir_tir, builder, priv->mdev, true); - if (err) { - mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err); - goto err_destroy_tirs; - } - - mlx5e_tir_builder_clear(builder); - } - - if (!inner_ttc || !mlx5e_tunnel_inner_ft_supported(priv->mdev)) - goto out; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, - indir_rqtn, inner_ft_support); - mlx5e_tir_builder_build_lro(builder, &lro_param); - rss_tt = mlx5e_rx_res_rss_get_current_tt_config(res, tt); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, true); - - err = mlx5e_tir_init(&res->rss[tt].inner_indir_tir, builder, priv->mdev, true); - if (err) { - mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err); - goto err_destroy_inner_tirs; - } - - mlx5e_tir_builder_clear(builder); - } - - goto out; - -err_destroy_inner_tirs: - max_tt = tt; - for (tt = 0; tt < max_tt; tt++) - mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); - - tt = MLX5E_NUM_INDIR_TIRS; -err_destroy_tirs: - max_tt = tt; - for (tt = 0; tt < max_tt; tt++) - mlx5e_tir_destroy(&res->rss[tt].indir_tir); - -out: - mlx5e_tir_builder_free(builder); - - return err; -} - -static int mlx5e_create_direct_tir(struct mlx5e_priv *priv, struct mlx5e_tir *tir, - struct mlx5e_tir_builder *builder, struct mlx5e_rqt *rqt) -{ - bool inner_ft_support = priv->channels.params.tunneled_offload_en; - struct mlx5e_lro_param lro_param; - int err = 0; - - lro_param = mlx5e_get_lro_param(&priv->channels.params); - - mlx5e_tir_builder_build_rqt(builder, priv->mdev->mlx5e_res.hw_objs.td.tdn, - mlx5e_rqt_get_rqtn(rqt), inner_ft_support); - mlx5e_tir_builder_build_lro(builder, &lro_param); - mlx5e_tir_builder_build_direct(builder); - - err = mlx5e_tir_init(tir, builder, priv->mdev, true); - if (unlikely(err)) - mlx5_core_warn(priv->mdev, "create tirs failed, %d\n", err); - - mlx5e_tir_builder_clear(builder); - - return err; -} - -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) -{ - struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_tir_builder *builder; - int err = 0; - int ix; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_create_direct_tir(priv, &res->channels[ix].direct_tir, - builder, &res->channels[ix].direct_rqt); - if (err) - goto err_destroy_tirs; - } - - goto out; - -err_destroy_tirs: - while (--ix >= 0) - mlx5e_tir_destroy(&res->channels[ix].direct_tir); - -out: - mlx5e_tir_builder_free(builder); - - return err; -} - -static int mlx5e_create_xsk_tirs(struct mlx5e_priv *priv) -{ - struct mlx5e_rx_res *res = priv->rx_res; - struct mlx5e_tir_builder *builder; - int err; - int ix; - - builder = mlx5e_tir_builder_alloc(false); - if (!builder) - return -ENOMEM; - - for (ix = 0; ix < priv->max_nch; ix++) { - err = mlx5e_create_direct_tir(priv, &res->channels[ix].xsk_tir, - builder, &res->channels[ix].xsk_rqt); - if (err) - goto err_destroy_tirs; - } - - goto out; - -err_destroy_tirs: - while (--ix >= 0) - mlx5e_tir_destroy(&res->channels[ix].xsk_tir); - -out: - mlx5e_tir_builder_free(builder); - - return err; -} - -void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) -{ - struct mlx5e_rx_res *res = priv->rx_res; - enum mlx5e_traffic_types tt; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - mlx5e_tir_destroy(&res->rss[tt].indir_tir); - - /* Verify inner tirs resources allocated */ - if (!res->rss[0].inner_indir_tir.tirn) - return; - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - mlx5e_tir_destroy(&res->rss[tt].inner_indir_tir); -} - -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) -{ - unsigned int ix; - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_tir_destroy(&priv->rx_res->channels[ix].direct_tir); -} - -static void mlx5e_destroy_xsk_tirs(struct mlx5e_priv *priv) -{ - unsigned int ix; - - for (ix = 0; ix < priv->max_nch; ix++) - mlx5e_tir_destroy(&priv->rx_res->channels[ix].xsk_tir); -} - static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool enable) { int err = 0; @@ -3223,7 +2842,7 @@ static int mlx5e_modify_channels_scatter_fcs(struct mlx5e_channels *chs, bool en static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) { - int err = 0; + int err; int i; for (i = 0; i < chs->num; i++) { @@ -3231,6 +2850,8 @@ static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd) if (err) return err; } + if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state)) + return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd); return 0; } @@ -3668,6 +3289,24 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) return 0; } +static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev, + netdev_features_t features) +{ + features &= ~NETIF_F_HW_TLS_RX; + if (netdev->features & NETIF_F_HW_TLS_RX) + netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_HW_TLS_TX; + if (netdev->features & NETIF_F_HW_TLS_TX) + netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); + + features &= ~NETIF_F_NTUPLE; + if (netdev->features & NETIF_F_NTUPLE) + netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n"); + + return features; +} + static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_features_t features) { @@ -3699,15 +3338,8 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev, netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n"); } - if (mlx5e_is_uplink_rep(priv)) { - features &= ~NETIF_F_HW_TLS_RX; - if (netdev->features & NETIF_F_HW_TLS_RX) - netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n"); - - features &= ~NETIF_F_HW_TLS_TX; - if (netdev->features & NETIF_F_HW_TLS_TX) - netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n"); - } + if (mlx5e_is_uplink_rep(priv)) + features = mlx5e_fix_uplink_rep_features(netdev, features); mutex_unlock(&priv->state_lock); @@ -4446,15 +4078,6 @@ const struct net_device_ops mlx5e_netdev_ops = { .ndo_get_devlink_port = mlx5e_get_devlink_port, }; -void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, - int num_channels) -{ - int i; - - for (i = 0; i < len; i++) - indirection_rqt[i] = i % num_channels; -} - static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) { int i; @@ -4467,21 +4090,6 @@ static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeo return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); } -void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, - u16 num_channels) -{ - enum mlx5e_traffic_types tt; - - rss_params->hash.hfunc = ETH_RSS_HASH_TOP; - netdev_rss_key_fill(rss_params->hash.toeplitz_hash_key, - sizeof(rss_params->hash.toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(rss_params->indir.table, - MLX5E_INDIR_RQT_SIZE, num_channels); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - rss_params->rx_hash_fields[tt] = - mlx5e_rss_get_default_tt_config(tt).rx_hash_fields; -} - void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { struct mlx5e_params *params = &priv->channels.params; @@ -4543,7 +4151,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* TX inline */ mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode); - params->tunneled_offload_en = mlx5e_tunnel_inner_ft_supported(mdev); + params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev); /* AF_XDP */ params->xsk = xsk; @@ -4603,8 +4211,8 @@ static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev) { int tt; - for (tt = 0; tt < MLX5E_NUM_TUNNEL_TT; tt++) { - if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5e_get_proto_by_tunnel_type(tt))) + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt))) return true; } return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)); @@ -4701,6 +4309,9 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) if (MLX5_CAP_ETH(mdev, scatter_fcs)) netdev->hw_features |= NETIF_F_RXFCS; + if (mlx5_qos_is_supported(mdev)) + netdev->hw_features |= NETIF_F_HW_TC; + netdev->features = netdev->hw_features; /* Defaults */ @@ -4721,8 +4332,6 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_NTUPLE; #endif } - if (mlx5_qos_is_supported(mdev)) - netdev->features |= NETIF_F_HW_TC; netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HW_VLAN_STAG_FILTER; @@ -4805,15 +4414,14 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5e_tir_builder *tir_builder; + enum mlx5e_rx_res_features features; + struct mlx5e_lro_param lro_param; int err; - priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + priv->rx_res = mlx5e_rx_res_alloc(); if (!priv->rx_res) return -ENOMEM; - mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); - mlx5e_create_q_counters(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -4822,50 +4430,20 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - err = mlx5e_create_indirect_rqt(priv); + features = MLX5E_RX_RES_FEATURE_XSK | MLX5E_RX_RES_FEATURE_PTP; + if (priv->channels.params.tunneled_offload_en) + features |= MLX5E_RX_RES_FEATURE_INNER_FT; + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, true); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv); - if (err) - goto err_destroy_indirect_tirs; - - err = mlx5e_create_xsk_rqts(priv); - if (unlikely(err)) - goto err_destroy_direct_tirs; - - err = mlx5e_create_xsk_tirs(priv); - if (unlikely(err)) - goto err_destroy_xsk_rqts; - - err = mlx5e_rqt_init_direct(&priv->rx_res->ptp.rqt, priv->mdev, false, - priv->drop_rq.rqn); - if (err) - goto err_destroy_xsk_tirs; - - tir_builder = mlx5e_tir_builder_alloc(false); - if (!tir_builder) { - err = -ENOMEM; - goto err_destroy_ptp_rqt; - } - err = mlx5e_create_direct_tir(priv, &priv->rx_res->ptp.tir, tir_builder, - &priv->rx_res->ptp.rqt); - mlx5e_tir_builder_free(tir_builder); - if (err) - goto err_destroy_ptp_rqt; - err = mlx5e_create_flow_steering(priv); if (err) { mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_ptp_direct_tir; + goto err_destroy_rx_res; } err = mlx5e_tc_nic_init(priv); @@ -4886,27 +4464,13 @@ err_tc_nic_cleanup: mlx5e_tc_nic_cleanup(priv); err_destroy_flow_steering: mlx5e_destroy_flow_steering(priv); -err_destroy_ptp_direct_tir: - mlx5e_tir_destroy(&priv->rx_res->ptp.tir); -err_destroy_ptp_rqt: - mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt); -err_destroy_xsk_tirs: - mlx5e_destroy_xsk_tirs(priv); -err_destroy_xsk_rqts: - mlx5e_destroy_xsk_rqts(priv); -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); -err_destroy_indirect_rqts: - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); err_destroy_q_counters: mlx5e_destroy_q_counters(priv); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; return err; } @@ -4916,17 +4480,10 @@ static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) mlx5e_accel_cleanup_rx(priv); mlx5e_tc_nic_cleanup(priv); mlx5e_destroy_flow_steering(priv); - mlx5e_tir_destroy(&priv->rx_res->ptp.tir); - mlx5e_rqt_destroy(&priv->rx_res->ptp.rqt); - mlx5e_destroy_xsk_tirs(priv); - mlx5e_destroy_xsk_rqts(priv); - mlx5e_destroy_direct_tirs(priv); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv); - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2c54951c240d..c54aaef521b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -49,6 +49,7 @@ #include "en/devlink.h" #include "fs_core.h" #include "lib/mlx5.h" +#include "lib/devcom.h" #define CREATE_TRACE_POINTS #include "diag/en_rep_tracepoint.h" #include "en_accel/ipsec.h" @@ -310,6 +311,8 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw, rpriv = mlx5e_rep_to_rep_priv(rep); list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) { mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + if (rep_sq->send_to_vport_rule_peer) + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); list_del(&rep_sq->list); kfree(rep_sq); } @@ -319,6 +322,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u32 *sqns_array, int sqns_num) { + struct mlx5_eswitch *peer_esw = NULL; struct mlx5_flow_handle *flow_rule; struct mlx5e_rep_priv *rpriv; struct mlx5e_rep_sq *rep_sq; @@ -329,6 +333,10 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, return 0; rpriv = mlx5e_rep_to_rep_priv(rep); + if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS)) + peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS); + for (i = 0; i < sqns_num; i++) { rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL); if (!rep_sq) { @@ -337,7 +345,7 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, } /* Add re-inject rule to the PF/representor sqs */ - flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep, + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -345,12 +353,34 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw, goto out_err; } rep_sq->send_to_vport_rule = flow_rule; + rep_sq->sqn = sqns_array[i]; + + if (peer_esw) { + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, + rep, sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule); + kfree(rep_sq); + goto out_err; + } + rep_sq->send_to_vport_rule_peer = flow_rule; + } + list_add(&rep_sq->list, &rpriv->vport_sqs_list); } + + if (peer_esw) + mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return 0; out_err: mlx5e_sqs2vport_stop(esw, rep); + + if (peer_esw) + mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return err; } @@ -647,27 +677,24 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - struct mlx5e_rx_res *res = priv->rx_res; struct ttc_params ttc_params = {}; - int tt, err; + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); /* The inner_ttc in the ttc params is intentionally not set */ - ttc_params.any_tt_tirn = res->channels[0].direct_tir.tirn; - mlx5e_set_ttc_ft_params(&ttc_params); + mlx5e_set_ttc_params(priv, &ttc_params, false); if (rep->vport != MLX5_VPORT_UPLINK) /* To give uplik rep TTC a lower level for chaining from root ft */ ttc_params.ft_attr.level = MLX5E_TTC_FT_LEVEL + 1; - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = res->rss[tt].indir_tir.tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); - if (err) { - netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", err); + priv->fs.ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(priv->fs.ttc)) { + err = PTR_ERR(priv->fs.ttc); + netdev_err(priv->netdev, "Failed to create rep ttc table, err=%d\n", + err); return err; } return 0; @@ -685,7 +712,7 @@ static int mlx5e_create_rep_root_ft(struct mlx5e_priv *priv) /* non uplik reps will skip any bypass tables and go directly to * their own ttc */ - rpriv->root_ft = priv->fs.ttc.ft.t; + rpriv->root_ft = mlx5_get_ttc_flow_table(priv->fs.ttc); return 0; } @@ -758,14 +785,13 @@ int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup) static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_lro_param lro_param; int err; - priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + priv->rx_res = mlx5e_rx_res_alloc(); if (!priv->rx_res) return -ENOMEM; - mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); - mlx5e_init_l2_addr(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -774,25 +800,16 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) return err; } - err = mlx5e_create_indirect_rqt(priv); + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, false); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv); - if (err) - goto err_destroy_indirect_tirs; - err = mlx5e_create_rep_ttc_table(priv); if (err) - goto err_destroy_direct_tirs; + goto err_destroy_rx_res; err = mlx5e_create_rep_root_ft(priv); if (err) @@ -809,18 +826,12 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) err_destroy_root_ft: mlx5e_destroy_rep_root_ft(priv); err_destroy_ttc_table: - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); -err_destroy_indirect_rqts: - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); + mlx5_destroy_ttc_table(priv->fs.ttc); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; return err; } @@ -830,13 +841,10 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) mlx5e_ethtool_cleanup_steering(priv); rep_vport_rx_rule_destroy(priv); mlx5e_destroy_rep_root_ft(priv); - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); - mlx5e_destroy_direct_tirs(priv); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv); - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); + mlx5_destroy_ttc_table(priv->fs.ttc); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; } @@ -1269,10 +1277,64 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep) return rpriv->netdev; } +static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { + if (!rep_sq->send_to_vport_rule_peer) + continue; + mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer); + rep_sq->send_to_vport_rule_peer = NULL; + } +} + +static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + struct mlx5_eswitch *peer_esw) +{ + struct mlx5_flow_handle *flow_rule; + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_sq *rep_sq; + + rpriv = mlx5e_rep_to_rep_priv(rep); + list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) { + if (rep_sq->send_to_vport_rule_peer) + continue; + flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn); + if (IS_ERR(flow_rule)) + goto err_out; + rep_sq->send_to_vport_rule_peer = flow_rule; + } + + return 0; +err_out: + mlx5e_vport_rep_event_unpair(rep); + return PTR_ERR(flow_rule); +} + +static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + enum mlx5_switchdev_event event, + void *data) +{ + int err = 0; + + if (event == MLX5_SWITCHDEV_EVENT_PAIR) + err = mlx5e_vport_rep_event_pair(esw, rep, data); + else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR) + mlx5e_vport_rep_event_unpair(rep); + + return err; +} + static const struct mlx5_eswitch_rep_ops rep_ops = { .load = mlx5e_vport_rep_load, .unload = mlx5e_vport_rep_unload, - .get_proto_dev = mlx5e_vport_rep_get_proto_dev + .get_proto_dev = mlx5e_vport_rep_get_proto_dev, + .event = mlx5e_vport_rep_event, }; static int mlx5e_rep_probe(struct auxiliary_device *adev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 47a2dfb7792a..8f0c82448eec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -207,6 +207,8 @@ struct mlx5e_encap_entry { struct mlx5e_rep_sq { struct mlx5_flow_handle *send_to_vport_rule; + struct mlx5_flow_handle *send_to_vport_rule_peer; + u32 sqn; struct list_head list; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0cee2fa76d65..e5c4344a114e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -34,19 +34,13 @@ #include <net/flow_offload.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> -#include <net/tc_act/tc_gact.h> -#include <net/tc_act/tc_skbedit.h> #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> #include <linux/refcount.h> #include <linux/completion.h> -#include <net/tc_act/tc_mirred.h> -#include <net/tc_act/tc_vlan.h> -#include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> -#include <net/tc_act/tc_mpls.h> #include <net/psample.h> #include <net/arp.h> #include <net/ipv6_stubs.h> @@ -345,7 +339,7 @@ struct mlx5e_hairpin { int num_channels; struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_ttc_table ttc; + struct mlx5_ttc_table *ttc; }; struct mlx5e_hairpin_entry { @@ -452,12 +446,32 @@ static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, static struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex) { + struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_priv *priv; - netdev = __dev_get_by_index(net, ifindex); + netdev = dev_get_by_index(net, ifindex); + if (!netdev) + return ERR_PTR(-ENODEV); + priv = netdev_priv(netdev); - return priv->mdev; + mdev = priv->mdev; + dev_put(netdev); + + /* Mirred tc action holds a refcount on the ifindex net_device (see + * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev + * after dev_put(netdev), while we're in the context of adding a tc flow. + * + * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then + * stored in a hairpin object, which exists until all flows, that refer to it, get + * removed. + * + * On the other hand, after a hairpin object has been created, the peer net_device may + * be removed/unbound while there are still some hairpin flows that are using it. This + * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to + * NETDEV_UNREGISTER event of the peer net_device. + */ + return mdev; } static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp) @@ -505,9 +519,10 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) if (!indir) return -ENOMEM; - mlx5e_build_default_indir_rqt(indir->table, MLX5E_INDIR_RQT_SIZE, hp->num_channels); + mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels); err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels, - priv->rx_res->rss_params.hash.hfunc, indir); + mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc, + indir); kvfree(indir); return err; @@ -516,8 +531,8 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) { struct mlx5e_priv *priv = hp->func_priv; - struct mlx5e_rss_params_hash *rss_hash; - enum mlx5e_traffic_types tt, max_tt; + struct mlx5e_rss_params_hash rss_hash; + enum mlx5_traffic_types tt, max_tt; struct mlx5e_tir_builder *builder; int err = 0; @@ -525,7 +540,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) if (!builder) return -ENOMEM; - rss_hash = &priv->rx_res->rss_params.hash; + rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { struct mlx5e_rss_params_traffic_type rss_tt; @@ -535,7 +550,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp) mlx5e_tir_builder_build_rqt(builder, hp->tdn, mlx5e_rqt_get_rqtn(&hp->indir_rqt), false); - mlx5e_tir_builder_build_rss(builder, rss_hash, &rss_tt, false); + mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false); err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false); if (err) { @@ -574,12 +589,16 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, memset(ttc_params, 0, sizeof(*ttc_params)); - ttc_params->any_tt_tirn = mlx5e_tir_get_tirn(&hp->direct_tir); - - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params->indir_tirn[tt] = mlx5e_tir_get_tirn(&hp->indir_tir[tt]); + ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev, + MLX5_FLOW_NAMESPACE_KERNEL); + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; + ttc_params->dests[tt].tir_num = + tt == MLX5_TT_ANY ? + mlx5e_tir_get_tirn(&hp->direct_tir) : + mlx5e_tir_get_tirn(&hp->indir_tir[tt]); + } - ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE; ft_attr->level = MLX5E_TC_TTC_FT_LEVEL; ft_attr->prio = MLX5E_TC_PRIO; } @@ -599,12 +618,15 @@ static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp) goto err_create_indirect_tirs; mlx5e_hairpin_set_ttc_params(hp, &ttc_params); - err = mlx5e_create_ttc_table(priv, &ttc_params, &hp->ttc); - if (err) + hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params); + if (IS_ERR(hp->ttc)) { + err = PTR_ERR(hp->ttc); goto err_create_ttc_table; + } netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n", - hp->num_channels, hp->ttc.ft.t->id); + hp->num_channels, + mlx5_get_ttc_flow_table(priv->fs.ttc)->id); return 0; @@ -618,9 +640,7 @@ err_create_indirect_tirs: static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp) { - struct mlx5e_priv *priv = hp->func_priv; - - mlx5e_destroy_ttc_table(priv, &hp->ttc); + mlx5_destroy_ttc_table(hp->ttc); mlx5e_hairpin_destroy_indirect_tirs(hp); mlx5e_rqt_destroy(&hp->indir_rqt); } @@ -640,6 +660,10 @@ mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params func_mdev = priv->mdev; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + err = PTR_ERR(peer_mdev); + goto create_pair_err; + } pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params); if (IS_ERR(pair)) { @@ -778,6 +802,11 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, int err; peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex); + if (IS_ERR(peer_mdev)) { + NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device"); + return PTR_ERR(peer_mdev); + } + if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) { NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported"); return -EOPNOTSUPP; @@ -855,7 +884,8 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv, attach_flow: if (hpe->hp->num_channels > 1) { flow_flag_set(flow, HAIRPIN_RSS); - flow->attr->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; + flow->attr->nic_attr->hairpin_ft = + mlx5_get_ttc_flow_table(hpe->hp->ttc); } else { flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir); } @@ -1001,15 +1031,17 @@ err_ft_get: static int mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct mlx5_core_dev *dev = priv->mdev; - struct mlx5_fc *counter = NULL; + struct mlx5_fc *counter; int err; + parse_attr = attr->parse_attr; + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); if (err) @@ -1329,9 +1361,9 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, bool vf_tun = false, encap_valid = true; struct net_device *encap_dev = NULL; struct mlx5_esw_flow_attr *esw_attr; - struct mlx5_fc *counter = NULL; struct mlx5e_rep_priv *rpriv; struct mlx5e_priv *out_priv; + struct mlx5_fc *counter; u32 max_prio, max_chain; int err = 0; int out_index; @@ -3297,10 +3329,10 @@ static int validate_goto_chain(struct mlx5e_priv *priv, static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, - struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5_flow_attr *attr = flow->attr; struct pedit_headers_action hdrs[2] = {}; const struct flow_action_entry *act; @@ -3316,8 +3348,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, return -EOPNOTSUPP; nic_attr = attr->nic_attr; - nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; + parse_attr = attr->parse_attr; flow_action_for_each(i, act, flow_action) { switch (act->id) { @@ -3326,10 +3358,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, MLX5_FLOW_CONTEXT_ACTION_COUNT; break; case FLOW_ACTION_DROP: - action |= MLX5_FLOW_CONTEXT_ACTION_DROP; - if (MLX5_CAP_FLOWTABLE(priv->mdev, - flow_table_properties_nic_receive.flow_counter)) - action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; break; case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: @@ -3370,7 +3400,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, "device is not on same HW, can't offload"); netdev_warn(priv->netdev, "device %s not on same HW, can't offload\n", peer_dev->name); - return -EINVAL; + return -EOPNOTSUPP; } } break; @@ -3380,7 +3410,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, if (mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(extack, "Bad flow mark - only 16 bit is supported"); - return -EINVAL; + return -EOPNOTSUPP; } nic_attr->flow_tag = mark; @@ -3677,8 +3707,7 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv, static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack, - struct net_device *filter_dev) + struct netlink_ext_ack *extack) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3743,7 +3772,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, "mpls pop supported only as first action"); return -EOPNOTSUPP; } - if (!netif_is_bareudp(filter_dev)) { + if (!netif_is_bareudp(parse_attr->filter_dev)) { NL_SET_ERR_MSG_MOD(extack, "mpls pop supported only on bareudp devices"); return -EOPNOTSUPP; @@ -3892,7 +3921,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, "devices %s %s not on same switch HW, can't offload forwarding\n", priv->netdev->name, out_dev->name); - return -EINVAL; + return -EOPNOTSUPP; } } break; @@ -4245,7 +4274,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); if (err) goto err_free; @@ -4391,11 +4420,11 @@ mlx5e_add_nic_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); + err = parse_tc_nic_actions(priv, &rule->action, flow, extack); if (err) goto err_free; - err = mlx5e_tc_add_nic_flow(priv, parse_attr, flow, extack); + err = mlx5e_tc_add_nic_flow(priv, flow, extack); if (err) goto err_free; @@ -4822,6 +4851,7 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) struct mlx5_core_dev *dev = priv->mdev; struct mapping_ctx *chains_mapping; struct mlx5_chains_attr attr = {}; + u64 mapping_id; int err; mlx5e_mod_hdr_tbl_init(&tc->mod_hdr); @@ -4835,8 +4865,12 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv) lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key); - chains_mapping = mapping_create(sizeof(struct mlx5_mapped_obj), - MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + mapping_id = mlx5_query_nic_system_image_guid(dev); + + chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + MLX5E_TC_TABLE_CHAIN_TAG_MASK, true); + if (IS_ERR(chains_mapping)) { err = PTR_ERR(chains_mapping); goto err_mapping; @@ -4925,6 +4959,7 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) struct mapping_ctx *mapping; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; + u64 mapping_id; int err = 0; uplink_priv = container_of(tc_ht, struct mlx5_rep_uplink_priv, tc_ht); @@ -4941,8 +4976,12 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) uplink_priv->esw_psample = mlx5_esw_sample_init(netdev_priv(priv->netdev)); #endif - mapping = mapping_create(sizeof(struct tunnel_match_key), - TUNNEL_INFO_BITS_MASK, true); + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL, + sizeof(struct tunnel_match_key), + TUNNEL_INFO_BITS_MASK, true); + if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_tun_mapping; @@ -4950,7 +4989,8 @@ int mlx5e_tc_esw_init(struct rhashtable *tc_ht) uplink_priv->tunnel_mapping = mapping; /* 0xFFF is reserved for stack devices slow path table mark */ - mapping = mapping_create(sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); + mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS, + sz_enc_opts, ENC_OPTS_BITS_MASK - 1, true); if (IS_ERR(mapping)) { err = PTR_ERR(mapping); goto err_enc_opts_mapping; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 505bf811984a..2e504c7461c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -15,6 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) vport->egress.offloads.fwd_rule = NULL; } +static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport) +{ + if (!vport->egress.offloads.bounce_rule) + return; + + mlx5_del_flow_rules(vport->egress.offloads.bounce_rule); + vport->egress.offloads.bounce_rule = NULL; +} + static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, struct mlx5_flow_destination *fwd_dest) @@ -87,6 +96,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport) { esw_acl_egress_vlan_destroy(vport); esw_acl_egress_ofld_fwd2vport_destroy(vport); + esw_acl_egress_ofld_bounce_rule_destroy(vport); } static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw, @@ -145,6 +155,12 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp); vport->egress.offloads.fwd_grp = NULL; } + + if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) { + mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + vport->egress.offloads.bounce_grp = NULL; + } + esw_acl_egress_vlan_grp_destroy(vport); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 97e6cb6f13c1..7ffea2350f44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1458,8 +1458,6 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int mode, int num_vfs) esw->mode = mode; - mlx5_lag_update(esw->dev); - if (mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); } else { @@ -1506,6 +1504,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (!mlx5_esw_allowed(esw)) return 0; + mlx5_lag_disable_change(esw->dev); down_write(&esw->mode_lock); if (esw->mode == MLX5_ESWITCH_NONE) { ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); @@ -1519,6 +1518,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) esw->esw_funcs.num_vfs = num_vfs; } up_write(&esw->mode_lock); + mlx5_lag_enable_change(esw->dev); return ret; } @@ -1550,8 +1550,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw, bool clear_vf) old_mode = esw->mode; esw->mode = MLX5_ESWITCH_NONE; - mlx5_lag_update(esw->dev); - if (old_mode == MLX5_ESWITCH_OFFLOADS) mlx5_rescan_drivers(esw->dev); @@ -1567,10 +1565,12 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) if (!mlx5_esw_allowed(esw)) return; + mlx5_lag_disable_change(esw->dev); down_write(&esw->mode_lock); mlx5_eswitch_disable_locked(esw, clear_vf); esw->esw_funcs.num_vfs = 0; up_write(&esw->mode_lock); + mlx5_lag_enable_change(esw->dev); } static int mlx5_query_hca_cap_host_pf(struct mlx5_core_dev *dev, void *out) @@ -1759,7 +1759,9 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) ida_init(&esw->offloads.vport_metadata_ida); xa_init_flags(&esw->offloads.vhca_map, XA_FLAGS_ALLOC); mutex_init(&esw->state_lock); + lockdep_register_key(&esw->mode_lock_key); init_rwsem(&esw->mode_lock); + lockdep_set_class(&esw->mode_lock, &esw->mode_lock_key); esw->enabled_vports = 0; esw->mode = MLX5_ESWITCH_NONE; @@ -1793,6 +1795,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); + lockdep_unregister_key(&esw->mode_lock_key); mutex_destroy(&esw->state_lock); WARN_ON(!xa_empty(&esw->offloads.vhca_map)); xa_destroy(&esw->offloads.vhca_map); @@ -1889,8 +1892,7 @@ is_port_function_supported(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_is_sf_vport(esw, vport_num); } -int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack) { @@ -1899,7 +1901,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, int err = -EOPNOTSUPP; u16 vport_num; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_get(port->devlink); if (IS_ERR(esw)) return PTR_ERR(esw); @@ -1923,8 +1925,7 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, return err; } -int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack) { @@ -1933,7 +1934,7 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, int err = -EOPNOTSUPP; u16 vport_num; - esw = mlx5_devlink_eswitch_get(devlink); + esw = mlx5_devlink_eswitch_get(port->devlink); if (IS_ERR(esw)) { NL_SET_ERR_MSG_MOD(extack, "Eswitch doesn't support set hw_addr"); return PTR_ERR(esw); @@ -2366,10 +2367,23 @@ int mlx5_esw_try_lock(struct mlx5_eswitch *esw) */ void mlx5_esw_unlock(struct mlx5_eswitch *esw) { + if (!mlx5_esw_allowed(esw)) + return; up_write(&esw->mode_lock); } /** + * mlx5_esw_lock() - Take write lock on esw mode lock + * @esw: eswitch device. + */ +void mlx5_esw_lock(struct mlx5_eswitch *esw) +{ + if (!mlx5_esw_allowed(esw)) + return; + down_write(&esw->mode_lock); +} + +/** * mlx5_eswitch_get_total_vports - Get total vports of the eswitch * * @dev: Pointer to core device @@ -2384,3 +2398,15 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) return mlx5_esw_allowed(esw) ? esw->total_vports : 0; } EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports); + +/** + * mlx5_eswitch_get_core_dev - Get the mdev device + * @esw : eswitch device. + * + * Return the mellanox core device which manages the eswitch. + */ +struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw) +{ + return mlx5_esw_allowed(esw) ? esw->dev : NULL; +} +EXPORT_SYMBOL(mlx5_eswitch_get_core_dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 48cac5bf606d..01e8dfb994d4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -86,6 +86,14 @@ struct mlx5_mapped_obj { #define esw_chains(esw) \ ((esw)->fdb_table.offloads.esw_chains_priv) +enum { + MAPPING_TYPE_CHAIN, + MAPPING_TYPE_TUNNEL, + MAPPING_TYPE_TUNNEL_ENC_OPTS, + MAPPING_TYPE_LABELS, + MAPPING_TYPE_ZONE, +}; + struct vport_ingress { struct mlx5_flow_table *acl; struct mlx5_flow_handle *allow_rule; @@ -124,6 +132,8 @@ struct vport_egress { struct { struct mlx5_flow_group *fwd_grp; struct mlx5_flow_handle *fwd_rule; + struct mlx5_flow_handle *bounce_rule; + struct mlx5_flow_group *bounce_grp; } offloads; }; }; @@ -315,6 +325,7 @@ struct mlx5_eswitch { u32 large_group_num; } params; struct blocking_notifier_head n_head; + struct lock_class_key mode_lock_key; }; void esw_offloads_disable(struct mlx5_eswitch *esw); @@ -475,12 +486,10 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, struct netlink_ext_ack *extack); int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, enum devlink_eswitch_encap_mode *encap); -int mlx5_devlink_port_function_hw_addr_get(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, u8 *hw_addr, int *hw_addr_len, struct netlink_ext_ack *extack); -int mlx5_devlink_port_function_hw_addr_set(struct devlink *devlink, - struct devlink_port *port, +int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); @@ -636,7 +645,7 @@ struct esw_vport_tbl_namespace { }; struct mlx5_vport_tbl_attr { - u16 chain; + u32 chain; u16 prio; u16 vport; const struct esw_vport_tbl_namespace *vport_ns; @@ -699,11 +708,18 @@ void mlx5_esw_get(struct mlx5_core_dev *dev); void mlx5_esw_put(struct mlx5_core_dev *dev); int mlx5_esw_try_lock(struct mlx5_eswitch *esw); void mlx5_esw_unlock(struct mlx5_eswitch *esw); +void mlx5_esw_lock(struct mlx5_eswitch *esw); void esw_vport_change_handle_locked(struct mlx5_vport *vport); bool mlx5_esw_offloads_controller_valid(const struct mlx5_eswitch *esw, u32 controller); +int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw); +void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw); +int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw); + #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -719,6 +735,9 @@ static inline const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(-EOPNOTSUPP); } +static inline void mlx5_esw_unlock(struct mlx5_eswitch *esw) { return; } +static inline void mlx5_esw_lock(struct mlx5_eswitch *esw) { return; } + static inline struct mlx5_flow_handle * esw_add_restore_rule(struct mlx5_eswitch *esw, u32 tag) { @@ -731,6 +750,23 @@ mlx5_esw_vport_to_devlink_port_index(const struct mlx5_core_dev *dev, { return vport_num; } + +static inline int +mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + return 0; +} + +static inline void +mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) {} + +static inline int +mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +{ + return 0; +} #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7579f3402776..0e3645c4fd0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -382,10 +382,11 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f { dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest[dest_idx].vport.num = esw_attr->dests[attr_idx].rep->vport; - dest[dest_idx].vport.vhca_id = - MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); - if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + dest[dest_idx].vport.vhca_id = + MLX5_CAP_GEN(esw_attr->dests[attr_idx].mdev, vhca_id); dest[dest_idx].vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + } if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) { if (pkt_reformat) { flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; @@ -925,6 +926,7 @@ out: struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch *from_esw, struct mlx5_eswitch_rep *rep, u32 sqn) { @@ -943,10 +945,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); /* source vport is the esw manager */ - MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, from_esw->manager_vport); if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch)) MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, - MLX5_CAP_GEN(rep->esw->dev, vhca_id)); + MLX5_CAP_GEN(from_esw->dev, vhca_id)); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); @@ -962,6 +964,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + if (rep->vport == MLX5_VPORT_UPLINK) + spec->flow_context.flow_source = MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT; + flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb, spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) @@ -1612,7 +1617,18 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) goto ns_err; } - table_size = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + + /* To be strictly correct: + * MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + * should be: + * esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + + * peer_esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ + * but as the peer device might not be in switchdev mode it's not + * possible. We use the fact that by default FW sets max vfs and max sfs + * to the same value on both devices. If it needs to be changed in the future note + * the peer miss group should also be created based on the number of + * total vports of the peer (currently is also uses esw->total_vports). + */ + table_size = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ) + MLX5_ESW_MISS_FLOWS + esw->total_vports + esw->esw_funcs.num_vfs; /* create the slow path fdb with encap set, so further table instances @@ -1669,7 +1685,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw) source_eswitch_owner_vhca_id_valid, 1); } - ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ; + /* See comment above table_size calculation */ + ix = MLX5_MAX_PORTS * (esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ); MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); @@ -2309,14 +2326,293 @@ void esw_offloads_unload_rep(struct mlx5_eswitch *esw, u16 vport_num) mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); } +static int esw_set_uplink_slave_ingress_root(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + struct mlx5_eswitch *esw; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + struct mlx5_vport *vport; + int err; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, FS_FT_ESW_INGRESS_ACL); + MLX5_SET(set_flow_table_root_in, in, other_vport, 1); + MLX5_SET(set_flow_table_root_in, in, vport_number, MLX5_VPORT_UPLINK); + + if (master) { + esw = master->priv.eswitch; + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + MLX5_SET(set_flow_table_root_in, in, table_of_other_vport, 1); + MLX5_SET(set_flow_table_root_in, in, table_vport_number, + MLX5_VPORT_UPLINK); + + ns = mlx5_get_flow_vport_acl_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + vport->index); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } else { + esw = slave->priv.eswitch; + vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + ns = mlx5_get_flow_vport_acl_namespace(slave, + MLX5_FLOW_NAMESPACE_ESW_INGRESS, + vport->index); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, table_id, root->root_ft->id); + } + + err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); + mutex_unlock(&root->chain_lock); + + return err; +} + +static int esw_set_slave_root_fdb(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + int err; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, + FS_FT_FDB); + + if (master) { + ns = mlx5_get_flow_namespace(master, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } else { + ns = mlx5_get_flow_namespace(slave, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + mutex_lock(&root->chain_lock); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } + + err = mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); + mutex_unlock(&root->chain_lock); + + return err; +} + +static int __esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, + struct mlx5_vport *vport, + struct mlx5_flow_table *acl) +{ + struct mlx5_flow_handle *flow_rule = NULL; + struct mlx5_flow_destination dest = {}; + struct mlx5_flow_act flow_act = {}; + struct mlx5_flow_spec *spec; + int err = 0; + void *misc; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS; + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, MLX5_VPORT_UPLINK); + MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id, + MLX5_CAP_GEN(slave, vhca_id)); + + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, + source_eswitch_owner_vhca_id); + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport.num = slave->priv.eswitch->manager_vport; + dest.vport.vhca_id = MLX5_CAP_GEN(slave, vhca_id); + dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID; + + flow_rule = mlx5_add_flow_rules(acl, spec, &flow_act, + &dest, 1); + if (IS_ERR(flow_rule)) + err = PTR_ERR(flow_rule); + else + vport->egress.offloads.bounce_rule = flow_rule; + + kvfree(spec); + return err; +} + +static int esw_set_master_egress_rule(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_eswitch *esw = master->priv.eswitch; + struct mlx5_flow_table_attr ft_attr = { + .max_fte = 1, .prio = 0, .level = 0, + }; + struct mlx5_flow_namespace *egress_ns; + struct mlx5_flow_table *acl; + struct mlx5_flow_group *g; + struct mlx5_vport *vport; + void *match_criteria; + u32 *flow_group_in; + int err; + + vport = mlx5_eswitch_get_vport(esw, esw->manager_vport); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + egress_ns = mlx5_get_flow_vport_acl_namespace(master, + MLX5_FLOW_NAMESPACE_ESW_EGRESS, + vport->index); + if (!egress_ns) + return -EINVAL; + + if (vport->egress.acl) + return -EINVAL; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + acl = mlx5_create_vport_flow_table(egress_ns, &ft_attr, vport->vport); + if (IS_ERR(acl)) { + err = PTR_ERR(acl); + goto out; + } + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, + match_criteria); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_port); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0); + + g = mlx5_create_flow_group(acl, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + goto err_group; + } + + err = __esw_set_master_egress_rule(master, slave, vport, acl); + if (err) + goto err_rule; + + vport->egress.acl = acl; + vport->egress.offloads.bounce_grp = g; + + kvfree(flow_group_in); + + return 0; + +err_rule: + mlx5_destroy_flow_group(g); +err_group: + mlx5_destroy_flow_table(acl); +out: + kvfree(flow_group_in); + return err; +} + +static void esw_unset_master_egress_rule(struct mlx5_core_dev *dev) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(dev->priv.eswitch, + dev->priv.eswitch->manager_vport); + + esw_acl_egress_ofld_cleanup(vport); +} + +int mlx5_eswitch_offloads_config_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + int err; + + err = esw_set_uplink_slave_ingress_root(master_esw->dev, + slave_esw->dev); + if (err) + return -EINVAL; + + err = esw_set_slave_root_fdb(master_esw->dev, + slave_esw->dev); + if (err) + goto err_fdb; + + err = esw_set_master_egress_rule(master_esw->dev, + slave_esw->dev); + if (err) + goto err_acl; + + return err; + +err_acl: + esw_set_slave_root_fdb(NULL, slave_esw->dev); + +err_fdb: + esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); + + return err; +} + +void mlx5_eswitch_offloads_destroy_single_fdb(struct mlx5_eswitch *master_esw, + struct mlx5_eswitch *slave_esw) +{ + esw_unset_master_egress_rule(master_esw->dev); + esw_set_slave_root_fdb(NULL, slave_esw->dev); + esw_set_uplink_slave_ingress_root(NULL, slave_esw->dev); +} + #define ESW_OFFLOADS_DEVCOM_PAIR (0) #define ESW_OFFLOADS_DEVCOM_UNPAIR (1) -static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, - struct mlx5_eswitch *peer_esw) +static void mlx5_esw_offloads_rep_event_unpair(struct mlx5_eswitch *esw) { + const struct mlx5_eswitch_rep_ops *ops; + struct mlx5_eswitch_rep *rep; + unsigned long i; + u8 rep_type; - return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + mlx5_esw_for_each_rep(esw, i, rep) { + rep_type = NUM_REP_TYPES; + while (rep_type--) { + ops = esw->offloads.rep_ops[rep_type]; + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + ops->event) + ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_UNPAIR, NULL); + } + } } static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) @@ -2324,9 +2620,42 @@ static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw) #if IS_ENABLED(CONFIG_MLX5_CLS_ACT) mlx5e_tc_clean_fdb_peer_flows(esw); #endif + mlx5_esw_offloads_rep_event_unpair(esw); esw_del_fdb_peer_miss_rules(esw); } +static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw, + struct mlx5_eswitch *peer_esw) +{ + const struct mlx5_eswitch_rep_ops *ops; + struct mlx5_eswitch_rep *rep; + unsigned long i; + u8 rep_type; + int err; + + err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev); + if (err) + return err; + + mlx5_esw_for_each_rep(esw, i, rep) { + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + ops = esw->offloads.rep_ops[rep_type]; + if (atomic_read(&rep->rep_data[rep_type].state) == REP_LOADED && + ops->event) { + err = ops->event(esw, rep, MLX5_SWITCHDEV_EVENT_PAIR, peer_esw); + if (err) + goto err_out; + } + } + } + + return 0; + +err_out: + mlx5_esw_offloads_unpair(esw); + return err; +} + static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw, struct mlx5_eswitch *peer_esw, bool pair) @@ -2367,6 +2696,9 @@ static int mlx5_esw_offloads_devcom_event(int event, switch (event) { case ESW_OFFLOADS_DEVCOM_PAIR: + if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev) + break; + if (mlx5_eswitch_vport_match_metadata_enabled(esw) != mlx5_eswitch_vport_match_metadata_enabled(peer_esw)) break; @@ -2614,6 +2946,31 @@ static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw) esw_vport_destroy_offloads_acl_tables(esw, vport); } +int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw) +{ + struct mlx5_eswitch_rep *rep; + unsigned long i; + int ret; + + if (!esw || esw->mode != MLX5_ESWITCH_OFFLOADS) + return 0; + + rep = mlx5_eswitch_get_rep(esw, MLX5_VPORT_UPLINK); + if (atomic_read(&rep->rep_data[REP_ETH].state) != REP_LOADED) + return 0; + + ret = mlx5_esw_offloads_rep_load(esw, MLX5_VPORT_UPLINK); + if (ret) + return ret; + + mlx5_esw_for_each_rep(esw, i, rep) { + if (atomic_read(&rep->rep_data[REP_ETH].state) == REP_LOADED) + mlx5_esw_offloads_rep_load(esw, rep->vport); + } + + return 0; +} + static int esw_offloads_steering_init(struct mlx5_eswitch *esw) { struct mlx5_esw_indir_table *indir; @@ -2783,6 +3140,7 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) struct mapping_ctx *reg_c0_obj_pool; struct mlx5_vport *vport; unsigned long i; + u64 mapping_id; int err; if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat) && @@ -2806,9 +3164,13 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) if (err) goto err_vport_metadata; - reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj), - ESW_REG_C0_USER_DATA_METADATA_MASK, - true); + mapping_id = mlx5_query_nic_system_image_guid(esw->dev); + + reg_c0_obj_pool = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN, + sizeof(struct mlx5_mapped_obj), + ESW_REG_C0_USER_DATA_METADATA_MASK, + true); + if (IS_ERR(reg_c0_obj_pool)) { err = PTR_ERR(reg_c0_obj_pool); goto err_pool; @@ -2986,10 +3348,11 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; + mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); - return err; + goto enable_lag; } cur_mlx5_mode = err; err = 0; @@ -3006,6 +3369,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, unlock: mlx5_esw_unlock(esw); +enable_lag: + mlx5_lag_enable_change(esw->dev); return err; } @@ -3079,8 +3444,11 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, switch (MLX5_CAP_ETH(dev, wqe_inline_mode)) { case MLX5_CAP_INLINE_MODE_NOT_REQUIRED: - if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) + if (mode == DEVLINK_ESWITCH_INLINE_MODE_NONE) { + err = 0; goto out; + } + fallthrough; case MLX5_CAP_INLINE_MODE_L2: NL_SET_ERR_MSG_MOD(extack, "Inline mode can't be set"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index bd66ab2af5b5..9b2cca6d9620 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -454,7 +454,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size)); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index); MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 896a6c3dbdb7..7db8df64a60e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -152,17 +152,56 @@ static int mlx5_cmd_stub_destroy_ns(struct mlx5_flow_root_namespace *ns) return 0; } +static int mlx5_cmd_set_slave_root_fdb(struct mlx5_core_dev *master, + struct mlx5_core_dev *slave, + bool ft_id_valid, + u32 ft_id) +{ + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {}; + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; + struct mlx5_flow_root_namespace *root; + struct mlx5_flow_namespace *ns; + + MLX5_SET(set_flow_table_root_in, in, opcode, + MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); + MLX5_SET(set_flow_table_root_in, in, table_type, + FS_FT_FDB); + if (ft_id_valid) { + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id_valid, 1); + MLX5_SET(set_flow_table_root_in, in, + table_eswitch_owner_vhca_id, + MLX5_CAP_GEN(master, vhca_id)); + MLX5_SET(set_flow_table_root_in, in, table_id, + ft_id); + } else { + ns = mlx5_get_flow_namespace(slave, + MLX5_FLOW_NAMESPACE_FDB); + root = find_root(&ns->node); + MLX5_SET(set_flow_table_root_in, in, table_id, + root->root_ft->id); + } + + return mlx5_cmd_exec(slave, in, sizeof(in), out, sizeof(out)); +} + static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, u32 underlay_qpn, bool disconnect) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {}; struct mlx5_core_dev *dev = ns->dev; + int err; if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && underlay_qpn == 0) return 0; + if (ft->type == FS_FT_FDB && + mlx5_lag_is_shared_fdb(dev) && + !mlx5_lag_is_master(dev)) + return 0; + MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); @@ -177,7 +216,24 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns, MLX5_SET(set_flow_table_root_in, in, other_vport, !!(ft->flags & MLX5_FLOW_TABLE_OTHER_VPORT)); - return mlx5_cmd_exec_in(dev, set_flow_table_root, in); + err = mlx5_cmd_exec_in(dev, set_flow_table_root, in); + if (!err && + ft->type == FS_FT_FDB && + mlx5_lag_is_shared_fdb(dev) && + mlx5_lag_is_master(dev)) { + err = mlx5_cmd_set_slave_root_fdb(dev, + mlx5_lag_get_peer_mdev(dev), + !disconnect, (!disconnect) ? + ft->id : 0); + if (err && !disconnect) { + MLX5_SET(set_flow_table_root_in, in, op_mod, 0); + MLX5_SET(set_flow_table_root_in, in, table_id, + ns->root_ft->id); + mlx5_cmd_exec_in(dev, set_flow_table_root, in); + } + } + + return err; } static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d7bf0a3e4a52..8481027e493c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -413,7 +413,7 @@ static bool check_valid_spec(const struct mlx5_flow_spec *spec) return true; } -static struct mlx5_flow_root_namespace *find_root(struct fs_node *node) +struct mlx5_flow_root_namespace *find_root(struct fs_node *node) { struct fs_node *root; struct mlx5_flow_namespace *ns; @@ -1024,17 +1024,19 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev, static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct fs_prio *prio) { - struct mlx5_flow_table *next_ft; + struct mlx5_flow_table *next_ft, *first_ft; int err = 0; /* Connect_prev_fts and update_root_ft_create are mutually exclusive */ - if (list_empty(&prio->node.children)) { + first_ft = list_first_entry_or_null(&prio->node.children, + struct mlx5_flow_table, node.list); + if (!first_ft || first_ft->level > ft->level) { err = connect_prev_fts(dev, ft, prio); if (err) return err; - next_ft = find_next_chained_ft(prio); + next_ft = first_ft ? first_ft : find_next_chained_ft(prio); err = connect_fwd_rules(dev, ft, next_ft); if (err) return err; @@ -2120,7 +2122,7 @@ static int disconnect_flow_table(struct mlx5_flow_table *ft) node.list) == ft)) return 0; - next_ft = find_next_chained_ft(prio); + next_ft = find_next_ft(ft); err = connect_fwd_rules(dev, next_ft, ft); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 7317cdeab661..98240badc342 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -294,6 +294,8 @@ void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); +struct mlx5_flow_root_namespace *find_root(struct fs_node *node); + #define fs_get_obj(v, _node) {v = container_of((_node), typeof(*v), node); } #define fs_list_for_each_entry(pos, root) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 9ff163c5bcde..9abeb80ffa31 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -626,8 +626,16 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) } fw_reporter_ctx.err_synd = health->synd; fw_reporter_ctx.miss_counter = health->miss_counter; - devlink_health_report(health->fw_fatal_reporter, - "FW fatal error reported", &fw_reporter_ctx); + if (devlink_health_report(health->fw_fatal_reporter, + "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) { + /* If recovery wasn't performed, due to grace period, + * unload the driver. This ensures that the driver + * closes all its resources and it is not subjected to + * requests from the kernel. + */ + mlx5_core_err(dev, "Driver is in error state. Unloading\n"); + mlx5_unload_one(dev); + } } static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index a126cbc6f0d6..67571e5040d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -314,8 +314,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) { - struct ttc_params ttc_params = {}; - int tt, err; + int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL); @@ -330,12 +329,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - mlx5e_set_ttc_basic_params(priv, &ttc_params); - mlx5e_set_ttc_ft_params(&ttc_params); - for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) - ttc_params.indir_tirn[tt] = priv->rx_res->rss[tt].indir_tir.tirn; - - err = mlx5e_create_ttc_table(priv, &ttc_params, &priv->fs.ttc); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); @@ -352,21 +346,20 @@ err_destroy_arfs_tables: static void mlx5i_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_destroy_ttc_table(priv, &priv->fs.ttc); + mlx5e_destroy_ttc_table(priv); mlx5e_arfs_destroy_tables(priv); } static int mlx5i_init_rx(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_lro_param lro_param; int err; - priv->rx_res = kvzalloc(sizeof(*priv->rx_res), GFP_KERNEL); + priv->rx_res = mlx5e_rx_res_alloc(); if (!priv->rx_res) return -ENOMEM; - mlx5e_build_rss_params(&priv->rx_res->rss_params, priv->channels.params.num_channels); - mlx5e_create_q_counters(priv); err = mlx5e_open_drop_rq(priv, &priv->drop_rq); @@ -375,41 +368,26 @@ static int mlx5i_init_rx(struct mlx5e_priv *priv) goto err_destroy_q_counters; } - err = mlx5e_create_indirect_rqt(priv); + lro_param = mlx5e_get_lro_param(&priv->channels.params); + err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0, + priv->max_nch, priv->drop_rq.rqn, &lro_param, + priv->channels.params.num_channels); if (err) goto err_close_drop_rq; - err = mlx5e_create_direct_rqts(priv); - if (err) - goto err_destroy_indirect_rqts; - - err = mlx5e_create_indirect_tirs(priv, false); - if (err) - goto err_destroy_direct_rqts; - - err = mlx5e_create_direct_tirs(priv); - if (err) - goto err_destroy_indirect_tirs; - err = mlx5i_create_flow_steering(priv); if (err) - goto err_destroy_direct_tirs; + goto err_destroy_rx_res; return 0; -err_destroy_direct_tirs: - mlx5e_destroy_direct_tirs(priv); -err_destroy_indirect_tirs: - mlx5e_destroy_indirect_tirs(priv); -err_destroy_direct_rqts: - mlx5e_destroy_direct_rqts(priv); -err_destroy_indirect_rqts: - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); +err_destroy_rx_res: + mlx5e_rx_res_destroy(priv->rx_res); err_close_drop_rq: mlx5e_close_drop_rq(&priv->drop_rq); err_destroy_q_counters: mlx5e_destroy_q_counters(priv); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; return err; } @@ -417,13 +395,10 @@ err_destroy_q_counters: static void mlx5i_cleanup_rx(struct mlx5e_priv *priv) { mlx5i_destroy_flow_steering(priv); - mlx5e_destroy_direct_tirs(priv); - mlx5e_destroy_indirect_tirs(priv); - mlx5e_destroy_direct_rqts(priv); - mlx5e_rqt_destroy(&priv->rx_res->indir_rqt); + mlx5e_rx_res_destroy(priv->rx_res); mlx5e_close_drop_rq(&priv->drop_rq); mlx5e_destroy_q_counters(priv); - kvfree(priv->rx_res); + mlx5e_rx_res_free(priv->rx_res); priv->rx_res = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 5c043c5cc403..f4dfa55c8c7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -32,7 +32,9 @@ #include <linux/netdevice.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/eswitch.h> #include <linux/mlx5/vport.h> +#include "lib/devcom.h" #include "mlx5_core.h" #include "eswitch.h" #include "lag.h" @@ -45,7 +47,7 @@ static DEFINE_SPINLOCK(lag_lock); static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2) + u8 remap_port2, bool shared_fdb) { u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); @@ -54,6 +56,7 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); return mlx5_cmd_exec_in(dev, create_lag, in); } @@ -224,35 +227,59 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, } static int mlx5_create_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) + struct lag_tracker *tracker, + bool shared_fdb) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; int err; mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], &ldev->v2p_map[MLX5_LAG_P2]); - mlx5_core_info(dev0, "lag map port 1:%d port 2:%d", - ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2]); + mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d", + ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2], + shared_fdb); err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); - if (err) + ldev->v2p_map[MLX5_LAG_P2], shared_fdb); + if (err) { mlx5_core_err(dev0, "Failed to create LAG (%d)\n", err); + return err; + } + + if (shared_fdb) { + err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); + if (err) + mlx5_core_err(dev0, "Can't enable single FDB mode\n"); + else + mlx5_core_info(dev0, "Operation mode is single FDB\n"); + } + + if (err) { + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } + return err; } int mlx5_activate_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - u8 flags) + u8 flags, + bool shared_fdb) { bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; int err; - err = mlx5_create_lag(ldev, tracker); + err = mlx5_create_lag(ldev, tracker, shared_fdb); if (err) { if (roce_lag) { mlx5_core_err(dev0, @@ -266,6 +293,7 @@ int mlx5_activate_lag(struct mlx5_lag *ldev, } ldev->flags |= flags; + ldev->shared_fdb = shared_fdb; return 0; } @@ -278,6 +306,12 @@ static int mlx5_deactivate_lag(struct mlx5_lag *ldev) ldev->flags &= ~MLX5_LAG_MODE_FLAGS; + if (ldev->shared_fdb) { + mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch, + ldev->pf[MLX5_LAG_P2].dev->priv.eswitch); + ldev->shared_fdb = false; + } + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); err = mlx5_cmd_exec_in(dev0, destroy_lag, in); if (err) { @@ -333,6 +367,10 @@ static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) if (!ldev->pf[i].dev) continue; + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(ldev->pf[i].dev); } @@ -342,12 +380,15 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev) { struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + bool shared_fdb = ldev->shared_fdb; bool roce_lag; int err; roce_lag = __mlx5_lag_is_roce(ldev); - if (roce_lag) { + if (shared_fdb) { + mlx5_lag_remove_devices(ldev); + } else if (roce_lag) { if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) { dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); @@ -359,8 +400,34 @@ static void mlx5_disable_lag(struct mlx5_lag *ldev) if (err) return; - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); + + if (shared_fdb) { + if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + } +} + +static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + + if (is_mdev_switchdev_mode(dev0) && + is_mdev_switchdev_mode(dev1) && + mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && + mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && + mlx5_devcom_is_paired(dev0->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS) && + MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && + MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && + MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) + return true; + + return false; } static void mlx5_do_bond(struct mlx5_lag *ldev) @@ -371,14 +438,17 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) bool do_bond, roce_lag; int err; - if (!mlx5_lag_is_ready(ldev)) - return; - - tracker = ldev->tracker; + if (!mlx5_lag_is_ready(ldev)) { + do_bond = false; + } else { + tracker = ldev->tracker; - do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + } if (do_bond && !__mlx5_lag_is_active(ldev)) { + bool shared_fdb = mlx5_shared_fdb_supported(ldev); + roce_lag = !mlx5_sriov_is_enabled(dev0) && !mlx5_sriov_is_enabled(dev1); @@ -388,23 +458,40 @@ static void mlx5_do_bond(struct mlx5_lag *ldev) dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; #endif - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_remove_devices(ldev); err = mlx5_activate_lag(ldev, &tracker, roce_lag ? MLX5_LAG_FLAG_ROCE : - MLX5_LAG_FLAG_SRIOV); + MLX5_LAG_FLAG_SRIOV, + shared_fdb); if (err) { - if (roce_lag) + if (shared_fdb || roce_lag) mlx5_lag_add_devices(ldev); return; - } - - if (roce_lag) { + } else if (roce_lag) { dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; mlx5_rescan_drivers_locked(dev0); mlx5_nic_vport_enable_roce(dev1); + } else if (shared_fdb) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + + if (err) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + mlx5_core_err(dev0, "Failed to enable lag\n"); + return; + } } } else if (do_bond && __mlx5_lag_is_active(ldev)) { mlx5_modify_lag(ldev, &tracker); @@ -418,21 +505,48 @@ static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) queue_delayed_work(ldev->wq, &ldev->bond_work, delay); } +static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + if (dev0) + mlx5_esw_lock(dev0->priv.eswitch); + if (dev1) + mlx5_esw_lock(dev1->priv.eswitch); +} + +static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + if (dev1) + mlx5_esw_unlock(dev1->priv.eswitch); + if (dev0) + mlx5_esw_unlock(dev0->priv.eswitch); +} + static void mlx5_do_bond_work(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, bond_work); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; int status; status = mlx5_dev_list_trylock(); if (!status) { - /* 1 sec delay. */ mlx5_queue_bond_work(ldev, HZ); return; } + if (ldev->mode_changes_in_progress) { + mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_lag_lock_eswitches(dev0, dev1); mlx5_do_bond(ldev); + mlx5_lag_unlock_eswitches(dev0, dev1); mlx5_dev_list_unlock(); } @@ -630,7 +744,7 @@ static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, } /* Must be called with intf_mutex held */ -static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) +static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev = NULL; struct mlx5_core_dev *tmp_dev; @@ -638,7 +752,7 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) if (!MLX5_CAP_GEN(dev, vport_group_manager) || !MLX5_CAP_GEN(dev, lag_master) || MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) - return; + return 0; tmp_dev = mlx5_get_next_phys_dev(dev); if (tmp_dev) @@ -648,15 +762,17 @@ static void __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) ldev = mlx5_lag_dev_alloc(dev); if (!ldev) { mlx5_core_err(dev, "Failed to alloc lag dev\n"); - return; + return 0; } } else { + if (ldev->mode_changes_in_progress) + return -EAGAIN; mlx5_ldev_get(ldev); } mlx5_ldev_add_mdev(ldev, dev); - return; + return 0; } void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) @@ -667,7 +783,13 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) if (!ldev) return; +recheck: mlx5_dev_list_lock(); + if (ldev->mode_changes_in_progress) { + mlx5_dev_list_unlock(); + msleep(100); + goto recheck; + } mlx5_ldev_remove_mdev(ldev, dev); mlx5_dev_list_unlock(); mlx5_ldev_put(ldev); @@ -675,8 +797,16 @@ void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) { + int err; + +recheck: mlx5_dev_list_lock(); - __mlx5_lag_dev_add_mdev(dev); + err = __mlx5_lag_dev_add_mdev(dev); + if (err) { + mlx5_dev_list_unlock(); + msleep(100); + goto recheck; + } mlx5_dev_list_unlock(); } @@ -690,11 +820,11 @@ void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, if (!ldev) return; - if (__mlx5_lag_is_active(ldev)) - mlx5_disable_lag(ldev); - mlx5_ldev_remove_netdev(ldev, netdev); ldev->flags &= ~MLX5_LAG_FLAG_READY; + + if (__mlx5_lag_is_active(ldev)) + mlx5_queue_bond_work(ldev, 0); } /* Must be called with intf_mutex held */ @@ -716,6 +846,7 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, if (i >= MLX5_MAX_PORTS) ldev->flags |= MLX5_LAG_FLAG_READY; + mlx5_queue_bond_work(ldev, 0); } bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) @@ -746,6 +877,21 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_active); +bool mlx5_lag_is_master(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev) && + dev == ldev->pf[MLX5_LAG_P1].dev; + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_master); + bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -760,19 +906,50 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) } EXPORT_SYMBOL(mlx5_lag_is_sriov); -void mlx5_lag_update(struct mlx5_core_dev *dev) +bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb; + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); + +void mlx5_lag_disable_change(struct mlx5_core_dev *dev) { + struct mlx5_core_dev *dev0; + struct mlx5_core_dev *dev1; struct mlx5_lag *ldev; mlx5_dev_list_lock(); + ldev = mlx5_lag_dev(dev); - if (!ldev) - goto unlock; + dev0 = ldev->pf[MLX5_LAG_P1].dev; + dev1 = ldev->pf[MLX5_LAG_P2].dev; - mlx5_do_bond(ldev); + ldev->mode_changes_in_progress++; + if (__mlx5_lag_is_active(ldev)) { + mlx5_lag_lock_eswitches(dev0, dev1); + mlx5_disable_lag(ldev); + mlx5_lag_unlock_eswitches(dev0, dev1); + } + mlx5_dev_list_unlock(); +} -unlock: +void mlx5_lag_enable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + mlx5_dev_list_lock(); + ldev = mlx5_lag_dev(dev); + ldev->mode_changes_in_progress--; mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, 0); } struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) @@ -827,6 +1004,26 @@ unlock: } EXPORT_SYMBOL(mlx5_lag_get_slave_port); +struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_core_dev *peer_dev = NULL; + struct mlx5_lag *ldev; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + if (!ldev) + goto unlock; + + peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? + ldev->pf[MLX5_LAG_P2].dev : + ldev->pf[MLX5_LAG_P1].dev; + +unlock: + spin_unlock(&lag_lock); + return peer_dev; +} +EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); + int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h index 191392c37558..d4bae528954e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.h @@ -39,6 +39,8 @@ struct lag_tracker { */ struct mlx5_lag { u8 flags; + int mode_changes_in_progress; + bool shared_fdb; u8 v2p_map[MLX5_MAX_PORTS]; struct kref ref; struct lag_func pf[MLX5_MAX_PORTS]; @@ -71,7 +73,8 @@ void mlx5_modify_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker); int mlx5_activate_lag(struct mlx5_lag *ldev, struct lag_tracker *tracker, - u8 flags); + u8 flags, + bool shared_fdb); int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, struct net_device *ndev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index c4bf8b679541..011b639b29bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -161,7 +161,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, struct lag_tracker tracker; tracker = ldev->tracker; - mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH); + mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false); } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c new file mode 100644 index 000000000000..749d17c0057d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/mlx5/fs.h> +#include <linux/mlx5/driver.h> +#include "mlx5_core.h" +#include "lib/fs_ttc.h" + +#define MLX5_TTC_NUM_GROUPS 3 +#define MLX5_TTC_GROUP1_SIZE (BIT(3) + MLX5_NUM_TUNNEL_TT) +#define MLX5_TTC_GROUP2_SIZE BIT(1) +#define MLX5_TTC_GROUP3_SIZE BIT(0) +#define MLX5_TTC_TABLE_SIZE (MLX5_TTC_GROUP1_SIZE +\ + MLX5_TTC_GROUP2_SIZE +\ + MLX5_TTC_GROUP3_SIZE) + +#define MLX5_INNER_TTC_NUM_GROUPS 3 +#define MLX5_INNER_TTC_GROUP1_SIZE BIT(3) +#define MLX5_INNER_TTC_GROUP2_SIZE BIT(1) +#define MLX5_INNER_TTC_GROUP3_SIZE BIT(0) +#define MLX5_INNER_TTC_TABLE_SIZE (MLX5_INNER_TTC_GROUP1_SIZE +\ + MLX5_INNER_TTC_GROUP2_SIZE +\ + MLX5_INNER_TTC_GROUP3_SIZE) + +/* L3/L4 traffic type classifier */ +struct mlx5_ttc_table { + int num_groups; + struct mlx5_flow_table *t; + struct mlx5_flow_group **g; + struct mlx5_ttc_rule rules[MLX5_NUM_TT]; + struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc) +{ + return ttc->t; +} + +static void mlx5_cleanup_ttc_rules(struct mlx5_ttc_table *ttc) +{ + int i; + + for (i = 0; i < MLX5_NUM_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->rules[i].rule)) { + mlx5_del_flow_rules(ttc->rules[i].rule); + ttc->rules[i].rule = NULL; + } + } + + for (i = 0; i < MLX5_NUM_TUNNEL_TT; i++) { + if (!IS_ERR_OR_NULL(ttc->tunnel_rules[i])) { + mlx5_del_flow_rules(ttc->tunnel_rules[i]); + ttc->tunnel_rules[i] = NULL; + } + } +} + +struct mlx5_etype_proto { + u16 etype; + u8 proto; +}; + +static struct mlx5_etype_proto ttc_rules[] = { + [MLX5_TT_IPV4_TCP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV6_TCP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_TCP, + }, + [MLX5_TT_IPV4_UDP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV6_UDP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_UDP, + }, + [MLX5_TT_IPV4_IPSEC_AH] = { + .etype = ETH_P_IP, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV6_IPSEC_AH] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_AH, + }, + [MLX5_TT_IPV4_IPSEC_ESP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV6_IPSEC_ESP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_ESP, + }, + [MLX5_TT_IPV4] = { + .etype = ETH_P_IP, + .proto = 0, + }, + [MLX5_TT_IPV6] = { + .etype = ETH_P_IPV6, + .proto = 0, + }, + [MLX5_TT_ANY] = { + .etype = 0, + .proto = 0, + }, +}; + +static struct mlx5_etype_proto ttc_tunnel_rules[] = { + [MLX5_TT_IPV4_GRE] = { + .etype = ETH_P_IP, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV6_GRE] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_GRE, + }, + [MLX5_TT_IPV4_IPIP] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV6_IPIP] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPIP, + }, + [MLX5_TT_IPV4_IPV6] = { + .etype = ETH_P_IP, + .proto = IPPROTO_IPV6, + }, + [MLX5_TT_IPV6_IPV6] = { + .etype = ETH_P_IPV6, + .proto = IPPROTO_IPV6, + }, + +}; + +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt) +{ + return ttc_tunnel_rules[tt].proto; +} + +static bool mlx5_tunnel_proto_supported_rx(struct mlx5_core_dev *mdev, + u8 proto_type) +{ + switch (proto_type) { + case IPPROTO_GRE: + return MLX5_CAP_ETH(mdev, tunnel_stateless_gre); + case IPPROTO_IPIP: + case IPPROTO_IPV6: + return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) || + MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_rx)); + default: + return false; + } +} + +static bool mlx5_tunnel_any_rx_proto_supported(struct mlx5_core_dev *mdev) +{ + int tt; + + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (mlx5_tunnel_proto_supported_rx(mdev, + ttc_tunnel_rules[tt].proto)) + return true; + } + return false; +} + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev) +{ + return (mlx5_tunnel_any_rx_proto_supported(mdev) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.inner_ip_version)); +} + +static u8 mlx5_etype_to_ipv(u16 ethertype) +{ + if (ethertype == ETH_P_IP) + return 4; + + if (ethertype == ETH_P_IPV6) + return 6; + + return 0; +} + +static struct mlx5_flow_handle * +mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, u16 etype, u8 proto) +{ + int match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_protocol, proto); + } + + ipv = mlx5_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); + } else if (etype) { + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_flow_handle **trules; + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int tt; + int err; + + ft = ttc->t; + rules = ttc->rules; + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + rule->rule = mlx5_generate_ttc_rule(dev, ft, ¶ms->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + if (!params->inner_ttc || !mlx5_tunnel_inner_ft_supported(dev)) + return 0; + + trules = ttc->tunnel_rules; + for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) { + if (!mlx5_tunnel_proto_supported_rx(dev, + ttc_tunnel_rules[tt].proto)) + continue; + trules[tt] = mlx5_generate_ttc_rule(dev, ft, + ¶ms->tunnel_dests[tt], + ttc_tunnel_rules[tt].etype, + ttc_tunnel_rules[tt].proto); + if (IS_ERR(trules[tt])) { + err = PTR_ERR(trules[tt]); + trules[tt] = NULL; + goto del_rules; + } + } + + return 0; + +del_rules: + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, + bool use_ipv) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_TTC_NUM_GROUPS, sizeof(*ttc->g), GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +static struct mlx5_flow_handle * +mlx5_generate_inner_ttc_rule(struct mlx5_core_dev *dev, + struct mlx5_flow_table *ft, + struct mlx5_flow_destination *dest, + u16 etype, u8 proto) +{ + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int err = 0; + u8 ipv; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + ipv = mlx5_etype_to_ipv(etype); + if (etype && ipv) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_version, ipv); + } + + if (proto) { + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, inner_headers.ip_protocol); + MLX5_SET(fte_match_param, spec->match_value, inner_headers.ip_protocol, proto); + } + + rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(dev, "%s: add inner TTC rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, + struct ttc_params *params, + struct mlx5_ttc_table *ttc) +{ + struct mlx5_ttc_rule *rules; + struct mlx5_flow_table *ft; + int err; + int tt; + + ft = ttc->t; + rules = ttc->rules; + + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + struct mlx5_ttc_rule *rule = &rules[tt]; + + rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, + ¶ms->dests[tt], + ttc_rules[tt].etype, + ttc_rules[tt].proto); + if (IS_ERR(rule->rule)) { + err = PTR_ERR(rule->rule); + rule->rule = NULL; + goto del_rules; + } + rule->default_dest = params->dests[tt]; + } + + return 0; + +del_rules: + + mlx5_cleanup_ttc_rules(ttc); + return err; +} + +static int mlx5_create_inner_ttc_table_groups(struct mlx5_ttc_table *ttc) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + int ix = 0; + u32 *in; + int err; + u8 *mc; + + ttc->g = kcalloc(MLX5_INNER_TTC_NUM_GROUPS, sizeof(*ttc->g), + GFP_KERNEL); + if (!ttc->g) + return -ENOMEM; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) { + kfree(ttc->g); + ttc->g = NULL; + return -ENOMEM; + } + + /* L4 Group */ + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP1_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* L3 Group */ + MLX5_SET(fte_match_param, mc, inner_headers.ip_protocol, 0); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP2_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + /* Any Group */ + memset(in, 0, inlen); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += MLX5_INNER_TTC_GROUP3_SIZE; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + kvfree(in); + return 0; + +err: + err = PTR_ERR(ttc->g[ttc->num_groups]); + ttc->g[ttc->num_groups] = NULL; + kvfree(in); + + return err; +} + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_INNER_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_inner_ttc_table_groups(ttc); + if (err) + goto destroy_ft; + + err = mlx5_generate_inner_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc) +{ + int i; + + mlx5_cleanup_ttc_rules(ttc); + for (i = ttc->num_groups - 1; i >= 0; i--) { + if (!IS_ERR_OR_NULL(ttc->g[i])) + mlx5_destroy_flow_group(ttc->g[i]); + ttc->g[i] = NULL; + } + + kfree(ttc->g); + mlx5_destroy_flow_table(ttc->t); + kvfree(ttc); +} + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params) +{ + bool match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(dev, + ft_field_support.outer_ip_version); + struct mlx5_ttc_table *ttc; + int err; + + ttc = kvzalloc(sizeof(*ttc), GFP_KERNEL); + if (!ttc) + return ERR_PTR(-ENOMEM); + + WARN_ON_ONCE(params->ft_attr.max_fte); + params->ft_attr.max_fte = MLX5_TTC_TABLE_SIZE; + ttc->t = mlx5_create_flow_table(params->ns, ¶ms->ft_attr); + if (IS_ERR(ttc->t)) { + err = PTR_ERR(ttc->t); + kvfree(ttc); + return ERR_PTR(err); + } + + err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer); + if (err) + goto destroy_ft; + + err = mlx5_generate_ttc_table_rules(dev, params, ttc); + if (err) + goto destroy_ft; + + return ttc; + +destroy_ft: + mlx5_destroy_ttc_table(ttc); + return ERR_PTR(err); +} + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest) +{ + return mlx5_modify_rule_destination(ttc->rules[type].rule, new_dest, + NULL); +} + +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination *dest = &ttc->rules[type].default_dest; + + WARN_ONCE(dest->type != MLX5_FLOW_DESTINATION_TYPE_TIR, + "TTC[%d] default dest is not setup yet", type); + + return *dest; +} + +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination dest = mlx5_ttc_get_default_dest(ttc, type); + + return mlx5_ttc_fwd_dest(ttc, type, &dest); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h new file mode 100644 index 000000000000..ce95be8f8382 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2020 Mellanox Technologies. */ + +#ifndef __ML5_FS_TTC_H__ +#define __ML5_FS_TTC_H__ + +#include <linux/mlx5/fs.h> + +enum mlx5_traffic_types { + MLX5_TT_IPV4_TCP, + MLX5_TT_IPV6_TCP, + MLX5_TT_IPV4_UDP, + MLX5_TT_IPV6_UDP, + MLX5_TT_IPV4_IPSEC_AH, + MLX5_TT_IPV6_IPSEC_AH, + MLX5_TT_IPV4_IPSEC_ESP, + MLX5_TT_IPV6_IPSEC_ESP, + MLX5_TT_IPV4, + MLX5_TT_IPV6, + MLX5_TT_ANY, + MLX5_NUM_TT, + MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY, +}; + +enum mlx5_tunnel_types { + MLX5_TT_IPV4_GRE, + MLX5_TT_IPV6_GRE, + MLX5_TT_IPV4_IPIP, + MLX5_TT_IPV6_IPIP, + MLX5_TT_IPV4_IPV6, + MLX5_TT_IPV6_IPV6, + MLX5_NUM_TUNNEL_TT, +}; + +struct mlx5_ttc_rule { + struct mlx5_flow_handle *rule; + struct mlx5_flow_destination default_dest; +}; + +struct mlx5_ttc_table; + +struct ttc_params { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table_attr ft_attr; + struct mlx5_flow_destination dests[MLX5_NUM_TT]; + bool inner_ttc; + struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; +}; + +struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); +void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc); + +struct mlx5_ttc_table *mlx5_create_inner_ttc_table(struct mlx5_core_dev *dev, + struct ttc_params *params); + +int mlx5_ttc_fwd_dest(struct mlx5_ttc_table *ttc, enum mlx5_traffic_types type, + struct mlx5_flow_destination *new_dest); +struct mlx5_flow_destination +mlx5_ttc_get_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); +int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type); + +bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); +u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt); + +#endif /* __MLX5_FS_TTC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index eb1b316560a8..6fe560307c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1179,6 +1179,7 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_ec; } + mlx5_lag_add_mdev(dev); err = mlx5_sriov_attach(dev); if (err) { mlx5_core_err(dev, "sriov init failed %d\n", err); @@ -1186,11 +1187,11 @@ static int mlx5_load(struct mlx5_core_dev *dev) } mlx5_sf_dev_table_create(dev); - mlx5_lag_add_mdev(dev); return 0; err_sriov: + mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); err_ec: mlx5_sf_hw_table_destroy(dev); @@ -1222,9 +1223,9 @@ err_irq_table: static void mlx5_unload(struct mlx5_core_dev *dev) { - mlx5_lag_remove_mdev(dev); mlx5_sf_dev_table_destroy(dev); mlx5_sriov_detach(dev); + mlx5_lag_remove_mdev(dev); mlx5_ec_cleanup(dev); mlx5_sf_hw_table_destroy(dev); mlx5_vhca_event_stop(dev); @@ -1271,7 +1272,7 @@ int mlx5_init_one(struct mlx5_core_dev *dev) set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); - err = mlx5_devlink_register(priv_to_devlink(dev), dev->device); + err = mlx5_devlink_register(priv_to_devlink(dev)); if (err) goto err_devlink_reg; @@ -1452,7 +1453,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id) struct devlink *devlink; int err; - devlink = mlx5_devlink_alloc(); + devlink = mlx5_devlink_alloc(&pdev->dev); if (!devlink) { dev_err(&pdev->dev, "devlink alloc failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 343807ac2036..14ffd74eeabe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -168,6 +168,8 @@ void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, struct net_device *netdev); void mlx5_lag_add_mdev(struct mlx5_core_dev *dev); void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev); +void mlx5_lag_disable_change(struct mlx5_core_dev *dev); +void mlx5_lag_enable_change(struct mlx5_core_dev *dev); int mlx5_events_init(struct mlx5_core_dev *dev); void mlx5_events_cleanup(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index 42c8ee03fe3e..052f48068dc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -14,7 +14,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia struct devlink *devlink; int err; - devlink = mlx5_devlink_alloc(); + devlink = mlx5_devlink_alloc(&adev->dev); if (!devlink) return -ENOMEM; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 1be048769309..720195c4be7c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -164,12 +164,12 @@ static bool mlx5_sf_is_active(const struct mlx5_sf *sf) return sf->hw_state == MLX5_VHCA_STATE_ACTIVE || sf->hw_state == MLX5_VHCA_STATE_IN_USE; } -int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); struct mlx5_sf_table *table; struct mlx5_sf *sf; int err = 0; @@ -248,11 +248,11 @@ out: return err; } -int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack) { - struct mlx5_core_dev *dev = devlink_priv(devlink); + struct mlx5_core_dev *dev = devlink_priv(dl_port->devlink); struct mlx5_sf_table *table; struct mlx5_sf *sf; int err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 81ce13b19ee8..3a480e06ecc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -24,11 +24,11 @@ int mlx5_devlink_sf_port_new(struct devlink *devlink, unsigned int *new_port_index); int mlx5_devlink_sf_port_del(struct devlink *devlink, unsigned int port_index, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_fn_state_get(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_get(struct devlink_port *dl_port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack); -int mlx5_devlink_sf_port_fn_state_set(struct devlink *devlink, struct devlink_port *dl_port, +int mlx5_devlink_sf_port_fn_state_set(struct devlink_port *dl_port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); #else diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c index 12cf323a5943..754f89222858 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c @@ -790,7 +790,7 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe)); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET(cqc, cqc, uar_page, uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index e775f08fb464..f080fab3de2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1927,7 +1927,8 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, if (!reload) { alloc_size = sizeof(*mlxsw_core) + mlxsw_driver->priv_size; - devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size); + devlink = devlink_alloc(&mlxsw_devlink_ops, alloc_size, + mlxsw_bus_info->dev); if (!devlink) { err = -ENOMEM; goto err_devlink_alloc; @@ -1974,7 +1975,7 @@ __mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_emad_init; if (!reload) { - err = devlink_register(devlink, mlxsw_bus_info->dev); + err = devlink_register(devlink); if (err) goto err_devlink_register; } diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index d39ae2a6fb49..7bdbb2d09a14 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -1,6 +1,5 @@ config SPARX5_SWITCH tristate "Sparx5 switch driver" - depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV depends on HAS_IOMEM depends on OF diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c index 9d485a9d1f1f..cb68eaaac881 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_netdev.c @@ -13,19 +13,26 @@ */ #define VSTAX 73 -static void ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) +#define ifh_encode_bitfield(ifh, value, pos, _width) \ + ({ \ + u32 width = (_width); \ + \ + /* Max width is 5 bytes - 40 bits. In worst case this will + * spread over 6 bytes - 48 bits + */ \ + compiletime_assert(width <= 40, \ + "Unsupported width, must be <= 40"); \ + __ifh_encode_bitfield((ifh), (value), (pos), width); \ + }) + +static void __ifh_encode_bitfield(void *ifh, u64 value, u32 pos, u32 width) { u8 *ifh_hdr = ifh; /* Calculate the Start IFH byte position of this IFH bit position */ u32 byte = (35 - (pos / 8)); /* Calculate the Start bit position in the Start IFH byte */ u32 bit = (pos % 8); - u64 encode = GENMASK(bit + width - 1, bit) & (value << bit); - - /* Max width is 5 bytes - 40 bits. In worst case this will - * spread over 6 bytes - 48 bits - */ - compiletime_assert(width <= 40, "Unsupported width, must be <= 40"); + u64 encode = GENMASK_ULL(bit + width - 1, bit) & (value << bit); /* The b0-b7 goes into the start IFH byte */ if (encode & 0xFF) diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 4bd7e9d9ec61..aa41c9cde643 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1103,7 +1103,8 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (!np && !pdev->dev.platform_data) return -ENODEV; - devlink = devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot)); + devlink = + devlink_alloc(&ocelot_devlink_ops, sizeof(*ocelot), &pdev->dev); if (!devlink) return -ENOMEM; @@ -1187,7 +1188,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) if (err) goto out_put_ports; - err = devlink_register(devlink, ocelot->dev); + err = devlink_register(devlink); if (err) goto out_ocelot_deinit; diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c index ce3eca5d152b..d74a80f010c5 100644 --- a/drivers/net/ethernet/natsemi/jazzsonic.c +++ b/drivers/net/ethernet/natsemi/jazzsonic.c @@ -193,8 +193,6 @@ static int jazz_sonic_probe(struct platform_device *pdev) SET_NETDEV_DEV(dev, &pdev->dev); platform_set_drvdata(pdev, dev); - netdev_boot_setup_check(dev); - dev->base_addr = res->start; dev->irq = platform_get_irq(pdev, 0); err = sonic_probe1(dev); diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c index bd9d026e609d..3f982033944b 100644 --- a/drivers/net/ethernet/natsemi/natsemi.c +++ b/drivers/net/ethernet/natsemi/natsemi.c @@ -819,7 +819,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) printk(version); #endif - i = pci_enable_device(pdev); + i = pcim_enable_device(pdev); if (i) return i; /* natsemi has a non-standard PM control register @@ -852,7 +852,7 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) ioaddr = ioremap(iostart, iosize); if (!ioaddr) { i = -ENOMEM; - goto err_ioremap; + goto err_pci_request_regions; } /* Work around the dropped serial bit. */ @@ -974,9 +974,6 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent) err_register_netdev: iounmap(ioaddr); - err_ioremap: - pci_release_regions(pdev); - err_pci_request_regions: free_netdev(dev); return i; @@ -3241,7 +3238,6 @@ static void natsemi_remove1(struct pci_dev *pdev) NATSEMI_REMOVE_FILE(pdev, dspcfg_workaround); unregister_netdev (dev); - pci_release_regions (pdev); iounmap(ioaddr); free_netdev (dev); } diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 28d9e98db81a..ca4686094701 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -215,7 +215,6 @@ int xtsonic_probe(struct platform_device *pdev) lp->device = &pdev->dev; platform_set_drvdata(pdev, dev); SET_NETDEV_DEV(dev, &pdev->dev); - netdev_boot_setup_check(dev); dev->base_addr = resmem->start; dev->irq = resirq->start; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index 20fb4ad29865..df4a3f3da83a 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -3512,13 +3512,13 @@ static void vxge_device_unregister(struct __vxge_hw_device *hldev) kfree(vdev->vpaths); - /* we are safe to free it now */ - free_netdev(dev); - vxge_debug_init(vdev->level_trace, "%s: ethernet device unregistered", buf); vxge_debug_entryexit(vdev->level_trace, "%s: %s:%d Exiting...", buf, __func__, __LINE__); + + /* we are safe to free it now */ + free_netdev(dev); } /* diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 742a420152b3..bb3b8a7f6c5d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -692,7 +692,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, goto err_pci_disable; } - devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf)); + devlink = devlink_alloc(&nfp_devlink_ops, sizeof(*pf), &pdev->dev); if (!devlink) { err = -ENOMEM; goto err_rel_regions; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 15078f9dc9f1..5bfa22accf2c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3281,17 +3281,12 @@ static int nfp_net_dp_swap_enable(struct nfp_net *nn, struct nfp_net_dp *dp) for (r = 0; r < nn->max_r_vecs; r++) nfp_net_vector_assign_rings(&nn->dp, &nn->r_vecs[r], r); - err = netif_set_real_num_rx_queues(nn->dp.netdev, nn->dp.num_rx_rings); + err = netif_set_real_num_queues(nn->dp.netdev, + nn->dp.num_stack_tx_rings, + nn->dp.num_rx_rings); if (err) return err; - if (nn->dp.netdev->real_num_tx_queues != nn->dp.num_stack_tx_rings) { - err = netif_set_real_num_tx_queues(nn->dp.netdev, - nn->dp.num_stack_tx_rings); - if (err) - return err; - } - return nfp_net_set_config_and_enable(nn); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index a213784ffa54..0bf2ff5717bc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -286,6 +286,8 @@ nfp_net_get_link_ksettings(struct net_device *netdev, /* Init to unknowns */ ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); cmd->base.port = PORT_OTHER; cmd->base.speed = SPEED_UNKNOWN; cmd->base.duplex = DUPLEX_UNKNOWN; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 921db40047d7..d10a93801344 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -701,7 +701,7 @@ int nfp_net_pci_probe(struct nfp_pf *pf) if (err) goto err_unmap; - err = devlink_register(devlink, &pf->pdev->dev); + err = devlink_register(devlink); if (err) goto err_app_clean; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c index cd520e4c5522..c7d0e195d176 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_devlink.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_devlink.c @@ -64,7 +64,7 @@ struct ionic *ionic_devlink_alloc(struct device *dev) { struct devlink *dl; - dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic)); + dl = devlink_alloc(&ionic_dl_ops, sizeof(struct ionic), dev); return devlink_priv(dl); } @@ -82,7 +82,7 @@ int ionic_devlink_register(struct ionic *ionic) struct devlink_port_attrs attrs = {}; int err; - err = devlink_register(dl, ionic->dev); + err = devlink_register(dl); if (err) { dev_warn(ionic->dev, "devlink_register failed: %d\n", err); return err; diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index f21f80cee188..f52c47a71f4b 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -30,7 +30,7 @@ static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = { */ }; -static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode); +static void ionic_lif_rx_mode(struct ionic_lif *lif); static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr); static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr); static void ionic_link_status_check(struct ionic_lif *lif); @@ -54,7 +54,19 @@ static void ionic_dim_work(struct work_struct *work) cur_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); qcq = container_of(dim, struct ionic_qcq, dim); new_coal = ionic_coal_usec_to_hw(qcq->q.lif->ionic, cur_moder.usec); - qcq->intr.dim_coal_hw = new_coal ? new_coal : 1; + new_coal = new_coal ? new_coal : 1; + + if (qcq->intr.dim_coal_hw != new_coal) { + unsigned int qi = qcq->cq.bound_q->index; + struct ionic_lif *lif = qcq->q.lif; + + qcq->intr.dim_coal_hw = new_coal; + + ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, + lif->rxqcqs[qi]->intr.index, + qcq->intr.dim_coal_hw); + } + dim->state = DIM_START_MEASURE; } @@ -78,7 +90,7 @@ static void ionic_lif_deferred_work(struct work_struct *work) switch (w->type) { case IONIC_DW_TYPE_RX_MODE: - ionic_lif_rx_mode(lif, w->rx_mode); + ionic_lif_rx_mode(lif); break; case IONIC_DW_TYPE_RX_ADDR_ADD: ionic_lif_addr_add(lif, w->addr); @@ -1302,10 +1314,8 @@ static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr) return 0; } -static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, - bool can_sleep) +static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add) { - struct ionic_deferred_work *work; unsigned int nmfilters; unsigned int nufilters; @@ -1331,97 +1341,46 @@ static int ionic_lif_addr(struct ionic_lif *lif, const u8 *addr, bool add, lif->nucast--; } - if (!can_sleep) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) - return -ENOMEM; - work->type = add ? IONIC_DW_TYPE_RX_ADDR_ADD : - IONIC_DW_TYPE_RX_ADDR_DEL; - memcpy(work->addr, addr, ETH_ALEN); - netdev_dbg(lif->netdev, "deferred: rx_filter %s %pM\n", - add ? "add" : "del", addr); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - netdev_dbg(lif->netdev, "rx_filter %s %pM\n", - add ? "add" : "del", addr); - if (add) - return ionic_lif_addr_add(lif, addr); - else - return ionic_lif_addr_del(lif, addr); - } + netdev_dbg(lif->netdev, "rx_filter %s %pM\n", + add ? "add" : "del", addr); + if (add) + return ionic_lif_addr_add(lif, addr); + else + return ionic_lif_addr_del(lif, addr); return 0; } static int ionic_addr_add(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_SLEEP); -} - -static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr) -{ - return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR, CAN_NOT_SLEEP); + return ionic_lif_addr(netdev_priv(netdev), addr, ADD_ADDR); } static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { - return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_SLEEP); + return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR); } -static int ionic_ndo_addr_del(struct net_device *netdev, const u8 *addr) +static void ionic_lif_rx_mode(struct ionic_lif *lif) { - return ionic_lif_addr(netdev_priv(netdev), addr, DEL_ADDR, CAN_NOT_SLEEP); -} - -static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode) -{ - struct ionic_admin_ctx ctx = { - .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), - .cmd.rx_mode_set = { - .opcode = IONIC_CMD_RX_MODE_SET, - .lif_index = cpu_to_le16(lif->index), - .rx_mode = cpu_to_le16(rx_mode), - }, - }; + struct net_device *netdev = lif->netdev; + unsigned int nfilters; + unsigned int nd_flags; char buf[128]; - int err; + u16 rx_mode; int i; #define REMAIN(__x) (sizeof(buf) - (__x)) - i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", - lif->rx_mode, rx_mode); - if (rx_mode & IONIC_RX_MODE_F_UNICAST) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST"); - if (rx_mode & IONIC_RX_MODE_F_MULTICAST) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST"); - if (rx_mode & IONIC_RX_MODE_F_BROADCAST) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST"); - if (rx_mode & IONIC_RX_MODE_F_PROMISC) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); - if (rx_mode & IONIC_RX_MODE_F_ALLMULTI) - i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); - netdev_dbg(lif->netdev, "lif%d %s\n", lif->index, buf); - - err = ionic_adminq_post_wait(lif, &ctx); - if (err) - netdev_warn(lif->netdev, "set rx_mode 0x%04x failed: %d\n", - rx_mode, err); - else - lif->rx_mode = rx_mode; -} + mutex_lock(&lif->config_lock); -static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) -{ - struct ionic_lif *lif = netdev_priv(netdev); - struct ionic_deferred_work *work; - unsigned int nfilters; - unsigned int rx_mode; + /* grab the flags once for local use */ + nd_flags = netdev->flags; rx_mode = IONIC_RX_MODE_F_UNICAST; - rx_mode |= (netdev->flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; - rx_mode |= (netdev->flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0; - rx_mode |= (netdev->flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; - rx_mode |= (netdev->flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; + rx_mode |= (nd_flags & IFF_MULTICAST) ? IONIC_RX_MODE_F_MULTICAST : 0; + rx_mode |= (nd_flags & IFF_BROADCAST) ? IONIC_RX_MODE_F_BROADCAST : 0; + rx_mode |= (nd_flags & IFF_PROMISC) ? IONIC_RX_MODE_F_PROMISC : 0; + rx_mode |= (nd_flags & IFF_ALLMULTI) ? IONIC_RX_MODE_F_ALLMULTI : 0; /* sync unicast addresses * next check to see if we're in an overflow state @@ -1430,49 +1389,83 @@ static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) * we remove our overflow flag and check the netdev flags * to see if we can disable NIC PROMISC */ - if (can_sleep) - __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); - else - __dev_uc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); + __dev_uc_sync(netdev, ionic_addr_add, ionic_addr_del); nfilters = le32_to_cpu(lif->identity->eth.max_ucast_filters); if (netdev_uc_count(netdev) + 1 > nfilters) { rx_mode |= IONIC_RX_MODE_F_PROMISC; lif->uc_overflow = true; } else if (lif->uc_overflow) { lif->uc_overflow = false; - if (!(netdev->flags & IFF_PROMISC)) + if (!(nd_flags & IFF_PROMISC)) rx_mode &= ~IONIC_RX_MODE_F_PROMISC; } /* same for multicast */ - if (can_sleep) - __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); - else - __dev_mc_sync(netdev, ionic_ndo_addr_add, ionic_ndo_addr_del); + __dev_mc_sync(netdev, ionic_addr_add, ionic_addr_del); nfilters = le32_to_cpu(lif->identity->eth.max_mcast_filters); if (netdev_mc_count(netdev) > nfilters) { rx_mode |= IONIC_RX_MODE_F_ALLMULTI; lif->mc_overflow = true; } else if (lif->mc_overflow) { lif->mc_overflow = false; - if (!(netdev->flags & IFF_ALLMULTI)) + if (!(nd_flags & IFF_ALLMULTI)) rx_mode &= ~IONIC_RX_MODE_F_ALLMULTI; } + i = scnprintf(buf, sizeof(buf), "rx_mode 0x%04x -> 0x%04x:", + lif->rx_mode, rx_mode); + if (rx_mode & IONIC_RX_MODE_F_UNICAST) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_UNICAST"); + if (rx_mode & IONIC_RX_MODE_F_MULTICAST) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_MULTICAST"); + if (rx_mode & IONIC_RX_MODE_F_BROADCAST) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_BROADCAST"); + if (rx_mode & IONIC_RX_MODE_F_PROMISC) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_PROMISC"); + if (rx_mode & IONIC_RX_MODE_F_ALLMULTI) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_ALLMULTI"); + if (rx_mode & IONIC_RX_MODE_F_RDMA_SNIFFER) + i += scnprintf(&buf[i], REMAIN(i), " RX_MODE_F_RDMA_SNIFFER"); + netdev_dbg(netdev, "lif%d %s\n", lif->index, buf); + if (lif->rx_mode != rx_mode) { - if (!can_sleep) { - work = kzalloc(sizeof(*work), GFP_ATOMIC); - if (!work) { - netdev_err(lif->netdev, "rxmode change dropped\n"); - return; - } - work->type = IONIC_DW_TYPE_RX_MODE; - work->rx_mode = rx_mode; - netdev_dbg(lif->netdev, "deferred: rx_mode\n"); - ionic_lif_deferred_enqueue(&lif->deferred, work); - } else { - ionic_lif_rx_mode(lif, rx_mode); + struct ionic_admin_ctx ctx = { + .work = COMPLETION_INITIALIZER_ONSTACK(ctx.work), + .cmd.rx_mode_set = { + .opcode = IONIC_CMD_RX_MODE_SET, + .lif_index = cpu_to_le16(lif->index), + }, + }; + int err; + + ctx.cmd.rx_mode_set.rx_mode = cpu_to_le16(rx_mode); + err = ionic_adminq_post_wait(lif, &ctx); + if (err) + netdev_warn(netdev, "set rx_mode 0x%04x failed: %d\n", + rx_mode, err); + else + lif->rx_mode = rx_mode; + } + + mutex_unlock(&lif->config_lock); +} + +static void ionic_set_rx_mode(struct net_device *netdev, bool can_sleep) +{ + struct ionic_lif *lif = netdev_priv(netdev); + struct ionic_deferred_work *work; + + if (!can_sleep) { + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + netdev_err(lif->netdev, "rxmode change dropped\n"); + return; } + work->type = IONIC_DW_TYPE_RX_MODE; + netdev_dbg(lif->netdev, "deferred: rx_mode\n"); + ionic_lif_deferred_enqueue(&lif->deferred, work); + } else { + ionic_lif_rx_mode(lif); } } @@ -3074,6 +3067,7 @@ void ionic_lif_deinit(struct ionic_lif *lif) ionic_lif_qcq_deinit(lif, lif->notifyqcq); ionic_lif_qcq_deinit(lif, lif->adminqcq); + mutex_destroy(&lif->config_lock); mutex_destroy(&lif->queue_lock); ionic_lif_reset(lif); } @@ -3201,7 +3195,7 @@ static int ionic_station_set(struct ionic_lif *lif) */ if (!ether_addr_equal(ctx.comp.lif_getattr.mac, netdev->dev_addr)) - ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR); } else { /* Update the netdev mac with the device's mac */ memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len); @@ -3218,7 +3212,7 @@ static int ionic_station_set(struct ionic_lif *lif) netdev_dbg(lif->netdev, "adding station MAC addr %pM\n", netdev->dev_addr); - ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR, CAN_SLEEP); + ionic_lif_addr(lif, netdev->dev_addr, ADD_ADDR); return 0; } @@ -3241,6 +3235,7 @@ int ionic_lif_init(struct ionic_lif *lif) lif->hw_index = le16_to_cpu(comp.hw_index); mutex_init(&lif->queue_lock); + mutex_init(&lif->config_lock); /* now that we have the hw_index we can figure out our doorbell page */ lif->dbid_count = le32_to_cpu(lif->ionic->ident.dev.ndbpgs_per_lif); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h index 346506f01715..69ab59fedb6c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h @@ -108,7 +108,6 @@ struct ionic_deferred_work { struct list_head list; enum ionic_deferred_work_type type; union { - unsigned int rx_mode; u8 addr[ETH_ALEN]; u8 fw_status; }; @@ -179,6 +178,7 @@ struct ionic_lif { unsigned int index; unsigned int hw_index; struct mutex queue_lock; /* lock for queue structures */ + struct mutex config_lock; /* lock for config actions */ spinlock_t adminq_lock; /* lock for AdminQ operations */ struct ionic_qcq *adminqcq; struct ionic_qcq *notifyqcq; @@ -199,7 +199,7 @@ struct ionic_lif { unsigned int nrxq_descs; u32 rx_copybreak; u64 rxq_features; - unsigned int rx_mode; + u16 rx_mode; u64 hw_features; bool registered; bool mc_overflow; @@ -302,7 +302,7 @@ int ionic_lif_identify(struct ionic *ionic, u8 lif_type, int ionic_lif_size(struct ionic *ionic); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) -int ionic_lif_hwstamp_replay(struct ionic_lif *lif); +void ionic_lif_hwstamp_replay(struct ionic_lif *lif); int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr); int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr); ktime_t ionic_lif_phc_ktime(struct ionic_lif *lif, u64 counter); @@ -311,10 +311,7 @@ void ionic_lif_unregister_phc(struct ionic_lif *lif); void ionic_lif_alloc_phc(struct ionic_lif *lif); void ionic_lif_free_phc(struct ionic_lif *lif); #else -static inline int ionic_lif_hwstamp_replay(struct ionic_lif *lif) -{ - return -EOPNOTSUPP; -} +static inline void ionic_lif_hwstamp_replay(struct ionic_lif *lif) {} static inline int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) { diff --git a/drivers/net/ethernet/pensando/ionic/ionic_phc.c b/drivers/net/ethernet/pensando/ionic/ionic_phc.c index 736ebc5da0f7..afc45da399d4 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_phc.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_phc.c @@ -188,6 +188,9 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) struct hwtstamp_config config; int err; + if (!lif->phc || !lif->phc->ptp) + return -EOPNOTSUPP; + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) return -EFAULT; @@ -203,15 +206,16 @@ int ionic_lif_hwstamp_set(struct ionic_lif *lif, struct ifreq *ifr) return 0; } -int ionic_lif_hwstamp_replay(struct ionic_lif *lif) +void ionic_lif_hwstamp_replay(struct ionic_lif *lif) { int err; + if (!lif->phc || !lif->phc->ptp) + return; + err = ionic_lif_hwstamp_set_ts_config(lif, NULL); if (err) netdev_info(lif->netdev, "hwstamp replay failed: %d\n", err); - - return err; } int ionic_lif_hwstamp_get(struct ionic_lif *lif, struct ifreq *ifr) diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 2ba19246d763..37c39581b659 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -269,12 +269,11 @@ static void ionic_rx_clean(struct ionic_queue *q, } } - if (likely(netdev->features & NETIF_F_RXCSUM)) { - if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC) { - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum = (__force __wsum)le16_to_cpu(comp->csum); - stats->csum_complete++; - } + if (likely(netdev->features & NETIF_F_RXCSUM) && + (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_CALC)) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = (__force __wsum)le16_to_cpu(comp->csum); + stats->csum_complete++; } else { stats->csum_none++; } @@ -446,11 +445,12 @@ void ionic_rx_empty(struct ionic_queue *q) q->tail_idx = 0; } -static void ionic_dim_update(struct ionic_qcq *qcq) +static void ionic_dim_update(struct ionic_qcq *qcq, int napi_mode) { struct dim_sample dim_sample; struct ionic_lif *lif; unsigned int qi; + u64 pkts, bytes; if (!qcq->intr.dim_coal_hw) return; @@ -458,14 +458,23 @@ static void ionic_dim_update(struct ionic_qcq *qcq) lif = qcq->q.lif; qi = qcq->cq.bound_q->index; - ionic_intr_coal_init(lif->ionic->idev.intr_ctrl, - lif->rxqcqs[qi]->intr.index, - qcq->intr.dim_coal_hw); + switch (napi_mode) { + case IONIC_LIF_F_TX_DIM_INTR: + pkts = lif->txqstats[qi].pkts; + bytes = lif->txqstats[qi].bytes; + break; + case IONIC_LIF_F_RX_DIM_INTR: + pkts = lif->rxqstats[qi].pkts; + bytes = lif->rxqstats[qi].bytes; + break; + default: + pkts = lif->txqstats[qi].pkts + lif->rxqstats[qi].pkts; + bytes = lif->txqstats[qi].bytes + lif->rxqstats[qi].bytes; + break; + } dim_update_sample(qcq->cq.bound_intr->rearm_count, - lif->txqstats[qi].pkts, - lif->txqstats[qi].bytes, - &dim_sample); + pkts, bytes, &dim_sample); net_dim(&qcq->dim, dim_sample); } @@ -486,7 +495,7 @@ int ionic_tx_napi(struct napi_struct *napi, int budget) ionic_tx_service, NULL, NULL); if (work_done < budget && napi_complete_done(napi, work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, IONIC_LIF_F_TX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; cq->bound_intr->rearm_count++; } @@ -525,7 +534,7 @@ int ionic_rx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(cq->bound_q); if (work_done < budget && napi_complete_done(napi, work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, IONIC_LIF_F_RX_DIM_INTR); flags |= IONIC_INTR_CRED_UNMASK; cq->bound_intr->rearm_count++; } @@ -571,7 +580,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget) ionic_rx_fill(rxcq->bound_q); if (rx_work_done < budget && napi_complete_done(napi, rx_work_done)) { - ionic_dim_update(qcq); + ionic_dim_update(qcq, 0); flags |= IONIC_INTR_CRED_UNMASK; rxcq->bound_intr->rearm_count++; } diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index cf7f4da68e69..4c7501b9c284 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -207,14 +207,15 @@ struct devlink *qed_devlink_register(struct qed_dev *cdev) struct devlink *dl; int rc; - dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink)); + dl = devlink_alloc(&qed_dl_ops, sizeof(struct qed_devlink), + &cdev->pdev->dev); if (!dl) return ERR_PTR(-ENOMEM); qdevlink = devlink_priv(dl); qdevlink->cdev = cdev; - rc = devlink_register(dl, &cdev->pdev->dev); + rc = devlink_register(dl); if (rc) goto err_free; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 578935f643b8..ab6d4f737316 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -464,12 +464,19 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) u32 int_sts, first_drop_reason, details, address, all_drops_reason; struct qed_ptt *p_ptt = p_hwfn->p_dpc_ptt; + int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); + if (int_sts == 0xdeadbeaf) { + DP_NOTICE(p_hwfn->cdev, + "DORQ is being reset, skipping int_sts handler\n"); + + return 0; + } + /* int_sts may be zero since all PFs were interrupted for doorbell * overflow but another one already handled it. Can abort here. If * This PF also requires overflow recovery we will be interrupted again. * The masked almost full indication may also be set. Ignoring. */ - int_sts = qed_rd(p_hwfn, p_ptt, DORQ_REG_INT_STS); if (!(int_sts & ~DORQ_REG_INT_STS_DORQ_FIFO_AFULL)) return 0; @@ -528,6 +535,9 @@ static int qed_dorq_attn_int_sts(struct qed_hwfn *p_hwfn) static int qed_dorq_attn_cb(struct qed_hwfn *p_hwfn) { + if (p_hwfn->cdev->recov_in_prog) + return 0; + p_hwfn->db_recovery_info.dorq_attn = true; qed_dorq_attn_overflow(p_hwfn); diff --git a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c index a99861124630..fc8b3e64f153 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_iwarp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_iwarp.c @@ -1624,8 +1624,6 @@ qed_iwarp_get_listener(struct qed_hwfn *p_hwfn, static const u32 ip_zero[4] = { 0, 0, 0, 0 }; bool found = false; - qed_iwarp_print_cm_info(p_hwfn, cm_info); - list_for_each_entry(listener, &p_hwfn->p_rdma_info->iwarp.listen_list, list_entry) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index aa48b1b7eddc..6871d892eabf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1215,6 +1215,10 @@ static void qed_slowpath_task(struct work_struct *work) if (test_and_clear_bit(QED_SLOWPATH_PERIODIC_DB_REC, &hwfn->slowpath_task_flags)) { + /* skip qed_db_rec_handler during recovery/unload */ + if (hwfn->cdev->recov_in_prog || !hwfn->slowpath_wq_active) + goto out; + qed_db_rec_handler(hwfn, ptt); if (hwfn->periodic_db_rec_count--) qed_slowpath_delayed_work(hwfn, @@ -1222,6 +1226,7 @@ static void qed_slowpath_task(struct work_struct *work) QED_PERIODIC_DB_REC_INTERVAL); } +out: qed_ptt_release(hwfn, ptt); } diff --git a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c index c1dd71d19f3f..3b84d00cf987 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c +++ b/drivers/net/ethernet/qlogic/qed/qed_nvmetcp_fw_funcs.c @@ -4,7 +4,6 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> -#include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> #include <linux/types.h> diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 8693117a6180..66c69f0f9af1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -492,6 +492,7 @@ struct qede_fastpath { #define QEDE_SP_HW_ERR 4 #define QEDE_SP_ARFS_CONFIG 5 #define QEDE_SP_AER 7 +#define QEDE_SP_DISABLE 8 #ifdef CONFIG_RFS_ACCEL int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c index c59b72c90293..a2e4dfb5cb44 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_filter.c +++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c @@ -831,7 +831,7 @@ int qede_configure_vlan_filters(struct qede_dev *edev) int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct qede_dev *edev = netdev_priv(dev); - struct qede_vlan *vlan = NULL; + struct qede_vlan *vlan; int rc = 0; DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid); @@ -842,7 +842,7 @@ int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) if (vlan->vid == vid) break; - if (!vlan || (vlan->vid != vid)) { + if (list_entry_is_head(vlan, &edev->vlan_list, list)) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "Vlan isn't configured\n"); goto out; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 033bf2c7f56c..d400e9b235bf 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1005,6 +1005,13 @@ static void qede_sp_task(struct work_struct *work) struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work); + /* Disable execution of this deferred work once + * qede removal is in progress, this stop any future + * scheduling of sp_task. + */ + if (test_bit(QEDE_SP_DISABLE, &edev->sp_flags)) + return; + /* The locking scheme depends on the specific flag: * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to * ensure that ongoing flows are ended and new ones are not started. @@ -1292,6 +1299,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY)); if (mode != QEDE_REMOVE_RECOVERY) { + set_bit(QEDE_SP_DISABLE, &edev->sp_flags); unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 2376b2729633..c00ad57575ea 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -154,7 +154,7 @@ static int ql_wait_for_drvr_lock(struct ql3_adapter *qdev) "driver lock acquired\n"); return 1; } - ssleep(1); + mdelay(1000); } while (++i < 10); netdev_err(qdev->ndev, "Timed out waiting for driver lock...\n"); @@ -3274,7 +3274,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev) if ((value & ISP_CONTROL_SR) == 0) break; - ssleep(1); + mdelay(1000); } while ((--max_wait_time)); /* @@ -3310,7 +3310,7 @@ static int ql_adapter_reset(struct ql3_adapter *qdev) ispControlStatus); if ((value & ISP_CONTROL_FSR) == 0) break; - ssleep(1); + mdelay(1000); } while ((--max_wait_time)); } if (max_wait_time == 0) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index d8f0863b3934..f6b6651decf3 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1021,7 +1021,7 @@ clear_diag_irq: static void qlcnic_create_loopback_buff(unsigned char *data, u8 mac[]) { - unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00}; + static const unsigned char random_data[] = {0xa8, 0x06, 0x45, 0x00}; memset(data, 0x4e, QLCNIC_ILB_PKT_SIZE); diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index ec6f7f993eb7..60a0c0e9ded2 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -443,7 +443,7 @@ static int sis900_probe(struct pci_dev *pci_dev, #endif /* setup various bits in PCI command register */ - ret = pci_enable_device(pci_dev); + ret = pcim_enable_device(pci_dev); if(ret) return ret; i = dma_set_mask(&pci_dev->dev, DMA_BIT_MASK(32)); @@ -469,7 +469,7 @@ static int sis900_probe(struct pci_dev *pci_dev, ioaddr = pci_iomap(pci_dev, 0, 0); if (!ioaddr) { ret = -ENOMEM; - goto err_out_cleardev; + goto err_out; } sis_priv = netdev_priv(net_dev); @@ -581,8 +581,6 @@ err_unmap_tx: sis_priv->tx_ring_dma); err_out_unmap: pci_iounmap(pci_dev, ioaddr); -err_out_cleardev: - pci_release_regions(pci_dev); err_out: free_netdev(net_dev); return ret; @@ -2499,7 +2497,6 @@ static void sis900_remove(struct pci_dev *pci_dev) sis_priv->tx_ring_dma); pci_iounmap(pci_dev, sis_priv->ioaddr); free_netdev(net_dev); - pci_release_regions(pci_dev); } static int __maybe_unused sis900_suspend(struct device *dev) diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index c52a38df0e0d..72e42a868346 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -23,6 +23,7 @@ config SMC9194 tristate "SMC 9194 support" depends on ISA select CRC32 + select NETDEV_LEGACY_INIT help This is support for the SMC9xxx based Ethernet cards. Choose this option if you have a DELL laptop with the docking station, or diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c index bf7c8c8b1350..0ce403fa5f1a 100644 --- a/drivers/net/ethernet/smsc/smc9194.c +++ b/drivers/net/ethernet/smsc/smc9194.c @@ -1508,7 +1508,7 @@ MODULE_PARM_DESC(io, "SMC 99194 I/O base address"); MODULE_PARM_DESC(irq, "SMC 99194 IRQ number"); MODULE_PARM_DESC(ifport, "SMC 99194 interface port (0-default, 1-TP, 2-AUI)"); -int __init init_module(void) +static int __init smc_init_module(void) { if (io == 0) printk(KERN_WARNING @@ -1518,13 +1518,15 @@ int __init init_module(void) devSMC9194 = smc_init(-1); return PTR_ERR_OR_ZERO(devSMC9194); } +module_init(smc_init_module); -void __exit cleanup_module(void) +static void __exit smc_cleanup_module(void) { unregister_netdev(devSMC9194); free_irq(devSMC9194->irq, devSMC9194); release_region(devSMC9194->base_addr, SMC_IO_EXTENT); free_netdev(devSMC9194); } +module_exit(smc_cleanup_module); #endif /* MODULE */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c index 28dd0ed85a82..f7dc8458cde8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c @@ -289,10 +289,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) val &= ~NSS_COMMON_GMAC_CTL_PHY_IFACE_SEL; break; default: - dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", - phy_modes(gmac->phy_mode)); - err = -EINVAL; - goto err_remove_config_dt; + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val); @@ -309,10 +306,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) NSS_COMMON_CLK_SRC_CTRL_OFFSET(gmac->id); break; default: - dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", - phy_modes(gmac->phy_mode)); - err = -EINVAL; - goto err_remove_config_dt; + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val); @@ -329,8 +323,7 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) NSS_COMMON_CLK_GATE_GMII_TX_EN(gmac->id); break; default: - /* We don't get here; the switch above will have errored out */ - unreachable(); + goto err_unsupported_phy; } regmap_write(gmac->nss_common, NSS_COMMON_CLK_GATE, val); @@ -361,6 +354,11 @@ static int ipq806x_gmac_probe(struct platform_device *pdev) return 0; +err_unsupported_phy: + dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n", + phy_modes(gmac->phy_mode)); + err = -EINVAL; + err_remove_config_dt: stmmac_remove_config_dt(pdev, plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 67ba083eb90c..b21745368983 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1249,6 +1249,7 @@ const struct stmmac_ops dwmac410_ops = { .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, + .est_irq_status = dwmac5_est_irq_status, .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, @@ -1300,6 +1301,7 @@ const struct stmmac_ops dwmac510_ops = { .config_l3_filter = dwmac4_config_l3_filter, .config_l4_filter = dwmac4_config_l4_filter, .est_configure = dwmac5_est_configure, + .est_irq_status = dwmac5_est_irq_status, .fpe_configure = dwmac5_fpe_configure, .fpe_send_mpacket = dwmac5_fpe_send_mpacket, .fpe_irq_status = dwmac5_fpe_irq_status, diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 006fd4237725..1501e8906be4 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -8191,8 +8191,9 @@ static int niu_pci_vpd_fetch(struct niu *np, u32 start) err = niu_pci_vpd_scan_props(np, here, end); if (err < 0) return err; + /* ret == 1 is not an error */ if (err == 1) - return -EINVAL; + return 0; } return 0; } @@ -9207,7 +9208,7 @@ static int niu_get_of_props(struct niu *np) else dp = pci_device_to_OF_node(np->pdev); - phy_type = of_get_property(dp, "phy-type", &prop_len); + phy_type = of_get_property(dp, "phy-type", NULL); if (!phy_type) { netdev_err(dev, "%pOF: OF node lacks phy-type property\n", dp); return -EINVAL; @@ -9241,12 +9242,12 @@ static int niu_get_of_props(struct niu *np) return -EINVAL; } - model = of_get_property(dp, "model", &prop_len); + model = of_get_property(dp, "model", NULL); if (model) strcpy(np->vpd.model, model); - if (of_find_property(dp, "hot-swappable-phy", &prop_len)) { + if (of_find_property(dp, "hot-swappable-phy", NULL)) { np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER | NIU_FLAGS_HOTPLUG_PHY); } diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 7ac8e5ecbe97..affcf92cd3aa 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -64,7 +64,6 @@ config TI_CPSW config TI_CPSW_SWITCHDEV tristate "TI CPSW Switch Support with switchdev" depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST - depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV depends on TI_CPTS || !TI_CPTS select PAGE_POOL @@ -110,7 +109,6 @@ config TI_K3_AM65_CPSW_NUSS config TI_K3_AM65_CPSW_SWITCHDEV bool "TI K3 AM654x/J721E CPSW Switch mode support" depends on TI_K3_AM65_CPSW_NUSS - depends on BRIDGE || BRIDGE=n depends on NET_SWITCHDEV help This enables switchdev support for TI K3 CPSWxG Ethernet diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 4f67d1a98c0d..130346f74ee8 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -7,7 +7,6 @@ #include <linux/clk.h> #include <linux/etherdevice.h> -#include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/interrupt.h> #include <linux/kernel.h> @@ -28,6 +27,7 @@ #include <linux/sys_soc.h> #include <linux/dma/ti-cppi5.h> #include <linux/dma/k3-udma-glue.h> +#include <net/switchdev.h> #include "cpsw_ale.h" #include "cpsw_sl.h" @@ -519,6 +519,10 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common, } napi_enable(&common->napi_rx); + if (common->rx_irq_disabled) { + common->rx_irq_disabled = false; + enable_irq(common->rx_chns.irq); + } dev_dbg(common->dev, "cpsw_nuss started\n"); return 0; @@ -872,8 +876,12 @@ static int am65_cpsw_nuss_rx_poll(struct napi_struct *napi_rx, int budget) dev_dbg(common->dev, "%s num_rx:%d %d\n", __func__, num_rx, budget); - if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) - enable_irq(common->rx_chns.irq); + if (num_rx < budget && napi_complete_done(napi_rx, num_rx)) { + if (common->rx_irq_disabled) { + common->rx_irq_disabled = false; + enable_irq(common->rx_chns.irq); + } + } return num_rx; } @@ -1078,19 +1086,20 @@ static int am65_cpsw_nuss_tx_poll(struct napi_struct *napi_tx, int budget) else num_tx = am65_cpsw_nuss_tx_compl_packets(tx_chn->common, tx_chn->id, budget); - num_tx = min(num_tx, budget); - if (num_tx < budget) { - napi_complete(napi_tx); + if (num_tx >= budget) + return budget; + + if (napi_complete_done(napi_tx, num_tx)) enable_irq(tx_chn->irq); - } - return num_tx; + return 0; } static irqreturn_t am65_cpsw_nuss_rx_irq(int irq, void *dev_id) { struct am65_cpsw_common *common = dev_id; + common->rx_irq_disabled = true; disable_irq_nosync(irq); napi_schedule(&common->napi_rx); @@ -2061,8 +2070,12 @@ static void am65_cpsw_port_offload_fwd_mark_update(struct am65_cpsw_common *comm for (i = 1; i <= common->port_num; i++) { struct am65_cpsw_port *port = am65_common_get_port(common, i); - struct am65_cpsw_ndev_priv *priv = am65_ndev_to_priv(port->ndev); + struct am65_cpsw_ndev_priv *priv; + + if (!port->ndev) + continue; + priv = am65_ndev_to_priv(port->ndev); priv->offload_fwd_mark = set_val; } } @@ -2409,14 +2422,14 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) int i; common->devlink = - devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv)); + devlink_alloc(&am65_cpsw_devlink_ops, sizeof(*dl_priv), dev); if (!common->devlink) return -ENOMEM; dl_priv = devlink_priv(common->devlink); dl_priv->common = common; - ret = devlink_register(common->devlink, dev); + ret = devlink_register(common->devlink); if (ret) { dev_err(dev, "devlink reg fail ret:%d\n", ret); goto dl_free; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 5d93e346f05e..048ed10143c1 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -126,6 +126,8 @@ struct am65_cpsw_common { struct am65_cpsw_rx_chn rx_chns; struct napi_struct napi_rx; + bool rx_irq_disabled; + u32 nuss_ver; u32 cpsw_ver; unsigned long bus_freq; diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index abf9a2a6f7eb..9f70e40779f6 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -431,7 +431,7 @@ static void cpsw_rx_handler(void *token, int len, int status) skb->protocol = eth_type_trans(skb, ndev); /* mark skb for recycling */ - skb_mark_for_recycle(skb, page, pool); + skb_mark_for_recycle(skb); netif_receive_skb(skb); ndev->stats.rx_bytes += len; @@ -905,7 +905,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, struct cpdma_chan *txch; int ret, q_idx; - if (skb_padto(skb, CPSW_MIN_PACKET_SIZE)) { + if (skb_put_padto(skb, CPSW_MIN_PACKET_SIZE)) { cpsw_err(priv, tx_err, "packet pad failed\n"); ndev->stats.tx_dropped++; return NET_XMIT_DROP; diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c index b4f55ff4e84f..ff3a96b084ee 100644 --- a/drivers/net/ethernet/ti/cpsw_new.c +++ b/drivers/net/ethernet/ti/cpsw_new.c @@ -11,7 +11,6 @@ #include <linux/module.h> #include <linux/irqreturn.h> #include <linux/interrupt.h> -#include <linux/if_bridge.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/net_tstamp.h> @@ -29,6 +28,7 @@ #include <linux/kmemleak.h> #include <linux/sys_soc.h> +#include <net/switchdev.h> #include <net/page_pool.h> #include <net/pkt_cls.h> #include <net/devlink.h> @@ -375,7 +375,7 @@ static void cpsw_rx_handler(void *token, int len, int status) skb->protocol = eth_type_trans(skb, ndev); /* mark skb for recycling */ - skb_mark_for_recycle(skb, page, pool); + skb_mark_for_recycle(skb); netif_receive_skb(skb); ndev->stats.rx_bytes += len; @@ -1800,14 +1800,14 @@ static int cpsw_register_devlink(struct cpsw_common *cpsw) struct cpsw_devlink *dl_priv; int ret = 0; - cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv)); + cpsw->devlink = devlink_alloc(&cpsw_devlink_ops, sizeof(*dl_priv), dev); if (!cpsw->devlink) return -ENOMEM; dl_priv = devlink_priv(cpsw->devlink); dl_priv->cpsw = cpsw; - ret = devlink_register(cpsw->devlink, dev); + ret = devlink_register(cpsw->devlink); if (ret) { dev_err(dev, "DL reg fail ret:%d\n", ret); goto dl_free; diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 637796670746..b1c5cbe7478b 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -943,7 +943,7 @@ static int emac_dev_xmit(struct sk_buff *skb, struct net_device *ndev) goto fail_tx; } - ret_code = skb_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE); + ret_code = skb_put_padto(skb, EMAC_DEF_MIN_ETHPKTSIZE); if (unlikely(ret_code < 0)) { if (netif_msg_tx_err(priv) && net_ratelimit()) dev_err(emac_dev, "DaVinci EMAC: packet pad failed"); diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c index 3de67ba066a6..a2fcdb1abdb9 100644 --- a/drivers/net/ipa/gsi.c +++ b/drivers/net/ipa/gsi.c @@ -198,77 +198,6 @@ static void gsi_irq_type_disable(struct gsi *gsi, enum gsi_irq_type_id type_id) gsi_irq_type_update(gsi, gsi->type_enabled_bitmap & ~BIT(type_id)); } -/* Turn off all GSI interrupts initially; there is no gsi_irq_teardown() */ -static void gsi_irq_setup(struct gsi *gsi) -{ - /* Disable all interrupt types */ - gsi_irq_type_update(gsi, 0); - - /* Clear all type-specific interrupt masks */ - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); - iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); - - /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */ - if (gsi->version > IPA_VERSION_3_1) { - u32 offset; - - /* These registers are in the non-adjusted address range */ - offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET; - iowrite32(0, gsi->virt_raw + offset); - offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET; - iowrite32(0, gsi->virt_raw + offset); - } - - iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); -} - -/* Get # supported channel and event rings; there is no gsi_ring_teardown() */ -static int gsi_ring_setup(struct gsi *gsi) -{ - struct device *dev = gsi->dev; - u32 count; - u32 val; - - if (gsi->version < IPA_VERSION_3_5_1) { - /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */ - gsi->channel_count = GSI_CHANNEL_COUNT_MAX; - gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX; - - return 0; - } - - val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET); - - count = u32_get_bits(val, NUM_CH_PER_EE_FMASK); - if (!count) { - dev_err(dev, "GSI reports zero channels supported\n"); - return -EINVAL; - } - if (count > GSI_CHANNEL_COUNT_MAX) { - dev_warn(dev, "limiting to %u channels; hardware supports %u\n", - GSI_CHANNEL_COUNT_MAX, count); - count = GSI_CHANNEL_COUNT_MAX; - } - gsi->channel_count = count; - - count = u32_get_bits(val, NUM_EV_PER_EE_FMASK); - if (!count) { - dev_err(dev, "GSI reports zero event rings supported\n"); - return -EINVAL; - } - if (count > GSI_EVT_RING_COUNT_MAX) { - dev_warn(dev, - "limiting to %u event rings; hardware supports %u\n", - GSI_EVT_RING_COUNT_MAX, count); - count = GSI_EVT_RING_COUNT_MAX; - } - gsi->evt_ring_count = count; - - return 0; -} - /* Event ring commands are performed one at a time. Their completion * is signaled by the event ring control GSI interrupt type, which is * only enabled when we issue an event ring command. Only the event @@ -920,12 +849,13 @@ static void gsi_channel_program(struct gsi_channel *channel, bool doorbell) /* All done! */ } -static int __gsi_channel_start(struct gsi_channel *channel, bool start) +static int __gsi_channel_start(struct gsi_channel *channel, bool resume) { struct gsi *gsi = channel->gsi; int ret; - if (!start) + /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */ + if (resume && gsi->version < IPA_VERSION_4_0) return 0; mutex_lock(&gsi->mutex); @@ -947,7 +877,7 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id) napi_enable(&channel->napi); gsi_irq_ieob_enable_one(gsi, channel->evt_ring_id); - ret = __gsi_channel_start(channel, true); + ret = __gsi_channel_start(channel, false); if (ret) { gsi_irq_ieob_disable_one(gsi, channel->evt_ring_id); napi_disable(&channel->napi); @@ -971,7 +901,7 @@ static int gsi_channel_stop_retry(struct gsi_channel *channel) return ret; } -static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) +static int __gsi_channel_stop(struct gsi_channel *channel, bool suspend) { struct gsi *gsi = channel->gsi; int ret; @@ -979,7 +909,8 @@ static int __gsi_channel_stop(struct gsi_channel *channel, bool stop) /* Wait for any underway transactions to complete before stopping. */ gsi_channel_trans_quiesce(channel); - if (!stop) + /* Prior to IPA v4.0 suspend/resume is not implemented by GSI */ + if (suspend && gsi->version < IPA_VERSION_4_0) return 0; mutex_lock(&gsi->mutex); @@ -997,7 +928,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id) struct gsi_channel *channel = &gsi->channel[channel_id]; int ret; - ret = __gsi_channel_stop(channel, true); + ret = __gsi_channel_stop(channel, false); if (ret) return ret; @@ -1026,13 +957,13 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell) mutex_unlock(&gsi->mutex); } -/* Stop a STARTED channel for suspend (using stop if requested) */ -int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop) +/* Stop a started channel for suspend */ +int gsi_channel_suspend(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; int ret; - ret = __gsi_channel_stop(channel, stop); + ret = __gsi_channel_stop(channel, true); if (ret) return ret; @@ -1042,12 +973,24 @@ int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop) return 0; } -/* Resume a suspended channel (starting will be requested if STOPPED) */ -int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start) +/* Resume a suspended channel (starting if stopped) */ +int gsi_channel_resume(struct gsi *gsi, u32 channel_id) { struct gsi_channel *channel = &gsi->channel[channel_id]; - return __gsi_channel_start(channel, start); + return __gsi_channel_start(channel, true); +} + +/* Prevent all GSI interrupts while suspended */ +void gsi_suspend(struct gsi *gsi) +{ + disable_irq(gsi->irq); +} + +/* Allow all GSI interrupts again when resuming */ +void gsi_resume(struct gsi *gsi) +{ + enable_irq(gsi->irq); } /** @@ -1372,33 +1315,20 @@ static irqreturn_t gsi_isr(int irq, void *dev_id) return IRQ_HANDLED; } +/* Init function for GSI IRQ lookup; there is no gsi_irq_exit() */ static int gsi_irq_init(struct gsi *gsi, struct platform_device *pdev) { - struct device *dev = &pdev->dev; - unsigned int irq; int ret; ret = platform_get_irq_byname(pdev, "gsi"); if (ret <= 0) return ret ? : -EINVAL; - irq = ret; - - ret = request_irq(irq, gsi_isr, 0, "gsi", gsi); - if (ret) { - dev_err(dev, "error %d requesting \"gsi\" IRQ\n", ret); - return ret; - } - gsi->irq = irq; + gsi->irq = ret; return 0; } -static void gsi_irq_exit(struct gsi *gsi) -{ - free_irq(gsi->irq, gsi); -} - /* Return the transaction associated with a transfer completion event */ static struct gsi_trans *gsi_event_trans(struct gsi_channel *channel, struct gsi_event *event) @@ -1876,6 +1806,93 @@ static void gsi_channel_teardown(struct gsi *gsi) gsi_irq_disable(gsi); } +/* Turn off all GSI interrupts initially */ +static int gsi_irq_setup(struct gsi *gsi) +{ + int ret; + + /* Writing 1 indicates IRQ interrupts; 0 would be MSI */ + iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET); + + /* Disable all interrupt types */ + gsi_irq_type_update(gsi, 0); + + /* Clear all type-specific interrupt masks */ + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_CH_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_EV_CH_IRQ_MSK_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_GLOB_IRQ_EN_OFFSET); + iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET); + + /* The inter-EE interrupts are not supported for IPA v3.0-v3.1 */ + if (gsi->version > IPA_VERSION_3_1) { + u32 offset; + + /* These registers are in the non-adjusted address range */ + offset = GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET; + iowrite32(0, gsi->virt_raw + offset); + offset = GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET; + iowrite32(0, gsi->virt_raw + offset); + } + + iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET); + + ret = request_irq(gsi->irq, gsi_isr, 0, "gsi", gsi); + if (ret) + dev_err(gsi->dev, "error %d requesting \"gsi\" IRQ\n", ret); + + return ret; +} + +static void gsi_irq_teardown(struct gsi *gsi) +{ + free_irq(gsi->irq, gsi); +} + +/* Get # supported channel and event rings; there is no gsi_ring_teardown() */ +static int gsi_ring_setup(struct gsi *gsi) +{ + struct device *dev = gsi->dev; + u32 count; + u32 val; + + if (gsi->version < IPA_VERSION_3_5_1) { + /* No HW_PARAM_2 register prior to IPA v3.5.1, assume the max */ + gsi->channel_count = GSI_CHANNEL_COUNT_MAX; + gsi->evt_ring_count = GSI_EVT_RING_COUNT_MAX; + + return 0; + } + + val = ioread32(gsi->virt + GSI_GSI_HW_PARAM_2_OFFSET); + + count = u32_get_bits(val, NUM_CH_PER_EE_FMASK); + if (!count) { + dev_err(dev, "GSI reports zero channels supported\n"); + return -EINVAL; + } + if (count > GSI_CHANNEL_COUNT_MAX) { + dev_warn(dev, "limiting to %u channels; hardware supports %u\n", + GSI_CHANNEL_COUNT_MAX, count); + count = GSI_CHANNEL_COUNT_MAX; + } + gsi->channel_count = count; + + count = u32_get_bits(val, NUM_EV_PER_EE_FMASK); + if (!count) { + dev_err(dev, "GSI reports zero event rings supported\n"); + return -EINVAL; + } + if (count > GSI_EVT_RING_COUNT_MAX) { + dev_warn(dev, + "limiting to %u event rings; hardware supports %u\n", + GSI_EVT_RING_COUNT_MAX, count); + count = GSI_EVT_RING_COUNT_MAX; + } + gsi->evt_ring_count = count; + + return 0; +} + /* Setup function for GSI. GSI firmware must be loaded and initialized */ int gsi_setup(struct gsi *gsi) { @@ -1889,25 +1906,34 @@ int gsi_setup(struct gsi *gsi) return -EIO; } - gsi_irq_setup(gsi); /* No matching teardown required */ + ret = gsi_irq_setup(gsi); + if (ret) + return ret; ret = gsi_ring_setup(gsi); /* No matching teardown required */ if (ret) - return ret; + goto err_irq_teardown; /* Initialize the error log */ iowrite32(0, gsi->virt + GSI_ERROR_LOG_OFFSET); - /* Writing 1 indicates IRQ interrupts; 0 would be MSI */ - iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET); + ret = gsi_channel_setup(gsi); + if (ret) + goto err_irq_teardown; - return gsi_channel_setup(gsi); + return 0; + +err_irq_teardown: + gsi_irq_teardown(gsi); + + return ret; } /* Inverse of gsi_setup() */ void gsi_teardown(struct gsi *gsi) { gsi_channel_teardown(gsi); + gsi_irq_teardown(gsi); } /* Initialize a channel's event ring */ @@ -2204,20 +2230,18 @@ int gsi_init(struct gsi *gsi, struct platform_device *pdev, init_completion(&gsi->completion); - ret = gsi_irq_init(gsi, pdev); + ret = gsi_irq_init(gsi, pdev); /* No matching exit required */ if (ret) goto err_iounmap; ret = gsi_channel_init(gsi, count, data); if (ret) - goto err_irq_exit; + goto err_iounmap; mutex_init(&gsi->mutex); return 0; -err_irq_exit: - gsi_irq_exit(gsi); err_iounmap: iounmap(gsi->virt_raw); @@ -2229,7 +2253,6 @@ void gsi_exit(struct gsi *gsi) { mutex_destroy(&gsi->mutex); gsi_channel_exit(gsi); - gsi_irq_exit(gsi); iounmap(gsi->virt_raw); } diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h index 81cd7b07f6e1..88b80dc3db79 100644 --- a/drivers/net/ipa/gsi.h +++ b/drivers/net/ipa/gsi.h @@ -232,8 +232,35 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id); */ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool doorbell); -int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop); -int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start); +/** + * gsi_suspend() - Prepare the GSI subsystem for suspend + * @gsi: GSI pointer + */ +void gsi_suspend(struct gsi *gsi); + +/** + * gsi_resume() - Resume the GSI subsystem following suspend + * @gsi: GSI pointer + */ +void gsi_resume(struct gsi *gsi); + +/** + * gsi_channel_suspend() - Suspend a GSI channel + * @gsi: GSI pointer + * @channel_id: Channel to suspend + * + * For IPA v4.0+, suspend is implemented by stopping the channel. + */ +int gsi_channel_suspend(struct gsi *gsi, u32 channel_id); + +/** + * gsi_channel_resume() - Resume a suspended GSI channel + * @gsi: GSI pointer + * @channel_id: Channel to resume + * + * For IPA v4.0+, the stopped channel is started again. + */ +int gsi_channel_resume(struct gsi *gsi, u32 channel_id); /** * gsi_init() - Initialize the GSI subsystem diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index 71ba996096bb..34152fe02963 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -28,19 +28,8 @@ struct ipa_smp2p; struct ipa_interrupt; /** - * enum ipa_flag - IPA state flags - * @IPA_FLAG_RESUMED: Whether resume from suspend has been signaled - * @IPA_FLAG_COUNT: Number of defined IPA flags - */ -enum ipa_flag { - IPA_FLAG_RESUMED, - IPA_FLAG_COUNT, /* Last; not a flag */ -}; - -/** * struct ipa - IPA information * @gsi: Embedded GSI structure - * @flags: Boolean state flags * @version: IPA hardware version * @pdev: Platform device * @completion: Used to signal pipeline clear transfer complete @@ -83,7 +72,6 @@ enum ipa_flag { */ struct ipa { struct gsi gsi; - DECLARE_BITMAP(flags, IPA_FLAG_COUNT); enum ipa_version version; struct platform_device *pdev; struct completion completion; diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c index 69ef6ea41e61..a67b6136e3c0 100644 --- a/drivers/net/ipa/ipa_clock.c +++ b/drivers/net/ipa/ipa_clock.c @@ -9,9 +9,12 @@ #include <linux/clk.h> #include <linux/device.h> #include <linux/interconnect.h> +#include <linux/pm.h> +#include <linux/bitops.h> #include "ipa.h" #include "ipa_clock.h" +#include "ipa_endpoint.h" #include "ipa_modem.h" #include "ipa_data.h" @@ -43,10 +46,21 @@ struct ipa_interconnect { }; /** + * enum ipa_power_flag - IPA power flags + * @IPA_POWER_FLAG_RESUMED: Whether resume from suspend has been signaled + * @IPA_POWER_FLAG_COUNT: Number of defined power flags + */ +enum ipa_power_flag { + IPA_POWER_FLAG_RESUMED, + IPA_POWER_FLAG_COUNT, /* Last; not a flag */ +}; + +/** * struct ipa_clock - IPA clocking information * @count: Clocking reference count * @mutex: Protects clock enable/disable * @core: IPA core clock + * @flags: Boolean state flags * @interconnect_count: Number of elements in interconnect[] * @interconnect: Interconnect array */ @@ -54,6 +68,7 @@ struct ipa_clock { refcount_t count; struct mutex mutex; /* protects clock enable/disable */ struct clk *core; + DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT); u32 interconnect_count; struct ipa_interconnect *interconnect; }; @@ -144,8 +159,12 @@ static int ipa_interconnect_enable(struct ipa *ipa) ret = icc_set_bw(interconnect->path, interconnect->average_bandwidth, interconnect->peak_bandwidth); - if (ret) + if (ret) { + dev_err(&ipa->pdev->dev, + "error %d enabling %s interconnect\n", + ret, icc_get_name(interconnect->path)); goto out_unwind; + } interconnect++; } @@ -159,10 +178,11 @@ out_unwind: } /* To disable an interconnect, we just its bandwidth to 0 */ -static void ipa_interconnect_disable(struct ipa *ipa) +static int ipa_interconnect_disable(struct ipa *ipa) { struct ipa_interconnect *interconnect; struct ipa_clock *clock = ipa->clock; + struct device *dev = &ipa->pdev->dev; int result = 0; u32 count; int ret; @@ -172,13 +192,16 @@ static void ipa_interconnect_disable(struct ipa *ipa) while (count--) { interconnect--; ret = icc_set_bw(interconnect->path, 0, 0); - if (ret && !result) - result = ret; + if (ret) { + dev_err(dev, "error %d disabling %s interconnect\n", + ret, icc_get_name(interconnect->path)); + /* Try to disable all; record only the first error */ + if (!result) + result = ret; + } } - if (result) - dev_err(&ipa->pdev->dev, - "error %d disabling IPA interconnects\n", ret); + return result; } /* Turn on IPA clocks, including interconnects */ @@ -191,8 +214,10 @@ static int ipa_clock_enable(struct ipa *ipa) return ret; ret = clk_prepare_enable(ipa->clock->core); - if (ret) - ipa_interconnect_disable(ipa); + if (ret) { + dev_err(&ipa->pdev->dev, "error %d enabling core clock\n", ret); + (void)ipa_interconnect_disable(ipa); + } return ret; } @@ -201,7 +226,7 @@ static int ipa_clock_enable(struct ipa *ipa) static void ipa_clock_disable(struct ipa *ipa) { clk_disable_unprepare(ipa->clock->core); - ipa_interconnect_disable(ipa); + (void)ipa_interconnect_disable(ipa); } /* Get an IPA clock reference, but only if the reference count is @@ -238,13 +263,8 @@ void ipa_clock_get(struct ipa *ipa) goto out_mutex_unlock; ret = ipa_clock_enable(ipa); - if (ret) { - dev_err(&ipa->pdev->dev, "error %d enabling IPA clock\n", ret); - goto out_mutex_unlock; - } - - refcount_set(&clock->count, 1); - + if (!ret) + refcount_set(&clock->count, 1); out_mutex_unlock: mutex_unlock(&clock->mutex); } @@ -271,6 +291,40 @@ u32 ipa_clock_rate(struct ipa *ipa) return ipa->clock ? (u32)clk_get_rate(ipa->clock->core) : 0; } +/** + * ipa_suspend_handler() - Handle the suspend IPA interrupt + * @ipa: IPA pointer + * @irq_id: IPA interrupt type (unused) + * + * If an RX endpoint is suspended, and the IPA has a packet destined for + * that endpoint, the IPA generates a SUSPEND interrupt to inform the AP + * that it should resume the endpoint. If we get one of these interrupts + * we just wake up the system. + */ +static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) +{ + /* Just report the event, and let system resume handle the rest. + * More than one endpoint could signal this; if so, ignore + * all but the first. + */ + if (!test_and_set_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags)) + pm_wakeup_dev_event(&ipa->pdev->dev, 0, true); + + /* Acknowledge/clear the suspend interrupt on all endpoints */ + ipa_interrupt_suspend_clear_all(ipa->interrupt); +} + +void ipa_power_setup(struct ipa *ipa) +{ + ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, + ipa_suspend_handler); +} + +void ipa_power_teardown(struct ipa *ipa) +{ + ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); +} + /* Initialize IPA clocking */ struct ipa_clock * ipa_clock_init(struct device *dev, const struct ipa_clock_data *data) @@ -329,3 +383,62 @@ void ipa_clock_exit(struct ipa_clock *clock) kfree(clock); clk_put(clk); } + +/** + * ipa_suspend() - Power management system suspend callback + * @dev: IPA device structure + * + * Return: Always returns zero + * + * Called by the PM framework when a system suspend operation is invoked. + * Suspends endpoints and releases the clock reference held to keep + * the IPA clock running until this point. + */ +static int ipa_suspend(struct device *dev) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + /* Endpoints aren't usable until setup is complete */ + if (ipa->setup_complete) { + __clear_bit(IPA_POWER_FLAG_RESUMED, ipa->clock->flags); + ipa_endpoint_suspend(ipa); + gsi_suspend(&ipa->gsi); + } + + ipa_clock_put(ipa); + + return 0; +} + +/** + * ipa_resume() - Power management system resume callback + * @dev: IPA device structure + * + * Return: Always returns 0 + * + * Called by the PM framework when a system resume operation is invoked. + * Takes an IPA clock reference to keep the clock running until suspend, + * and resumes endpoints. + */ +static int ipa_resume(struct device *dev) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + /* This clock reference will keep the IPA out of suspend + * until we get a power management suspend request. + */ + ipa_clock_get(ipa); + + /* Endpoints aren't usable until setup is complete */ + if (ipa->setup_complete) { + gsi_resume(&ipa->gsi); + ipa_endpoint_resume(ipa); + } + + return 0; +} + +const struct dev_pm_ops ipa_pm_ops = { + .suspend = ipa_suspend, + .resume = ipa_resume, +}; diff --git a/drivers/net/ipa/ipa_clock.h b/drivers/net/ipa/ipa_clock.h index 1fe634760e59..2a0f7ff3c9e6 100644 --- a/drivers/net/ipa/ipa_clock.h +++ b/drivers/net/ipa/ipa_clock.h @@ -11,6 +11,9 @@ struct device; struct ipa; struct ipa_clock_data; +/* IPA device power management function block */ +extern const struct dev_pm_ops ipa_pm_ops; + /** * ipa_clock_rate() - Return the current IPA core clock rate * @ipa: IPA structure @@ -20,6 +23,18 @@ struct ipa_clock_data; u32 ipa_clock_rate(struct ipa *ipa); /** + * ipa_power_setup() - Set up IPA power management + * @ipa: IPA pointer + */ +void ipa_power_setup(struct ipa *ipa); + +/** + * ipa_power_teardown() - Inverse of ipa_power_setup() + * @ipa: IPA pointer + */ +void ipa_power_teardown(struct ipa *ipa); + +/** * ipa_clock_init() - Initialize IPA clocking * @dev: IPA device * @data: Clock configuration data diff --git a/drivers/net/ipa/ipa_data-v4.9.c b/drivers/net/ipa/ipa_data-v4.9.c index 6ab928266b5c..8d83e14819e2 100644 --- a/drivers/net/ipa/ipa_data-v4.9.c +++ b/drivers/net/ipa/ipa_data-v4.9.c @@ -418,18 +418,13 @@ static const struct ipa_mem_data ipa_mem_data = { /* Interconnect rates are in 1000 byte/second units */ static const struct ipa_interconnect_data ipa_interconnect_data[] = { { - .name = "ipa_to_llcc", + .name = "memory", .peak_bandwidth = 600000, /* 600 MBps */ .average_bandwidth = 150000, /* 150 MBps */ }, - { - .name = "llcc_to_ebi1", - .peak_bandwidth = 1804000, /* 1.804 GBps */ - .average_bandwidth = 150000, /* 150 MBps */ - }, /* Average rate is unused for the next interconnect */ { - .name = "appss_to_ipa", + .name = "config", .peak_bandwidth = 74000, /* 74 MBps */ .average_bandwidth = 0, /* unused */ }, diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index 8070d1a1d5df..08ee37ae2881 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -1587,7 +1587,6 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) { struct device *dev = &endpoint->ipa->pdev->dev; struct gsi *gsi = &endpoint->ipa->gsi; - bool stop_channel; int ret; if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id))) @@ -1598,11 +1597,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint) (void)ipa_endpoint_program_suspend(endpoint, true); } - /* Starting with IPA v4.0, endpoints are suspended by stopping the - * underlying GSI channel rather than using endpoint suspend mode. - */ - stop_channel = endpoint->ipa->version >= IPA_VERSION_4_0; - ret = gsi_channel_suspend(gsi, endpoint->channel_id, stop_channel); + ret = gsi_channel_suspend(gsi, endpoint->channel_id); if (ret) dev_err(dev, "error %d suspending channel %u\n", ret, endpoint->channel_id); @@ -1612,7 +1607,6 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint) { struct device *dev = &endpoint->ipa->pdev->dev; struct gsi *gsi = &endpoint->ipa->gsi; - bool start_channel; int ret; if (!(endpoint->ipa->enabled & BIT(endpoint->endpoint_id))) @@ -1621,11 +1615,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint) if (!endpoint->toward_ipa) (void)ipa_endpoint_program_suspend(endpoint, false); - /* Starting with IPA v4.0, the underlying GSI channel must be - * restarted for resume. - */ - start_channel = endpoint->ipa->version >= IPA_VERSION_4_0; - ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel); + ret = gsi_channel_resume(gsi, endpoint->channel_id); if (ret) dev_err(dev, "error %d resuming channel %u\n", ret, endpoint->channel_id); diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 2e728d4914c8..25bbb456e007 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -80,29 +80,6 @@ #define IPA_XO_CLOCK_DIVIDER 192 /* 1 is subtracted where used */ /** - * ipa_suspend_handler() - Handle the suspend IPA interrupt - * @ipa: IPA pointer - * @irq_id: IPA interrupt type (unused) - * - * If an RX endpoint is in suspend state, and the IPA has a packet - * destined for that endpoint, the IPA generates a SUSPEND interrupt - * to inform the AP that it should resume the endpoint. If we get - * one of these interrupts we just resume everything. - */ -static void ipa_suspend_handler(struct ipa *ipa, enum ipa_irq_id irq_id) -{ - /* Just report the event, and let system resume handle the rest. - * More than one endpoint could signal this; if so, ignore - * all but the first. - */ - if (!test_and_set_bit(IPA_FLAG_RESUMED, ipa->flags)) - pm_wakeup_dev_event(&ipa->pdev->dev, 0, true); - - /* Acknowledge/clear the suspend interrupt on all endpoints */ - ipa_interrupt_suspend_clear_all(ipa->interrupt); -} - -/** * ipa_setup() - Set up IPA hardware * @ipa: IPA pointer * @@ -124,12 +101,11 @@ int ipa_setup(struct ipa *ipa) if (ret) return ret; - ipa_interrupt_add(ipa->interrupt, IPA_IRQ_TX_SUSPEND, - ipa_suspend_handler); + ipa_power_setup(ipa); ret = device_init_wakeup(dev, true); if (ret) - goto err_interrupt_remove; + goto err_gsi_teardown; ipa_endpoint_setup(ipa); @@ -177,9 +153,9 @@ err_command_disable: ipa_endpoint_disable_one(command_endpoint); err_endpoint_teardown: ipa_endpoint_teardown(ipa); + ipa_power_teardown(ipa); (void)device_init_wakeup(dev, false); -err_interrupt_remove: - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); +err_gsi_teardown: gsi_teardown(&ipa->gsi); return ret; @@ -204,8 +180,8 @@ static void ipa_teardown(struct ipa *ipa) command_endpoint = ipa->name_map[IPA_ENDPOINT_AP_COMMAND_TX]; ipa_endpoint_disable_one(command_endpoint); ipa_endpoint_teardown(ipa); + ipa_power_teardown(ipa); (void)device_init_wakeup(&ipa->pdev->dev, false); - ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_TX_SUSPEND); gsi_teardown(&ipa->gsi); } @@ -474,7 +450,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) ret = ipa_endpoint_config(ipa); if (ret) - goto err_interrupt_deconfig; + goto err_uc_deconfig; ipa_table_config(ipa); /* No deconfig required */ @@ -491,7 +467,7 @@ static int ipa_config(struct ipa *ipa, const struct ipa_data *data) err_endpoint_deconfig: ipa_endpoint_deconfig(ipa); -err_interrupt_deconfig: +err_uc_deconfig: ipa_uc_deconfig(ipa); ipa_interrupt_deconfig(ipa->interrupt); ipa->interrupt = NULL; @@ -874,62 +850,6 @@ static void ipa_shutdown(struct platform_device *pdev) dev_err(&pdev->dev, "shutdown: remove returned %d\n", ret); } -/** - * ipa_suspend() - Power management system suspend callback - * @dev: IPA device structure - * - * Return: Always returns zero - * - * Called by the PM framework when a system suspend operation is invoked. - * Suspends endpoints and releases the clock reference held to keep - * the IPA clock running until this point. - */ -static int ipa_suspend(struct device *dev) -{ - struct ipa *ipa = dev_get_drvdata(dev); - - /* Endpoints aren't usable until setup is complete */ - if (ipa->setup_complete) { - __clear_bit(IPA_FLAG_RESUMED, ipa->flags); - ipa_endpoint_suspend(ipa); - } - - ipa_clock_put(ipa); - - return 0; -} - -/** - * ipa_resume() - Power management system resume callback - * @dev: IPA device structure - * - * Return: Always returns 0 - * - * Called by the PM framework when a system resume operation is invoked. - * Takes an IPA clock reference to keep the clock running until suspend, - * and resumes endpoints. - */ -static int ipa_resume(struct device *dev) -{ - struct ipa *ipa = dev_get_drvdata(dev); - - /* This clock reference will keep the IPA out of suspend - * until we get a power management suspend request. - */ - ipa_clock_get(ipa); - - /* Endpoints aren't usable until setup is complete */ - if (ipa->setup_complete) - ipa_endpoint_resume(ipa); - - return 0; -} - -static const struct dev_pm_ops ipa_pm_ops = { - .suspend = ipa_suspend, - .resume = ipa_resume, -}; - static const struct attribute_group *ipa_attribute_groups[] = { &ipa_attribute_group, &ipa_feature_attribute_group, diff --git a/drivers/net/ipa/ipa_modem.c b/drivers/net/ipa/ipa_modem.c index 4ea8287e9d23..ad4019e8016e 100644 --- a/drivers/net/ipa/ipa_modem.c +++ b/drivers/net/ipa/ipa_modem.c @@ -178,6 +178,9 @@ void ipa_modem_suspend(struct net_device *netdev) struct ipa_priv *priv = netdev_priv(netdev); struct ipa *ipa = priv->ipa; + if (!(netdev->flags & IFF_UP)) + return; + netif_stop_queue(netdev); ipa_endpoint_suspend_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); @@ -194,6 +197,9 @@ void ipa_modem_resume(struct net_device *netdev) struct ipa_priv *priv = netdev_priv(netdev); struct ipa *ipa = priv->ipa; + if (!(netdev->flags & IFF_UP)) + return; + ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]); ipa_endpoint_resume_one(ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]); @@ -225,13 +231,15 @@ int ipa_modem_start(struct ipa *ipa) SET_NETDEV_DEV(netdev, &ipa->pdev->dev); priv = netdev_priv(netdev); priv->ipa = ipa; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; + ipa->modem_netdev = netdev; ret = register_netdev(netdev); - if (!ret) { - ipa->modem_netdev = netdev; - ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = netdev; - ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = netdev; - } else { + if (ret) { + ipa->modem_netdev = NULL; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL; + ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL; free_netdev(netdev); } @@ -265,13 +273,15 @@ int ipa_modem_stop(struct ipa *ipa) /* Prevent the modem from triggering a call to ipa_setup() */ ipa_smp2p_disable(ipa); - /* Stop the queue and disable the endpoints if it's open */ + /* Clean up the netdev and endpoints if it was started */ if (netdev) { - (void)ipa_stop(netdev); + /* If it was opened, stop it first */ + if (netdev->flags & IFF_UP) + (void)ipa_stop(netdev); + unregister_netdev(netdev); + ipa->modem_netdev = NULL; ipa->name_map[IPA_ENDPOINT_AP_MODEM_RX]->netdev = NULL; ipa->name_map[IPA_ENDPOINT_AP_MODEM_TX]->netdev = NULL; - ipa->modem_netdev = NULL; - unregister_netdev(netdev); free_netdev(netdev); } diff --git a/drivers/net/mhi/Makefile b/drivers/net/mhi/Makefile deleted file mode 100644 index f71b9f8f3c4f..000000000000 --- a/drivers/net/mhi/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_MHI_NET) += mhi_net.o - -mhi_net-y := net.o proto_mbim.o diff --git a/drivers/net/mhi/mhi.h b/drivers/net/mhi/mhi.h deleted file mode 100644 index 1d0c499d27a3..000000000000 --- a/drivers/net/mhi/mhi.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* MHI Network driver - Network over MHI bus - * - * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> - */ - -struct mhi_net_stats { - u64_stats_t rx_packets; - u64_stats_t rx_bytes; - u64_stats_t rx_errors; - u64_stats_t rx_dropped; - u64_stats_t rx_length_errors; - u64_stats_t tx_packets; - u64_stats_t tx_bytes; - u64_stats_t tx_errors; - u64_stats_t tx_dropped; - struct u64_stats_sync tx_syncp; - struct u64_stats_sync rx_syncp; -}; - -struct mhi_net_dev { - struct mhi_device *mdev; - struct net_device *ndev; - struct sk_buff *skbagg_head; - struct sk_buff *skbagg_tail; - const struct mhi_net_proto *proto; - void *proto_data; - struct delayed_work rx_refill; - struct mhi_net_stats stats; - u32 rx_queue_sz; - int msg_enable; - unsigned int mru; -}; - -struct mhi_net_proto { - int (*init)(struct mhi_net_dev *mhi_netdev); - struct sk_buff * (*tx_fixup)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb); - void (*rx)(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb); -}; - -extern const struct mhi_net_proto proto_mbim; diff --git a/drivers/net/mhi/proto_mbim.c b/drivers/net/mhi/proto_mbim.c deleted file mode 100644 index 761d90b28ee6..000000000000 --- a/drivers/net/mhi/proto_mbim.c +++ /dev/null @@ -1,310 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* MHI Network driver - Network over MHI bus - * - * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> - * - * This driver copy some code from cdc_ncm, which is: - * Copyright (C) ST-Ericsson 2010-2012 - * and cdc_mbim, which is: - * Copyright (c) 2012 Smith Micro Software, Inc. - * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> - * - */ - -#include <linux/ethtool.h> -#include <linux/if_vlan.h> -#include <linux/ip.h> -#include <linux/mii.h> -#include <linux/netdevice.h> -#include <linux/wwan.h> -#include <linux/skbuff.h> -#include <linux/usb.h> -#include <linux/usb/cdc.h> -#include <linux/usb/usbnet.h> -#include <linux/usb/cdc_ncm.h> - -#include "mhi.h" - -#define MBIM_NDP16_SIGN_MASK 0x00ffffff - -/* Usual WWAN MTU */ -#define MHI_MBIM_DEFAULT_MTU 1500 - -/* 3500 allows to optimize skb allocation, the skbs will basically fit in - * one 4K page. Large MBIM packets will simply be split over several MHI - * transfers and chained by the MHI net layer (zerocopy). - */ -#define MHI_MBIM_DEFAULT_MRU 3500 - -struct mbim_context { - u16 rx_seq; - u16 tx_seq; -}; - -static void __mbim_length_errors_inc(struct mhi_net_dev *dev) -{ - u64_stats_update_begin(&dev->stats.rx_syncp); - u64_stats_inc(&dev->stats.rx_length_errors); - u64_stats_update_end(&dev->stats.rx_syncp); -} - -static void __mbim_errors_inc(struct mhi_net_dev *dev) -{ - u64_stats_update_begin(&dev->stats.rx_syncp); - u64_stats_inc(&dev->stats.rx_errors); - u64_stats_update_end(&dev->stats.rx_syncp); -} - -static int mbim_rx_verify_nth16(struct sk_buff *skb) -{ - struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev); - struct mbim_context *ctx = dev->proto_data; - struct usb_cdc_ncm_nth16 *nth16; - int len; - - if (skb->len < sizeof(struct usb_cdc_ncm_nth16) + - sizeof(struct usb_cdc_ncm_ndp16)) { - netif_dbg(dev, rx_err, dev->ndev, "frame too short\n"); - __mbim_length_errors_inc(dev); - return -EINVAL; - } - - nth16 = (struct usb_cdc_ncm_nth16 *)skb->data; - - if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { - netif_dbg(dev, rx_err, dev->ndev, - "invalid NTH16 signature <%#010x>\n", - le32_to_cpu(nth16->dwSignature)); - __mbim_errors_inc(dev); - return -EINVAL; - } - - /* No limit on the block length, except the size of the data pkt */ - len = le16_to_cpu(nth16->wBlockLength); - if (len > skb->len) { - netif_dbg(dev, rx_err, dev->ndev, - "NTB does not fit into the skb %u/%u\n", len, - skb->len); - __mbim_length_errors_inc(dev); - return -EINVAL; - } - - if (ctx->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && - (ctx->rx_seq || le16_to_cpu(nth16->wSequence)) && - !(ctx->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { - netif_dbg(dev, rx_err, dev->ndev, - "sequence number glitch prev=%d curr=%d\n", - ctx->rx_seq, le16_to_cpu(nth16->wSequence)); - } - ctx->rx_seq = le16_to_cpu(nth16->wSequence); - - return le16_to_cpu(nth16->wNdpIndex); -} - -static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16) -{ - struct mhi_net_dev *dev = wwan_netdev_drvpriv(skb->dev); - int ret; - - if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { - netif_dbg(dev, rx_err, dev->ndev, "invalid DPT16 length <%u>\n", - le16_to_cpu(ndp16->wLength)); - return -EINVAL; - } - - ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) - / sizeof(struct usb_cdc_ncm_dpe16)); - ret--; /* Last entry is always a NULL terminator */ - - if (sizeof(struct usb_cdc_ncm_ndp16) + - ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) { - netif_dbg(dev, rx_err, dev->ndev, - "Invalid nframes = %d\n", ret); - return -EINVAL; - } - - return ret; -} - -static void mbim_rx(struct mhi_net_dev *mhi_netdev, struct sk_buff *skb) -{ - struct net_device *ndev = mhi_netdev->ndev; - int ndpoffset; - - /* Check NTB header and retrieve first NDP offset */ - ndpoffset = mbim_rx_verify_nth16(skb); - if (ndpoffset < 0) { - net_err_ratelimited("%s: Incorrect NTB header\n", ndev->name); - goto error; - } - - /* Process each NDP */ - while (1) { - struct usb_cdc_ncm_ndp16 ndp16; - struct usb_cdc_ncm_dpe16 dpe16; - int nframes, n, dpeoffset; - - if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) { - net_err_ratelimited("%s: Incorrect NDP offset (%u)\n", - ndev->name, ndpoffset); - __mbim_length_errors_inc(mhi_netdev); - goto error; - } - - /* Check NDP header and retrieve number of datagrams */ - nframes = mbim_rx_verify_ndp16(skb, &ndp16); - if (nframes < 0) { - net_err_ratelimited("%s: Incorrect NDP16\n", ndev->name); - __mbim_length_errors_inc(mhi_netdev); - goto error; - } - - /* Only IP data type supported, no DSS in MHI context */ - if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) - != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) { - net_err_ratelimited("%s: Unsupported NDP type\n", ndev->name); - __mbim_errors_inc(mhi_netdev); - goto next_ndp; - } - - /* Only primary IP session 0 (0x00) supported for now */ - if (ndp16.dwSignature & ~cpu_to_le32(MBIM_NDP16_SIGN_MASK)) { - net_err_ratelimited("%s: bad packet session\n", ndev->name); - __mbim_errors_inc(mhi_netdev); - goto next_ndp; - } - - /* de-aggregate and deliver IP packets */ - dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16); - for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) { - u16 dgram_offset, dgram_len; - struct sk_buff *skbn; - - if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16))) - break; - - dgram_offset = le16_to_cpu(dpe16.wDatagramIndex); - dgram_len = le16_to_cpu(dpe16.wDatagramLength); - - if (!dgram_offset || !dgram_len) - break; /* null terminator */ - - skbn = netdev_alloc_skb(ndev, dgram_len); - if (!skbn) - continue; - - skb_put(skbn, dgram_len); - skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len); - - switch (skbn->data[0] & 0xf0) { - case 0x40: - skbn->protocol = htons(ETH_P_IP); - break; - case 0x60: - skbn->protocol = htons(ETH_P_IPV6); - break; - default: - net_err_ratelimited("%s: unknown protocol\n", - ndev->name); - __mbim_errors_inc(mhi_netdev); - dev_kfree_skb_any(skbn); - continue; - } - - u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); - u64_stats_inc(&mhi_netdev->stats.rx_packets); - u64_stats_add(&mhi_netdev->stats.rx_bytes, skbn->len); - u64_stats_update_end(&mhi_netdev->stats.rx_syncp); - netif_rx(skbn); - } -next_ndp: - /* Other NDP to process? */ - ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex); - if (!ndpoffset) - break; - } - - /* free skb */ - dev_consume_skb_any(skb); - return; -error: - dev_kfree_skb_any(skb); -} - -struct mbim_tx_hdr { - struct usb_cdc_ncm_nth16 nth16; - struct usb_cdc_ncm_ndp16 ndp16; - struct usb_cdc_ncm_dpe16 dpe16[2]; -} __packed; - -static struct sk_buff *mbim_tx_fixup(struct mhi_net_dev *mhi_netdev, - struct sk_buff *skb) -{ - struct mbim_context *ctx = mhi_netdev->proto_data; - unsigned int dgram_size = skb->len; - struct usb_cdc_ncm_nth16 *nth16; - struct usb_cdc_ncm_ndp16 *ndp16; - struct mbim_tx_hdr *mbim_hdr; - - /* For now, this is a partial implementation of CDC MBIM, only one NDP - * is sent, containing the IP packet (no aggregation). - */ - - /* Ensure we have enough headroom for crafting MBIM header */ - if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) { - dev_kfree_skb_any(skb); - return NULL; - } - - mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr)); - - /* Fill NTB header */ - nth16 = &mbim_hdr->nth16; - nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); - nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); - nth16->wSequence = cpu_to_le16(ctx->tx_seq++); - nth16->wBlockLength = cpu_to_le16(skb->len); - nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); - - /* Fill the unique NDP */ - ndp16 = &mbim_hdr->ndp16; - ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN); - ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) - + sizeof(struct usb_cdc_ncm_dpe16) * 2); - ndp16->wNextNdpIndex = 0; - - /* Datagram follows the mbim header */ - ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr)); - ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size); - - /* null termination */ - ndp16->dpe16[1].wDatagramIndex = 0; - ndp16->dpe16[1].wDatagramLength = 0; - - return skb; -} - -static int mbim_init(struct mhi_net_dev *mhi_netdev) -{ - struct net_device *ndev = mhi_netdev->ndev; - - mhi_netdev->proto_data = devm_kzalloc(&ndev->dev, - sizeof(struct mbim_context), - GFP_KERNEL); - if (!mhi_netdev->proto_data) - return -ENOMEM; - - ndev->needed_headroom = sizeof(struct mbim_tx_hdr); - ndev->mtu = MHI_MBIM_DEFAULT_MTU; - - if (!mhi_netdev->mru) - mhi_netdev->mru = MHI_MBIM_DEFAULT_MRU; - - return 0; -} - -const struct mhi_net_proto proto_mbim = { - .init = mbim_init, - .rx = mbim_rx, - .tx_fixup = mbim_tx_fixup, -}; diff --git a/drivers/net/mhi/net.c b/drivers/net/mhi_net.c index 0cc7dcd0ff96..975f7f9bdf4c 100644 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi_net.c @@ -11,28 +11,42 @@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/u64_stats_sync.h> -#include <linux/wwan.h> - -#include "mhi.h" #define MHI_NET_MIN_MTU ETH_MIN_MTU #define MHI_NET_MAX_MTU 0xffff #define MHI_NET_DEFAULT_MTU 0x4000 -/* When set to false, the default netdev (link 0) is not created, and it's up - * to user to create the link (via wwan rtnetlink). - */ -static bool create_default_iface = true; -module_param(create_default_iface, bool, 0); +struct mhi_net_stats { + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_errors; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_errors; + u64_stats_t tx_dropped; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; +}; + +struct mhi_net_dev { + struct mhi_device *mdev; + struct net_device *ndev; + struct sk_buff *skbagg_head; + struct sk_buff *skbagg_tail; + struct delayed_work rx_refill; + struct mhi_net_stats stats; + u32 rx_queue_sz; + int msg_enable; + unsigned int mru; +}; struct mhi_device_info { const char *netname; - const struct mhi_net_proto *proto; }; static int mhi_ndo_open(struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); /* Feed the rx buffer pool */ schedule_delayed_work(&mhi_netdev->rx_refill, 0); @@ -47,7 +61,7 @@ static int mhi_ndo_open(struct net_device *ndev) static int mhi_ndo_stop(struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); netif_stop_queue(ndev); netif_carrier_off(ndev); @@ -58,17 +72,10 @@ static int mhi_ndo_stop(struct net_device *ndev) static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); - const struct mhi_net_proto *proto = mhi_netdev->proto; + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); struct mhi_device *mdev = mhi_netdev->mdev; int err; - if (proto && proto->tx_fixup) { - skb = proto->tx_fixup(mhi_netdev, skb); - if (unlikely(!skb)) - goto exit_drop; - } - err = mhi_queue_skb(mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT); if (unlikely(err)) { net_err_ratelimited("%s: Failed to queue TX buf (%d)\n", @@ -93,7 +100,7 @@ exit_drop: static void mhi_ndo_get_stats64(struct net_device *ndev, struct rtnl_link_stats64 *stats) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); unsigned int start; do { @@ -101,8 +108,6 @@ static void mhi_ndo_get_stats64(struct net_device *ndev, stats->rx_packets = u64_stats_read(&mhi_netdev->stats.rx_packets); stats->rx_bytes = u64_stats_read(&mhi_netdev->stats.rx_bytes); stats->rx_errors = u64_stats_read(&mhi_netdev->stats.rx_errors); - stats->rx_dropped = u64_stats_read(&mhi_netdev->stats.rx_dropped); - stats->rx_length_errors = u64_stats_read(&mhi_netdev->stats.rx_length_errors); } while (u64_stats_fetch_retry_irq(&mhi_netdev->stats.rx_syncp, start)); do { @@ -165,7 +170,6 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, struct mhi_result *mhi_res) { struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); - const struct mhi_net_proto *proto = mhi_netdev->proto; struct sk_buff *skb = mhi_res->buf_addr; int free_desc_count; @@ -217,15 +221,11 @@ static void mhi_net_dl_callback(struct mhi_device *mhi_dev, break; } - if (proto && proto->rx) { - proto->rx(mhi_netdev, skb); - } else { - u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); - u64_stats_inc(&mhi_netdev->stats.rx_packets); - u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); - u64_stats_update_end(&mhi_netdev->stats.rx_syncp); - netif_rx(skb); - } + u64_stats_update_begin(&mhi_netdev->stats.rx_syncp); + u64_stats_inc(&mhi_netdev->stats.rx_packets); + u64_stats_add(&mhi_netdev->stats.rx_bytes, skb->len); + u64_stats_update_end(&mhi_netdev->stats.rx_syncp); + netif_rx(skb); } /* Refill if RX buffers queue becomes low */ @@ -248,7 +248,6 @@ static void mhi_net_ul_callback(struct mhi_device *mhi_dev, u64_stats_update_begin(&mhi_netdev->stats.tx_syncp); if (unlikely(mhi_res->transaction_status)) { - /* MHI layer stopping/resetting the UL channel */ if (mhi_res->transaction_status == -ENOTCONN) { u64_stats_update_end(&mhi_netdev->stats.tx_syncp); @@ -302,33 +301,17 @@ static void mhi_net_rx_refill_work(struct work_struct *work) schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); } -static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, - struct netlink_ext_ack *extack) +static int mhi_net_newlink(struct mhi_device *mhi_dev, struct net_device *ndev) { - const struct mhi_device_info *info; - struct mhi_device *mhi_dev = ctxt; struct mhi_net_dev *mhi_netdev; int err; - info = (struct mhi_device_info *)mhi_dev->id->driver_data; - - /* For now we only support one link (link context 0), driver must be - * reworked to break 1:1 relationship for net MBIM and to forward setup - * call to rmnet(QMAP) otherwise. - */ - if (if_id != 0) - return -EINVAL; - - if (dev_get_drvdata(&mhi_dev->dev)) - return -EBUSY; - - mhi_netdev = wwan_netdev_drvpriv(ndev); + mhi_netdev = netdev_priv(ndev); dev_set_drvdata(&mhi_dev->dev, mhi_netdev); mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; mhi_netdev->skbagg_head = NULL; - mhi_netdev->proto = info->proto; mhi_netdev->mru = mhi_dev->mhi_cntrl->mru; INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work); @@ -336,45 +319,29 @@ static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, u64_stats_init(&mhi_netdev->stats.tx_syncp); /* Start MHI channels */ - err = mhi_prepare_for_transfer(mhi_dev); + err = mhi_prepare_for_transfer(mhi_dev, 0); if (err) goto out_err; /* Number of transfer descriptors determines size of the queue */ mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); - if (extack) - err = register_netdevice(ndev); - else - err = register_netdev(ndev); + err = register_netdev(ndev); if (err) - goto out_err; - - if (mhi_netdev->proto) { - err = mhi_netdev->proto->init(mhi_netdev); - if (err) - goto out_err_proto; - } + return err; return 0; -out_err_proto: - unregister_netdevice(ndev); out_err: free_netdev(ndev); return err; } -static void mhi_net_dellink(void *ctxt, struct net_device *ndev, - struct list_head *head) +static void mhi_net_dellink(struct mhi_device *mhi_dev, struct net_device *ndev) { - struct mhi_net_dev *mhi_netdev = wwan_netdev_drvpriv(ndev); - struct mhi_device *mhi_dev = ctxt; + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); - if (head) - unregister_netdevice_queue(ndev, head); - else - unregister_netdev(ndev); + unregister_netdev(ndev); mhi_unprepare_from_transfer(mhi_dev); @@ -383,65 +350,34 @@ static void mhi_net_dellink(void *ctxt, struct net_device *ndev, dev_set_drvdata(&mhi_dev->dev, NULL); } -static const struct wwan_ops mhi_wwan_ops = { - .priv_size = sizeof(struct mhi_net_dev), - .setup = mhi_net_setup, - .newlink = mhi_net_newlink, - .dellink = mhi_net_dellink, -}; - static int mhi_net_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) { const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data; - struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; struct net_device *ndev; int err; - err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev, - WWAN_NO_DEFAULT_LINK); - if (err) - return err; - - if (!create_default_iface) - return 0; - - /* Create a default interface which is used as either RMNET real-dev, - * MBIM link 0 or ip link 0) - */ ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname, NET_NAME_PREDICTABLE, mhi_net_setup); - if (!ndev) { - err = -ENOMEM; - goto err_unregister; - } + if (!ndev) + return -ENOMEM; SET_NETDEV_DEV(ndev, &mhi_dev->dev); - err = mhi_net_newlink(mhi_dev, ndev, 0, NULL); - if (err) - goto err_release; + err = mhi_net_newlink(mhi_dev, ndev); + if (err) { + free_netdev(ndev); + return err; + } return 0; - -err_release: - free_netdev(ndev); -err_unregister: - wwan_unregister_ops(&cntrl->mhi_dev->dev); - - return err; } static void mhi_net_remove(struct mhi_device *mhi_dev) { struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); - struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; - - /* WWAN core takes care of removing remaining links */ - wwan_unregister_ops(&cntrl->mhi_dev->dev); - if (create_default_iface) - mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL); + mhi_net_dellink(mhi_dev, mhi_netdev->ndev); } static const struct mhi_device_info mhi_hwip0 = { @@ -452,18 +388,11 @@ static const struct mhi_device_info mhi_swip0 = { .netname = "mhi_swip%d", }; -static const struct mhi_device_info mhi_hwip0_mbim = { - .netname = "mhi_mbim%d", - .proto = &proto_mbim, -}; - static const struct mhi_device_id mhi_net_id_table[] = { /* Hardware accelerated data PATH (to modem IPA), protocol agnostic */ { .chan = "IP_HW0", .driver_data = (kernel_ulong_t)&mhi_hwip0 }, /* Software data PATH (to modem CPU) */ { .chan = "IP_SW0", .driver_data = (kernel_ulong_t)&mhi_swip0 }, - /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */ - { .chan = "IP_HW0_MBIM", .driver_data = (kernel_ulong_t)&mhi_hwip0_mbim }, {} }; MODULE_DEVICE_TABLE(mhi, mhi_net_id_table); diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index ff01e5bdc72e..62d033a1a557 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -183,8 +183,6 @@ new_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - struct nsim_dev *nsim_dev = dev_get_drvdata(dev); - struct devlink *devlink; unsigned int port_index; int ret; @@ -195,12 +193,15 @@ new_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - devlink = priv_to_devlink(nsim_dev); + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EBUSY; + + if (nsim_bus_dev->in_reload) { + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return -EBUSY; + } - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - devlink_reload_disable(devlink); ret = nsim_dev_port_add(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } @@ -212,8 +213,6 @@ del_port_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nsim_bus_dev *nsim_bus_dev = to_nsim_bus_dev(dev); - struct nsim_dev *nsim_dev = dev_get_drvdata(dev); - struct devlink *devlink; unsigned int port_index; int ret; @@ -224,12 +223,15 @@ del_port_store(struct device *dev, struct device_attribute *attr, if (ret) return ret; - devlink = priv_to_devlink(nsim_dev); + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EBUSY; + + if (nsim_bus_dev->in_reload) { + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return -EBUSY; + } - mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); - devlink_reload_disable(devlink); ret = nsim_dev_port_del(nsim_bus_dev, NSIM_DEV_PORT_TYPE_PF, port_index); - devlink_reload_enable(devlink); mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return ret ? ret : count; } diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c index d538a39d4225..54313bd57797 100644 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@ -864,16 +864,24 @@ static int nsim_dev_reload_down(struct devlink *devlink, bool netns_change, struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_bus_dev *nsim_bus_dev; + + nsim_bus_dev = nsim_dev->nsim_bus_dev; + if (!mutex_trylock(&nsim_bus_dev->nsim_bus_reload_lock)) + return -EOPNOTSUPP; if (nsim_dev->dont_allow_reload) { /* For testing purposes, user set debugfs dont_allow_reload * value to true. So forbid it. */ NL_SET_ERR_MSG_MOD(extack, "User forbid the reload for testing purposes"); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return -EOPNOTSUPP; } + nsim_bus_dev->in_reload = true; nsim_dev_reload_destroy(nsim_dev); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return 0; } @@ -882,17 +890,26 @@ static int nsim_dev_reload_up(struct devlink *devlink, enum devlink_reload_actio struct netlink_ext_ack *extack) { struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_bus_dev *nsim_bus_dev; + int ret; + + nsim_bus_dev = nsim_dev->nsim_bus_dev; + mutex_lock(&nsim_bus_dev->nsim_bus_reload_lock); + nsim_bus_dev->in_reload = false; if (nsim_dev->fail_reload) { /* For testing purposes, user set debugfs fail_reload * value to true. Fail right away. */ NL_SET_ERR_MSG_MOD(extack, "User setup the reload to fail for testing purposes"); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); return -EINVAL; } *actions_performed = BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT); - return nsim_dev_reload_create(nsim_dev, extack); + ret = nsim_dev_reload_create(nsim_dev, extack); + mutex_unlock(&nsim_bus_dev->nsim_bus_reload_lock); + return ret; } static int nsim_dev_info_get(struct devlink *devlink, @@ -1432,7 +1449,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) int err; devlink = devlink_alloc_ns(&nsim_dev_devlink_ops, sizeof(*nsim_dev), - nsim_bus_dev->initial_net); + nsim_bus_dev->initial_net, &nsim_bus_dev->dev); if (!devlink) return -ENOMEM; nsim_dev = devlink_priv(devlink); @@ -1453,7 +1470,7 @@ int nsim_dev_probe(struct nsim_bus_dev *nsim_bus_dev) if (err) goto err_devlink_free; - err = devlink_register(devlink, &nsim_bus_dev->dev); + err = devlink_register(devlink); if (err) goto err_resources_unregister; diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index 213d3e5056c8..4300261e2f9e 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -1441,7 +1441,7 @@ static u64 nsim_fib_nexthops_res_occ_get(void *priv) static void nsim_fib_set_max_all(struct nsim_fib_data *data, struct devlink *devlink) { - enum nsim_resource_id res_ids[] = { + static const enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_NEXTHOPS, diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 1c20bcbd9d91..793c86dc5a9c 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -362,6 +362,7 @@ struct nsim_bus_dev { struct nsim_vf_config *vfconfigs; /* Lock for devlink->reload_enabled in netdevsim module */ struct mutex nsim_bus_reload_lock; + bool in_reload; bool init; }; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 7bf3011b8e77..83aea5c5cd03 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -288,7 +288,7 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev) if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) { if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E || BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 || - BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E) + BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811) val |= BCM54XX_SHD_SCR3_RXCTXC_DIS; else val |= BCM54XX_SHD_SCR3_TRDDAPD; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 4d53886f7d51..53bdd673ae56 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -401,11 +401,11 @@ static int ksz8041_config_aneg(struct phy_device *phydev) } static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, - const u32 ksz_phy_id) + const bool ksz_8051) { int ret; - if ((phydev->phy_id & MICREL_PHY_ID_MASK) != ksz_phy_id) + if ((phydev->phy_id & MICREL_PHY_ID_MASK) != PHY_ID_KSZ8051) return 0; ret = phy_read(phydev, MII_BMSR); @@ -418,7 +418,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, * the switch does not. */ ret &= BMSR_ERCAP; - if (ksz_phy_id == PHY_ID_KSZ8051) + if (ksz_8051) return ret; else return !ret; @@ -426,7 +426,7 @@ static int ksz8051_ksz8795_match_phy_device(struct phy_device *phydev, static int ksz8051_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ8051); + return ksz8051_ksz8795_match_phy_device(phydev, true); } static int ksz8081_config_init(struct phy_device *phydev) @@ -535,7 +535,7 @@ static int ksz8061_config_init(struct phy_device *phydev) static int ksz8795_match_phy_device(struct phy_device *phydev) { - return ksz8051_ksz8795_match_phy_device(phydev, PHY_ID_KSZ87XX); + return ksz8051_ksz8795_match_phy_device(phydev, false); } static int ksz9021_load_values_from_of(struct phy_device *phydev, diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index 924ed5b034a4..edb951695b13 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -506,7 +506,7 @@ static int vsc85xx_ptp_cmp_init(struct phy_device *phydev, enum ts_blk blk) { struct vsc8531_private *vsc8531 = phydev->priv; bool base = phydev->mdio.addr == vsc8531->ts_base_addr; - u8 msgs[] = { + static const u8 msgs[] = { PTP_MSGTYPE_SYNC, PTP_MSGTYPE_DELAY_REQ }; @@ -847,7 +847,7 @@ static int vsc85xx_ts_ptp_action_flow(struct phy_device *phydev, enum ts_blk blk static int vsc85xx_ptp_conf(struct phy_device *phydev, enum ts_blk blk, bool one_step, bool enable) { - u8 msgs[] = { + static const u8 msgs[] = { PTP_MSGTYPE_SYNC, PTP_MSGTYPE_DELAY_REQ }; @@ -1268,8 +1268,8 @@ static void vsc8584_set_input_clk_configured(struct phy_device *phydev) static int __vsc8584_init_ptp(struct phy_device *phydev) { struct vsc8531_private *vsc8531 = phydev->priv; - u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 }; - u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 }; + static const u32 ltc_seq_e[] = { 0, 400000, 0, 0, 0 }; + static const u8 ltc_seq_a[] = { 8, 6, 5, 4, 2 }; u32 val; if (!vsc8584_is_1588_input_clk_configured(phydev)) { diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 207e59e74935..06e2181e5810 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -443,7 +443,7 @@ static int ipheth_probe(struct usb_interface *intf, netdev->netdev_ops = &ipheth_netdev_ops; netdev->watchdog_timeo = IPHETH_TX_TIMEOUT; - strcpy(netdev->name, "eth%d"); + strscpy(netdev->name, "eth%d", sizeof(netdev->name)); dev = netdev_priv(netdev); dev->udev = udev; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 13f86368b78a..4e8d3c28f73e 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1154,7 +1154,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) { struct phy_device *phydev = dev->net->phydev; struct ethtool_link_ksettings ecmd; - int ladv, radv, ret; + int ladv, radv, ret, link; u32 buf; /* clear LAN78xx interrupt status */ @@ -1162,9 +1162,12 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) if (unlikely(ret < 0)) return -EIO; + mutex_lock(&phydev->lock); phy_read_status(phydev); + link = phydev->link; + mutex_unlock(&phydev->lock); - if (!phydev->link && dev->link_on) { + if (!link && dev->link_on) { dev->link_on = false; /* reset MAC */ @@ -1177,7 +1180,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) return -EIO; del_timer(&dev->stat_monitor); - } else if (phydev->link && !dev->link_on) { + } else if (link && !dev->link_on) { dev->link_on = true; phy_ethtool_ksettings_get(phydev, &ecmd); @@ -1466,9 +1469,14 @@ static int lan78xx_set_eee(struct net_device *net, struct ethtool_eee *edata) static u32 lan78xx_get_link(struct net_device *net) { + u32 link; + + mutex_lock(&net->phydev->lock); phy_read_status(net->phydev); + link = net->phydev->link; + mutex_unlock(&net->phydev->lock); - return net->phydev->link; + return link; } static void lan78xx_get_drvinfo(struct net_device *net, diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 0475ef0efdca..36dafcb3d04a 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1,31 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 1999-2013 Petko Manolov (petkan@nucleusys.com) + * Copyright (c) 1999-2021 Petko Manolov (petkan@nucleusys.com) * - * ChangeLog: - * .... Most of the time spent on reading sources & docs. - * v0.2.x First official release for the Linux kernel. - * v0.3.0 Beutified and structured, some bugs fixed. - * v0.3.x URBifying bulk requests and bugfixing. First relatively - * stable release. Still can touch device's registers only - * from top-halves. - * v0.4.0 Control messages remained unurbified are now URBs. - * Now we can touch the HW at any time. - * v0.4.9 Control urbs again use process context to wait. Argh... - * Some long standing bugs (enable_net_traffic) fixed. - * Also nasty trick about resubmiting control urb from - * interrupt context used. Please let me know how it - * behaves. Pegasus II support added since this version. - * TODO: suppressing HCD warnings spewage on disconnect. - * v0.4.13 Ethernet address is now set at probe(), not at open() - * time as this seems to break dhcpd. - * v0.5.0 branch to 2.5.x kernels - * v0.5.1 ethtool support added - * v0.5.5 rx socket buffers are in a pool and the their allocation - * is out of the interrupt routine. - * ... - * v0.9.3 simplified [get|set]_register(s), async update registers - * logic revisited, receive skb_pool removed. */ #include <linux/sched.h> @@ -45,7 +21,6 @@ /* * Version Information */ -#define DRIVER_VERSION "v0.9.3 (2013/04/25)" #define DRIVER_AUTHOR "Petko Manolov <petkan@nucleusys.com>" #define DRIVER_DESC "Pegasus/Pegasus II USB Ethernet driver" @@ -132,9 +107,15 @@ static int get_registers(pegasus_t *pegasus, __u16 indx, __u16 size, void *data) static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, const void *data) { - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, + int ret; + + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REGS, PEGASUS_REQT_WRITE, 0, indx, data, size, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } /* @@ -145,10 +126,15 @@ static int set_registers(pegasus_t *pegasus, __u16 indx, __u16 size, static int set_register(pegasus_t *pegasus, __u16 indx, __u8 data) { void *buf = &data; + int ret; - return usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, + ret = usb_control_msg_send(pegasus->usb, 0, PEGASUS_REQ_SET_REG, PEGASUS_REQT_WRITE, data, indx, buf, 1, 1000, GFP_NOIO); + if (ret < 0) + netif_dbg(pegasus, drv, pegasus->net, "%s failed with %d\n", __func__, ret); + + return ret; } static int update_eth_regs_async(pegasus_t *pegasus) @@ -188,10 +174,9 @@ static int update_eth_regs_async(pegasus_t *pegasus) static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) { - int i; - __u8 data[4] = { phy, 0, 0, indx }; + int i, ret; __le16 regdi; - int ret = -ETIMEDOUT; + __u8 data[4] = { phy, 0, 0, indx }; if (cmd & PHY_WRITE) { __le16 *t = (__le16 *) & data[1]; @@ -207,12 +192,15 @@ static int __mii_op(pegasus_t *p, __u8 phy, __u8 indx, __u16 *regd, __u8 cmd) if (data[0] & PHY_DONE) break; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } if (cmd & PHY_READ) { ret = get_registers(p, PhyData, 2, ®di); + if (ret < 0) + goto fail; *regd = le16_to_cpu(regdi); - return ret; } return 0; fail: @@ -235,9 +223,13 @@ static int write_mii_word(pegasus_t *pegasus, __u8 phy, __u8 indx, __u16 *regd) static int mdio_read(struct net_device *dev, int phy_id, int loc) { pegasus_t *pegasus = netdev_priv(dev); + int ret; u16 res; - read_mii_word(pegasus, phy_id, loc, &res); + ret = read_mii_word(pegasus, phy_id, loc, &res); + if (ret < 0) + return ret; + return (int)res; } @@ -251,10 +243,9 @@ static void mdio_write(struct net_device *dev, int phy_id, int loc, int val) static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) { - int i; - __u8 tmp = 0; + int ret, i; __le16 retdatai; - int ret; + __u8 tmp = 0; set_register(pegasus, EpromCtrl, 0); set_register(pegasus, EpromOffset, index); @@ -262,21 +253,25 @@ static int read_eprom_word(pegasus_t *pegasus, __u8 index, __u16 *retdata) for (i = 0; i < REG_TIMEOUT; i++) { ret = get_registers(pegasus, EpromCtrl, 1, &tmp); + if (ret < 0) + goto fail; if (tmp & EPROM_DONE) break; - if (ret == -ESHUTDOWN) - goto fail; } - if (i >= REG_TIMEOUT) + if (i >= REG_TIMEOUT) { + ret = -ETIMEDOUT; goto fail; + } ret = get_registers(pegasus, EpromData, 2, &retdatai); + if (ret < 0) + goto fail; *retdata = le16_to_cpu(retdatai); return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); - return -ETIMEDOUT; + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } #ifdef PEGASUS_WRITE_EEPROM @@ -324,10 +319,10 @@ static int write_eprom_word(pegasus_t *pegasus, __u8 index, __u16 data) return ret; fail: - netif_warn(pegasus, drv, pegasus->net, "%s failed\n", __func__); + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return -ETIMEDOUT; } -#endif /* PEGASUS_WRITE_EEPROM */ +#endif /* PEGASUS_WRITE_EEPROM */ static inline int get_node_id(pegasus_t *pegasus, u8 *id) { @@ -367,19 +362,21 @@ static void set_ethernet_addr(pegasus_t *pegasus) return; err: eth_hw_addr_random(pegasus->net); - dev_info(&pegasus->intf->dev, "software assigned MAC address.\n"); + netif_dbg(pegasus, drv, pegasus->net, "software assigned MAC address.\n"); return; } static inline int reset_mac(pegasus_t *pegasus) { + int ret, i; __u8 data = 0x8; - int i; set_register(pegasus, EthCtrl1, data); for (i = 0; i < REG_TIMEOUT; i++) { - get_registers(pegasus, EthCtrl1, 1, &data); + ret = get_registers(pegasus, EthCtrl1, 1, &data); + if (ret < 0) + goto fail; if (~data & 0x08) { if (loopback) break; @@ -402,22 +399,29 @@ static inline int reset_mac(pegasus_t *pegasus) } if (usb_dev_id[pegasus->dev_index].vendor == VENDOR_ELCON) { __u16 auxmode; - read_mii_word(pegasus, 3, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 3, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 3, 0x1b, &auxmode); } return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); + return ret; } static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) { - __u16 linkpart; - __u8 data[4]; pegasus_t *pegasus = netdev_priv(dev); int ret; + __u16 linkpart; + __u8 data[4]; - read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + ret = read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); + if (ret < 0) + goto fail; data[0] = 0xc8; /* TX & RX enable, append status, no CRC */ data[1] = 0; if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL)) @@ -435,11 +439,16 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) usb_dev_id[pegasus->dev_index].vendor == VENDOR_LINKSYS2 || usb_dev_id[pegasus->dev_index].vendor == VENDOR_DLINK) { u16 auxmode; - read_mii_word(pegasus, 0, 0x1b, &auxmode); + ret = read_mii_word(pegasus, 0, 0x1b, &auxmode); + if (ret < 0) + goto fail; auxmode |= 4; write_mii_word(pegasus, 0, 0x1b, &auxmode); } + return 0; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); return ret; } @@ -447,9 +456,9 @@ static void read_bulk_callback(struct urb *urb) { pegasus_t *pegasus = urb->context; struct net_device *net; + u8 *buf = urb->transfer_buffer; int rx_status, count = urb->actual_length; int status = urb->status; - u8 *buf = urb->transfer_buffer; __u16 pkt_len; if (!pegasus) @@ -735,12 +744,16 @@ static inline void disable_net_traffic(pegasus_t *pegasus) set_registers(pegasus, EthCtrl0, sizeof(tmp), &tmp); } -static inline void get_interrupt_interval(pegasus_t *pegasus) +static inline int get_interrupt_interval(pegasus_t *pegasus) { u16 data; u8 interval; + int ret; + + ret = read_eprom_word(pegasus, 4, &data); + if (ret < 0) + return ret; - read_eprom_word(pegasus, 4, &data); interval = data >> 8; if (pegasus->usb->speed != USB_SPEED_HIGH) { if (interval < 0x80) { @@ -755,6 +768,8 @@ static inline void get_interrupt_interval(pegasus_t *pegasus) } } pegasus->intr_interval = interval; + + return 0; } static void set_carrier(struct net_device *net) @@ -880,7 +895,6 @@ static void pegasus_get_drvinfo(struct net_device *dev, pegasus_t *pegasus = netdev_priv(dev); strlcpy(info->driver, driver_name, sizeof(info->driver)); - strlcpy(info->version, DRIVER_VERSION, sizeof(info->version)); usb_make_path(pegasus->usb, info->bus_info, sizeof(info->bus_info)); } @@ -999,8 +1013,7 @@ static int pegasus_siocdevprivate(struct net_device *net, struct ifreq *rq, data[0] = pegasus->phy; fallthrough; case SIOCDEVPRIVATE + 1: - read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); - res = 0; + res = read_mii_word(pegasus, data[0], data[1] & 0x1f, &data[3]); break; case SIOCDEVPRIVATE + 2: if (!capable(CAP_NET_ADMIN)) @@ -1034,22 +1047,25 @@ static void pegasus_set_multicast(struct net_device *net) static __u8 mii_phy_probe(pegasus_t *pegasus) { - int i; + int i, ret; __u16 tmp; for (i = 0; i < 32; i++) { - read_mii_word(pegasus, i, MII_BMSR, &tmp); + ret = read_mii_word(pegasus, i, MII_BMSR, &tmp); + if (ret < 0) + goto fail; if (tmp == 0 || tmp == 0xffff || (tmp & BMSR_MEDIA) == 0) continue; else return i; } - +fail: return 0xff; } static inline void setup_pegasus_II(pegasus_t *pegasus) { + int ret; __u8 data = 0xa5; set_register(pegasus, Reg1d, 0); @@ -1061,7 +1077,9 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg7b, 2); set_register(pegasus, 0x83, data); - get_registers(pegasus, 0x83, 1, &data); + ret = get_registers(pegasus, 0x83, 1, &data); + if (ret < 0) + goto fail; if (data == 0xa5) pegasus->chip = 0x8513; @@ -1076,6 +1094,10 @@ static inline void setup_pegasus_II(pegasus_t *pegasus) set_register(pegasus, Reg81, 6); else set_register(pegasus, Reg81, 2); + + return; +fail: + netif_dbg(pegasus, drv, pegasus->net, "%s failed\n", __func__); } static void check_carrier(struct work_struct *work) @@ -1150,7 +1172,9 @@ static int pegasus_probe(struct usb_interface *intf, | NETIF_MSG_PROBE | NETIF_MSG_LINK); pegasus->features = usb_dev_id[dev_index].private; - get_interrupt_interval(pegasus); + res = get_interrupt_interval(pegasus); + if (res) + goto out2; if (reset_mac(pegasus)) { dev_err(&intf->dev, "can't reset MAC\n"); res = -EIO; @@ -1297,7 +1321,7 @@ static void __init parse_id(char *id) static int __init pegasus_init(void) { - pr_info("%s: %s, " DRIVER_DESC "\n", driver_name, DRIVER_VERSION); + pr_info("%s: " DRIVER_DESC "\n", driver_name); if (devid) parse_id(devid); return usb_register(&pegasus_driver); diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 470e1c1e6353..840c1c2ab16a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1725,7 +1725,7 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) dev->interrupt_count = 0; dev->net = net; - strcpy (net->name, "usb%d"); + strscpy(net->name, "usb%d", sizeof(net->name)); memcpy (net->dev_addr, node_id, sizeof node_id); /* rx and tx sides can use different message sizes; @@ -1752,13 +1752,13 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if ((dev->driver_info->flags & FLAG_ETHER) != 0 && ((dev->driver_info->flags & FLAG_POINTTOPOINT) == 0 || (net->dev_addr [0] & 0x02) == 0)) - strcpy (net->name, "eth%d"); + strscpy(net->name, "eth%d", sizeof(net->name)); /* WLAN devices should always be named "wlan%d" */ if ((dev->driver_info->flags & FLAG_WLAN) != 0) - strcpy(net->name, "wlan%d"); + strscpy(net->name, "wlan%d", sizeof(net->name)); /* WWAN devices should always be named "wwan%d" */ if ((dev->driver_info->flags & FLAG_WWAN) != 0) - strcpy(net->name, "wwan%d"); + strscpy(net->name, "wwan%d", sizeof(net->name)); /* devices that cannot do ARP */ if ((dev->driver_info->flags & FLAG_NOARP) != 0) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 56c3f8519093..2e42210a6503 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -380,7 +380,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, struct page *page, unsigned int offset, unsigned int len, unsigned int truesize, bool hdr_valid, unsigned int metasize, - bool whole_page) + unsigned int headroom) { struct sk_buff *skb; struct virtio_net_hdr_mrg_rxbuf *hdr; @@ -398,28 +398,16 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, else hdr_padded_len = sizeof(struct padded_vnet_hdr); - /* If whole_page, there is an offset between the beginning of the + /* If headroom is not 0, there is an offset between the beginning of the * data and the allocated space, otherwise the data and the allocated * space are aligned. * * Buffers with headroom use PAGE_SIZE as alloc size, see * add_recvbuf_mergeable() + get_mergeable_buf_len() */ - if (whole_page) { - /* Buffers with whole_page use PAGE_SIZE as alloc size, - * see add_recvbuf_mergeable() + get_mergeable_buf_len() - */ - truesize = PAGE_SIZE; - - /* page maybe head page, so we should get the buf by p, not the - * page - */ - tailroom = truesize - len - offset_in_page(p); - buf = (char *)((unsigned long)p & PAGE_MASK); - } else { - tailroom = truesize - len; - buf = p; - } + truesize = headroom ? PAGE_SIZE : truesize; + tailroom = truesize - len - headroom; + buf = p - headroom; len -= hdr_len; offset += hdr_padded_len; @@ -978,7 +966,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, put_page(page); head_skb = page_to_skb(vi, rq, xdp_page, offset, len, PAGE_SIZE, false, - metasize, true); + metasize, + VIRTIO_XDP_HEADROOM); return head_skb; } break; @@ -1029,7 +1018,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, rcu_read_unlock(); head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, - metasize, !!headroom); + metasize, headroom); curr_skb = head_skb; if (unlikely(!curr_skb)) @@ -2208,14 +2197,14 @@ static int virtnet_set_channels(struct net_device *dev, if (vi->rq[0].xdp_prog) return -EINVAL; - get_online_cpus(); + cpus_read_lock(); err = _virtnet_set_queues(vi, queue_pairs); if (err) { - put_online_cpus(); + cpus_read_unlock(); goto err; } virtnet_set_affinity(vi); - put_online_cpus(); + cpus_read_unlock(); netif_set_real_num_tx_queues(dev, queue_pairs); netif_set_real_num_rx_queues(dev, queue_pairs); @@ -2970,9 +2959,9 @@ static int init_vqs(struct virtnet_info *vi) if (ret) goto err_free; - get_online_cpus(); + cpus_read_lock(); virtnet_set_affinity(vi); - put_online_cpus(); + cpus_read_unlock(); return 0; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 2b1b944d4b28..662e26117353 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -857,30 +857,24 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; - int ret = -EINVAL; nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); - if (!skb2) { - ret = -ENOMEM; - goto err; + skb = skb_expand_head(skb, hh_len); + if (!skb) { + dev->stats.tx_errors++; + return -ENOMEM; } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { + int ret; + sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ ret = neigh_output(neigh, skb, is_v6gw); @@ -889,9 +883,8 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s } rcu_read_unlock_bh(); -err: vrf_tx_error(skb->dev, skb); - return ret; + return -EINVAL; } static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 473df2505c8e..592a8389fc5a 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -290,30 +290,6 @@ config SLIC_DS26522 To compile this driver as a module, choose M here: the module will be called slic_ds26522. -config DSCC4_PCISYNC - bool "Etinc PCISYNC features" - depends on DSCC4 - help - Due to Etinc's design choice for its PCISYNC cards, some operations - are only allowed on specific ports of the DSCC4. This option is the - only way for the driver to know that it shouldn't return a success - code for these operations. - - Please say Y if your card is an Etinc's PCISYNC. - -config DSCC4_PCI_RST - bool "Hard reset support" - depends on DSCC4 - help - Various DSCC4 bugs forbid any reliable software reset of the ASIC. - As a replacement, some vendors provide a way to assert the PCI #RST - pin of DSCC4 through the GPIO port of the card. If you choose Y, - the driver will make use of this feature before module removal - (i.e. rmmod). The feature is known to be available on Commtech's - cards. Contact your manufacturer for details. - - Say Y if your card supports this feature. - config IXP4XX_HSS tristate "Intel IXP4xx HSS (synchronous serial port) support" depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR @@ -337,33 +313,6 @@ config LAPBETHER To compile this driver as a module, choose M here: the module will be called lapbether. - If unsure, say N. - -config SBNI - tristate "Granch SBNI12 Leased Line adapter support" - depends on X86 - help - Driver for ISA SBNI12-xx cards which are low cost alternatives to - leased line modems. - - You can find more information and last versions of drivers and - utilities at <http://www.granch.ru/>. If you have any question you - can send email to <sbni@granch.ru>. - - To compile this driver as a module, choose M here: the - module will be called sbni. - - If unsure, say N. - -config SBNI_MULTILINE - bool "Multiple line feature support" - depends on SBNI - help - Schedule traffic for some parallel lines, via SBNI12 adapters. - - If you have two computers connected with two parallel lines it's - possible to increase transfer rate nearly twice. You should have - a program named 'sbniconfig' to configure adapters. If unsure, say N. diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 081666c36ca2..f6b92efffc94 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -22,7 +22,6 @@ obj-$(CONFIG_FARSYNC) += farsync.o obj-$(CONFIG_LANMEDIA) += lmc/ obj-$(CONFIG_LAPBETHER) += lapbether.o -obj-$(CONFIG_SBNI) += sbni.o obj-$(CONFIG_N2) += n2.o obj-$(CONFIG_C101) += c101.o obj-$(CONFIG_WANXL) += wanxl.o diff --git a/drivers/net/wan/hostess_sv11.c b/drivers/net/wan/hostess_sv11.c index 15a754310fd7..e985e54ba75d 100644 --- a/drivers/net/wan/hostess_sv11.c +++ b/drivers/net/wan/hostess_sv11.c @@ -319,16 +319,18 @@ MODULE_DESCRIPTION("Modular driver for the Comtrol Hostess SV11"); static struct z8530_dev *sv11_unit; -int init_module(void) +static int sv11_module_init(void) { sv11_unit = sv11_init(io, irq); if (!sv11_unit) return -ENODEV; return 0; } +module_init(sv11_module_init); -void cleanup_module(void) +static void sv11_module_cleanup(void) { if (sv11_unit) sv11_shutdown(sv11_unit); } +module_exit(sv11_module_cleanup); diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c deleted file mode 100644 index 469fe979d664..000000000000 --- a/drivers/net/wan/sbni.c +++ /dev/null @@ -1,1639 +0,0 @@ -/* sbni.c: Granch SBNI12 leased line adapters driver for linux - * - * Written 2001 by Denis I.Timofeev (timofeev@granch.ru) - * - * Previous versions were written by Yaroslav Polyakov, - * Alexey Zverev and Max Khon. - * - * Driver supports SBNI12-02,-04,-05,-10,-11 cards, single and - * double-channel, PCI and ISA modifications. - * More info and useful utilities to work with SBNI12 cards you can find - * at http://www.granch.com (English) or http://www.granch.ru (Russian) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License. - * - * - * 5.0.1 Jun 22 2001 - * - Fixed bug in probe - * 5.0.0 Jun 06 2001 - * - Driver was completely redesigned by Denis I.Timofeev, - * - now PCI/Dual, ISA/Dual (with single interrupt line) models are - * - supported - * 3.3.0 Thu Feb 24 21:30:28 NOVT 2000 - * - PCI cards support - * 3.2.0 Mon Dec 13 22:26:53 NOVT 1999 - * - Completely rebuilt all the packet storage system - * - to work in Ethernet-like style. - * 3.1.1 just fixed some bugs (5 aug 1999) - * 3.1.0 added balancing feature (26 apr 1999) - * 3.0.1 just fixed some bugs (14 apr 1999). - * 3.0.0 Initial Revision, Yaroslav Polyakov (24 Feb 1999) - * - added pre-calculation for CRC, fixed bug with "len-2" frames, - * - removed outbound fragmentation (MTU=1000), written CRC-calculation - * - on asm, added work with hard_headers and now we have our own cache - * - for them, optionally supported word-interchange on some chipsets, - * - * Known problem: this driver wasn't tested on multiprocessor machine. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/ptrace.h> -#include <linux/fcntl.h> -#include <linux/ioport.h> -#include <linux/interrupt.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/pci.h> -#include <linux/skbuff.h> -#include <linux/timer.h> -#include <linux/init.h> -#include <linux/delay.h> - -#include <net/net_namespace.h> -#include <net/arp.h> -#include <net/Space.h> - -#include <asm/io.h> -#include <asm/types.h> -#include <asm/byteorder.h> -#include <asm/irq.h> -#include <linux/uaccess.h> - -#include "sbni.h" - -/* device private data */ - -struct net_local { - struct timer_list watchdog; - struct net_device *watchdog_dev; - - spinlock_t lock; - struct sk_buff *rx_buf_p; /* receive buffer ptr */ - struct sk_buff *tx_buf_p; /* transmit buffer ptr */ - - unsigned int framelen; /* current frame length */ - unsigned int maxframe; /* maximum valid frame length */ - unsigned int state; - unsigned int inppos, outpos; /* positions in rx/tx buffers */ - - /* transmitting frame number - from frames qty to 1 */ - unsigned int tx_frameno; - - /* expected number of next receiving frame */ - unsigned int wait_frameno; - - /* count of failed attempts to frame send - 32 attempts do before - error - while receiver tunes on opposite side of wire */ - unsigned int trans_errors; - - /* idle time; send pong when limit exceeded */ - unsigned int timer_ticks; - - /* fields used for receive level autoselection */ - int delta_rxl; - unsigned int cur_rxl_index, timeout_rxl; - unsigned long cur_rxl_rcvd, prev_rxl_rcvd; - - struct sbni_csr1 csr1; /* current value of CSR1 */ - struct sbni_in_stats in_stats; /* internal statistics */ - - struct net_device *second; /* for ISA/dual cards */ - -#ifdef CONFIG_SBNI_MULTILINE - struct net_device *master; - struct net_device *link; -#endif -}; - - -static int sbni_card_probe( unsigned long ); -static int sbni_pci_probe( struct net_device * ); -static struct net_device *sbni_probe1(struct net_device *, unsigned long, int); -static int sbni_open( struct net_device * ); -static int sbni_close( struct net_device * ); -static netdev_tx_t sbni_start_xmit(struct sk_buff *, - struct net_device * ); -static int sbni_siocdevprivate(struct net_device *, struct ifreq *, - void __user *, int); -static void set_multicast_list( struct net_device * ); - -static irqreturn_t sbni_interrupt( int, void * ); -static void handle_channel( struct net_device * ); -static int recv_frame( struct net_device * ); -static void send_frame( struct net_device * ); -static int upload_data( struct net_device *, - unsigned, unsigned, unsigned, u32 ); -static void download_data( struct net_device *, u32 * ); -static void sbni_watchdog(struct timer_list *); -static void interpret_ack( struct net_device *, unsigned ); -static int append_frame_to_pkt( struct net_device *, unsigned, u32 ); -static void indicate_pkt( struct net_device * ); -static void card_start( struct net_device * ); -static void prepare_to_send( struct sk_buff *, struct net_device * ); -static void drop_xmit_queue( struct net_device * ); -static void send_frame_header( struct net_device *, u32 * ); -static int skip_tail( unsigned int, unsigned int, u32 ); -static int check_fhdr( u32, u32 *, u32 *, u32 *, u32 *, u32 * ); -static void change_level( struct net_device * ); -static void timeout_change_level( struct net_device * ); -static u32 calc_crc32( u32, u8 *, u32 ); -static struct sk_buff * get_rx_buf( struct net_device * ); -static int sbni_init( struct net_device * ); - -#ifdef CONFIG_SBNI_MULTILINE -static int enslave( struct net_device *, struct net_device * ); -static int emancipate( struct net_device * ); -#endif - -static const char version[] = - "Granch SBNI12 driver ver 5.0.1 Jun 22 2001 Denis I.Timofeev.\n"; - -static bool skip_pci_probe __initdata = false; -static int scandone __initdata = 0; -static int num __initdata = 0; - -static unsigned char rxl_tab[]; -static u32 crc32tab[]; - -/* A list of all installed devices, for removing the driver module. */ -static struct net_device *sbni_cards[ SBNI_MAX_NUM_CARDS ]; - -/* Lists of device's parameters */ -static u32 io[ SBNI_MAX_NUM_CARDS ] __initdata = - { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 }; -static u32 irq[ SBNI_MAX_NUM_CARDS ] __initdata; -static u32 baud[ SBNI_MAX_NUM_CARDS ] __initdata; -static u32 rxl[ SBNI_MAX_NUM_CARDS ] __initdata = - { [0 ... SBNI_MAX_NUM_CARDS-1] = -1 }; -static u32 mac[ SBNI_MAX_NUM_CARDS ] __initdata; - -#ifndef MODULE -typedef u32 iarr[]; -static iarr *dest[5] __initdata = { &io, &irq, &baud, &rxl, &mac }; -#endif - -/* A zero-terminated list of I/O addresses to be probed on ISA bus */ -static unsigned int netcard_portlist[ ] __initdata = { - 0x210, 0x214, 0x220, 0x224, 0x230, 0x234, 0x240, 0x244, 0x250, 0x254, - 0x260, 0x264, 0x270, 0x274, 0x280, 0x284, 0x290, 0x294, 0x2a0, 0x2a4, - 0x2b0, 0x2b4, 0x2c0, 0x2c4, 0x2d0, 0x2d4, 0x2e0, 0x2e4, 0x2f0, 0x2f4, - 0 }; - -#define NET_LOCAL_LOCK(dev) (((struct net_local *)netdev_priv(dev))->lock) - -/* - * Look for SBNI card which addr stored in dev->base_addr, if nonzero. - * Otherwise, look through PCI bus. If none PCI-card was found, scan ISA. - */ - -static inline int __init -sbni_isa_probe( struct net_device *dev ) -{ - if( dev->base_addr > 0x1ff && - request_region( dev->base_addr, SBNI_IO_EXTENT, dev->name ) && - sbni_probe1( dev, dev->base_addr, dev->irq ) ) - - return 0; - else { - pr_err("base address 0x%lx is busy, or adapter is malfunctional!\n", - dev->base_addr); - return -ENODEV; - } -} - -static const struct net_device_ops sbni_netdev_ops = { - .ndo_open = sbni_open, - .ndo_stop = sbni_close, - .ndo_start_xmit = sbni_start_xmit, - .ndo_set_rx_mode = set_multicast_list, - .ndo_siocdevprivate = sbni_siocdevprivate, - .ndo_set_mac_address = eth_mac_addr, - .ndo_validate_addr = eth_validate_addr, -}; - -static void __init sbni_devsetup(struct net_device *dev) -{ - ether_setup( dev ); - dev->netdev_ops = &sbni_netdev_ops; -} - -int __init sbni_probe(int unit) -{ - struct net_device *dev; - int err; - - dev = alloc_netdev(sizeof(struct net_local), "sbni", - NET_NAME_UNKNOWN, sbni_devsetup); - if (!dev) - return -ENOMEM; - - dev->netdev_ops = &sbni_netdev_ops; - - sprintf(dev->name, "sbni%d", unit); - netdev_boot_setup_check(dev); - - err = sbni_init(dev); - if (err) { - free_netdev(dev); - return err; - } - - err = register_netdev(dev); - if (err) { - release_region( dev->base_addr, SBNI_IO_EXTENT ); - free_netdev(dev); - return err; - } - pr_info_once("%s", version); - return 0; -} - -static int __init sbni_init(struct net_device *dev) -{ - int i; - if( dev->base_addr ) - return sbni_isa_probe( dev ); - /* otherwise we have to perform search our adapter */ - - if( io[ num ] != -1 ) { - dev->base_addr = io[ num ]; - dev->irq = irq[ num ]; - } else if( scandone || io[ 0 ] != -1 ) { - return -ENODEV; - } - - /* if io[ num ] contains non-zero address, then that is on ISA bus */ - if( dev->base_addr ) - return sbni_isa_probe( dev ); - - /* ...otherwise - scan PCI first */ - if( !skip_pci_probe && !sbni_pci_probe( dev ) ) - return 0; - - if( io[ num ] == -1 ) { - /* Auto-scan will be stopped when first ISA card were found */ - scandone = 1; - if( num > 0 ) - return -ENODEV; - } - - for( i = 0; netcard_portlist[ i ]; ++i ) { - int ioaddr = netcard_portlist[ i ]; - if( request_region( ioaddr, SBNI_IO_EXTENT, dev->name ) && - sbni_probe1( dev, ioaddr, 0 )) - return 0; - } - - return -ENODEV; -} - - -static int __init -sbni_pci_probe( struct net_device *dev ) -{ - struct pci_dev *pdev = NULL; - - while( (pdev = pci_get_class( PCI_CLASS_NETWORK_OTHER << 8, pdev )) - != NULL ) { - int pci_irq_line; - unsigned long pci_ioaddr; - - if( pdev->vendor != SBNI_PCI_VENDOR && - pdev->device != SBNI_PCI_DEVICE ) - continue; - - pci_ioaddr = pci_resource_start( pdev, 0 ); - pci_irq_line = pdev->irq; - - /* Avoid already found cards from previous calls */ - if( !request_region( pci_ioaddr, SBNI_IO_EXTENT, dev->name ) ) { - if (pdev->subsystem_device != 2) - continue; - - /* Dual adapter is present */ - if (!request_region(pci_ioaddr += 4, SBNI_IO_EXTENT, - dev->name ) ) - continue; - } - - if (pci_irq_line <= 0 || pci_irq_line >= nr_irqs) - pr_warn( -"WARNING: The PCI BIOS assigned this PCI card to IRQ %d, which is unlikely to work!.\n" -"You should use the PCI BIOS setup to assign a valid IRQ line.\n", - pci_irq_line ); - - /* avoiding re-enable dual adapters */ - if( (pci_ioaddr & 7) == 0 && pci_enable_device( pdev ) ) { - release_region( pci_ioaddr, SBNI_IO_EXTENT ); - pci_dev_put( pdev ); - return -EIO; - } - if( sbni_probe1( dev, pci_ioaddr, pci_irq_line ) ) { - SET_NETDEV_DEV(dev, &pdev->dev); - /* not the best thing to do, but this is all messed up - for hotplug systems anyway... */ - pci_dev_put( pdev ); - return 0; - } - } - return -ENODEV; -} - - -static struct net_device * __init -sbni_probe1( struct net_device *dev, unsigned long ioaddr, int irq ) -{ - struct net_local *nl; - - if( sbni_card_probe( ioaddr ) ) { - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - - outb( 0, ioaddr + CSR0 ); - - if( irq < 2 ) { - unsigned long irq_mask; - - irq_mask = probe_irq_on(); - outb( EN_INT | TR_REQ, ioaddr + CSR0 ); - outb( PR_RES, ioaddr + CSR1 ); - mdelay(50); - irq = probe_irq_off(irq_mask); - outb( 0, ioaddr + CSR0 ); - - if( !irq ) { - pr_err("%s: can't detect device irq!\n", dev->name); - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - } else if( irq == 2 ) - irq = 9; - - dev->irq = irq; - dev->base_addr = ioaddr; - - /* Fill in sbni-specific dev fields. */ - nl = netdev_priv(dev); - if( !nl ) { - pr_err("%s: unable to get memory!\n", dev->name); - release_region( ioaddr, SBNI_IO_EXTENT ); - return NULL; - } - - memset( nl, 0, sizeof(struct net_local) ); - spin_lock_init( &nl->lock ); - - /* store MAC address (generate if that isn't known) */ - *(__be16 *)dev->dev_addr = htons( 0x00ff ); - *(__be32 *)(dev->dev_addr + 2) = htonl( 0x01000000 | - ((mac[num] ? - mac[num] : - (u32)((long)netdev_priv(dev))) & 0x00ffffff)); - - /* store link settings (speed, receive level ) */ - nl->maxframe = DEFAULT_FRAME_LEN; - nl->csr1.rate = baud[ num ]; - - if( (nl->cur_rxl_index = rxl[ num ]) == -1 ) { - /* autotune rxl */ - nl->cur_rxl_index = DEF_RXL; - nl->delta_rxl = DEF_RXL_DELTA; - } else { - nl->delta_rxl = 0; - } - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - if( inb( ioaddr + CSR0 ) & 0x01 ) - nl->state |= FL_SLOW_MODE; - - pr_notice("%s: ioaddr %#lx, irq %d, MAC: 00:ff:01:%02x:%02x:%02x\n", - dev->name, dev->base_addr, dev->irq, - ((u8 *)dev->dev_addr)[3], - ((u8 *)dev->dev_addr)[4], - ((u8 *)dev->dev_addr)[5]); - - pr_notice("%s: speed %d", - dev->name, - ((nl->state & FL_SLOW_MODE) ? 500000 : 2000000) - / (1 << nl->csr1.rate)); - - if( nl->delta_rxl == 0 ) - pr_cont(", receive level 0x%x (fixed)\n", nl->cur_rxl_index); - else - pr_cont(", receive level (auto)\n"); - -#ifdef CONFIG_SBNI_MULTILINE - nl->master = dev; - nl->link = NULL; -#endif - - sbni_cards[ num++ ] = dev; - return dev; -} - -/* -------------------------------------------------------------------------- */ - -#ifdef CONFIG_SBNI_MULTILINE - -static netdev_tx_t -sbni_start_xmit( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_device *p; - - netif_stop_queue( dev ); - - /* Looking for idle device in the list */ - for( p = dev; p; ) { - struct net_local *nl = netdev_priv(p); - spin_lock( &nl->lock ); - if( nl->tx_buf_p || (nl->state & FL_LINE_DOWN) ) { - p = nl->link; - spin_unlock( &nl->lock ); - } else { - /* Idle dev is found */ - prepare_to_send( skb, p ); - spin_unlock( &nl->lock ); - netif_start_queue( dev ); - return NETDEV_TX_OK; - } - } - - return NETDEV_TX_BUSY; -} - -#else /* CONFIG_SBNI_MULTILINE */ - -static netdev_tx_t -sbni_start_xmit( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - netif_stop_queue( dev ); - spin_lock( &nl->lock ); - - prepare_to_send( skb, dev ); - - spin_unlock( &nl->lock ); - return NETDEV_TX_OK; -} - -#endif /* CONFIG_SBNI_MULTILINE */ - -/* -------------------------------------------------------------------------- */ - -/* interrupt handler */ - -/* - * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not - * be looked as two independent single-channel devices. Every channel seems - * as Ethernet interface but interrupt handler must be common. Really, first - * channel ("master") driver only registers the handler. In its struct net_local - * it has got pointer to "slave" channel's struct net_local and handles that's - * interrupts too. - * dev of successfully attached ISA SBNI boards is linked to list. - * While next board driver is initialized, it scans this list. If one - * has found dev with same irq and ioaddr different by 4 then it assumes - * this board to be "master". - */ - -static irqreturn_t -sbni_interrupt( int irq, void *dev_id ) -{ - struct net_device *dev = dev_id; - struct net_local *nl = netdev_priv(dev); - int repeat; - - spin_lock( &nl->lock ); - if( nl->second ) - spin_lock(&NET_LOCAL_LOCK(nl->second)); - - do { - repeat = 0; - if( inb( dev->base_addr + CSR0 ) & (RC_RDY | TR_RDY) ) { - handle_channel( dev ); - repeat = 1; - } - if( nl->second && /* second channel present */ - (inb( nl->second->base_addr+CSR0 ) & (RC_RDY | TR_RDY)) ) { - handle_channel( nl->second ); - repeat = 1; - } - } while( repeat ); - - if( nl->second ) - spin_unlock(&NET_LOCAL_LOCK(nl->second)); - spin_unlock( &nl->lock ); - return IRQ_HANDLED; -} - - -static void -handle_channel( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; - - int req_ans; - unsigned char csr0; - -#ifdef CONFIG_SBNI_MULTILINE - /* Lock the master device because we going to change its local data */ - if( nl->state & FL_SLAVE ) - spin_lock(&NET_LOCAL_LOCK(nl->master)); -#endif - - outb( (inb( ioaddr + CSR0 ) & ~EN_INT) | TR_REQ, ioaddr + CSR0 ); - - nl->timer_ticks = CHANGE_LEVEL_START_TICKS; - for(;;) { - csr0 = inb( ioaddr + CSR0 ); - if( ( csr0 & (RC_RDY | TR_RDY) ) == 0 ) - break; - - req_ans = !(nl->state & FL_PREV_OK); - - if( csr0 & RC_RDY ) - req_ans = recv_frame( dev ); - - /* - * TR_RDY always equals 1 here because we have owned the marker, - * and we set TR_REQ when disabled interrupts - */ - csr0 = inb( ioaddr + CSR0 ); - if( !(csr0 & TR_RDY) || (csr0 & RC_RDY) ) - netdev_err(dev, "internal error!\n"); - - /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */ - if( req_ans || nl->tx_frameno != 0 ) - send_frame( dev ); - else - /* send marker without any data */ - outb( inb( ioaddr + CSR0 ) & ~TR_REQ, ioaddr + CSR0 ); - } - - outb( inb( ioaddr + CSR0 ) | EN_INT, ioaddr + CSR0 ); - -#ifdef CONFIG_SBNI_MULTILINE - if( nl->state & FL_SLAVE ) - spin_unlock(&NET_LOCAL_LOCK(nl->master)); -#endif -} - - -/* - * Routine returns 1 if it needs to acknowledge received frame. - * Empty frame received without errors won't be acknowledged. - */ - -static int -recv_frame( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - unsigned long ioaddr = dev->base_addr; - - u32 crc = CRC32_INITIAL; - - unsigned framelen = 0, frameno, ack; - unsigned is_first, frame_ok = 0; - - if( check_fhdr( ioaddr, &framelen, &frameno, &ack, &is_first, &crc ) ) { - frame_ok = framelen > 4 - ? upload_data( dev, framelen, frameno, is_first, crc ) - : skip_tail( ioaddr, framelen, crc ); - if( frame_ok ) - interpret_ack( dev, ack ); - } - - outb( inb( ioaddr + CSR0 ) ^ CT_ZER, ioaddr + CSR0 ); - if( frame_ok ) { - nl->state |= FL_PREV_OK; - if( framelen > 4 ) - nl->in_stats.all_rx_number++; - } else { - nl->state &= ~FL_PREV_OK; - change_level( dev ); - nl->in_stats.all_rx_number++; - nl->in_stats.bad_rx_number++; - } - - return !frame_ok || framelen > 4; -} - - -static void -send_frame( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - u32 crc = CRC32_INITIAL; - - if( nl->state & FL_NEED_RESEND ) { - - /* if frame was sended but not ACK'ed - resend it */ - if( nl->trans_errors ) { - --nl->trans_errors; - if( nl->framelen != 0 ) - nl->in_stats.resend_tx_number++; - } else { - /* cannot xmit with many attempts */ -#ifdef CONFIG_SBNI_MULTILINE - if( (nl->state & FL_SLAVE) || nl->link ) -#endif - nl->state |= FL_LINE_DOWN; - drop_xmit_queue( dev ); - goto do_send; - } - } else - nl->trans_errors = TR_ERROR_COUNT; - - send_frame_header( dev, &crc ); - nl->state |= FL_NEED_RESEND; - /* - * FL_NEED_RESEND will be cleared after ACK, but if empty - * frame sended then in prepare_to_send next frame - */ - - - if( nl->framelen ) { - download_data( dev, &crc ); - nl->in_stats.all_tx_number++; - nl->state |= FL_WAIT_ACK; - } - - outsb( dev->base_addr + DAT, (u8 *)&crc, sizeof crc ); - -do_send: - outb( inb( dev->base_addr + CSR0 ) & ~TR_REQ, dev->base_addr + CSR0 ); - - if( nl->tx_frameno ) - /* next frame exists - we request card to send it */ - outb( inb( dev->base_addr + CSR0 ) | TR_REQ, - dev->base_addr + CSR0 ); -} - - -/* - * Write the frame data into adapter's buffer memory, and calculate CRC. - * Do padding if necessary. - */ - -static void -download_data( struct net_device *dev, u32 *crc_p ) -{ - struct net_local *nl = netdev_priv(dev); - struct sk_buff *skb = nl->tx_buf_p; - - unsigned len = min_t(unsigned int, skb->len - nl->outpos, nl->framelen); - - outsb( dev->base_addr + DAT, skb->data + nl->outpos, len ); - *crc_p = calc_crc32( *crc_p, skb->data + nl->outpos, len ); - - /* if packet too short we should write some more bytes to pad */ - for( len = nl->framelen - len; len--; ) { - outb( 0, dev->base_addr + DAT ); - *crc_p = CRC32( 0, *crc_p ); - } -} - - -static int -upload_data( struct net_device *dev, unsigned framelen, unsigned frameno, - unsigned is_first, u32 crc ) -{ - struct net_local *nl = netdev_priv(dev); - - int frame_ok; - - if( is_first ) { - nl->wait_frameno = frameno; - nl->inppos = 0; - } - - if( nl->wait_frameno == frameno ) { - - if( nl->inppos + framelen <= ETHER_MAX_LEN ) - frame_ok = append_frame_to_pkt( dev, framelen, crc ); - - /* - * if CRC is right but framelen incorrect then transmitter - * error was occurred... drop entire packet - */ - else if( (frame_ok = skip_tail( dev->base_addr, framelen, crc )) - != 0 ) { - nl->wait_frameno = 0; - nl->inppos = 0; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.rx_errors++; - nl->master->stats.rx_missed_errors++; -#else - dev->stats.rx_errors++; - dev->stats.rx_missed_errors++; -#endif - } - /* now skip all frames until is_first != 0 */ - } else - frame_ok = skip_tail( dev->base_addr, framelen, crc ); - - if( is_first && !frame_ok ) { - /* - * Frame has been broken, but we had already stored - * is_first... Drop entire packet. - */ - nl->wait_frameno = 0; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.rx_errors++; - nl->master->stats.rx_crc_errors++; -#else - dev->stats.rx_errors++; - dev->stats.rx_crc_errors++; -#endif - } - - return frame_ok; -} - - -static inline void -send_complete( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.tx_packets++; - nl->master->stats.tx_bytes += nl->tx_buf_p->len; -#else - dev->stats.tx_packets++; - dev->stats.tx_bytes += nl->tx_buf_p->len; -#endif - dev_consume_skb_irq(nl->tx_buf_p); - - nl->tx_buf_p = NULL; - - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - nl->framelen = 0; -} - - -static void -interpret_ack( struct net_device *dev, unsigned ack ) -{ - struct net_local *nl = netdev_priv(dev); - - if( ack == FRAME_SENT_OK ) { - nl->state &= ~FL_NEED_RESEND; - - if( nl->state & FL_WAIT_ACK ) { - nl->outpos += nl->framelen; - - if( --nl->tx_frameno ) { - nl->framelen = min_t(unsigned int, - nl->maxframe, - nl->tx_buf_p->len - nl->outpos); - } else { - send_complete( dev ); -#ifdef CONFIG_SBNI_MULTILINE - netif_wake_queue( nl->master ); -#else - netif_wake_queue( dev ); -#endif - } - } - } - - nl->state &= ~FL_WAIT_ACK; -} - - -/* - * Glue received frame with previous fragments of packet. - * Indicate packet when last frame would be accepted. - */ - -static int -append_frame_to_pkt( struct net_device *dev, unsigned framelen, u32 crc ) -{ - struct net_local *nl = netdev_priv(dev); - - u8 *p; - - if( nl->inppos + framelen > ETHER_MAX_LEN ) - return 0; - - if( !nl->rx_buf_p && !(nl->rx_buf_p = get_rx_buf( dev )) ) - return 0; - - p = nl->rx_buf_p->data + nl->inppos; - insb( dev->base_addr + DAT, p, framelen ); - if( calc_crc32( crc, p, framelen ) != CRC32_REMAINDER ) - return 0; - - nl->inppos += framelen - 4; - if( --nl->wait_frameno == 0 ) /* last frame received */ - indicate_pkt( dev ); - - return 1; -} - - -/* - * Prepare to start output on adapter. - * Transmitter will be actually activated when marker is accepted. - */ - -static void -prepare_to_send( struct sk_buff *skb, struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - unsigned int len; - - /* nl->tx_buf_p == NULL here! */ - if( nl->tx_buf_p ) - netdev_err(dev, "memory leak!\n"); - - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - - len = skb->len; - if( len < SBNI_MIN_LEN ) - len = SBNI_MIN_LEN; - - nl->tx_buf_p = skb; - nl->tx_frameno = DIV_ROUND_UP(len, nl->maxframe); - nl->framelen = len < nl->maxframe ? len : nl->maxframe; - - outb( inb( dev->base_addr + CSR0 ) | TR_REQ, dev->base_addr + CSR0 ); -#ifdef CONFIG_SBNI_MULTILINE - netif_trans_update(nl->master); -#else - netif_trans_update(dev); -#endif -} - - -static void -drop_xmit_queue( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->tx_buf_p ) { - dev_kfree_skb_any( nl->tx_buf_p ); - nl->tx_buf_p = NULL; -#ifdef CONFIG_SBNI_MULTILINE - nl->master->stats.tx_errors++; - nl->master->stats.tx_carrier_errors++; -#else - dev->stats.tx_errors++; - dev->stats.tx_carrier_errors++; -#endif - } - - nl->tx_frameno = 0; - nl->framelen = 0; - nl->outpos = 0; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); -#ifdef CONFIG_SBNI_MULTILINE - netif_start_queue( nl->master ); - netif_trans_update(nl->master); -#else - netif_start_queue( dev ); - netif_trans_update(dev); -#endif -} - - -static void -send_frame_header( struct net_device *dev, u32 *crc_p ) -{ - struct net_local *nl = netdev_priv(dev); - - u32 crc = *crc_p; - u32 len_field = nl->framelen + 6; /* CRC + frameno + reserved */ - u8 value; - - if( nl->state & FL_NEED_RESEND ) - len_field |= FRAME_RETRY; /* non-first attempt... */ - - if( nl->outpos == 0 ) - len_field |= FRAME_FIRST; - - len_field |= (nl->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD; - outb( SBNI_SIG, dev->base_addr + DAT ); - - value = (u8) len_field; - outb( value, dev->base_addr + DAT ); - crc = CRC32( value, crc ); - value = (u8) (len_field >> 8); - outb( value, dev->base_addr + DAT ); - crc = CRC32( value, crc ); - - outb( nl->tx_frameno, dev->base_addr + DAT ); - crc = CRC32( nl->tx_frameno, crc ); - outb( 0, dev->base_addr + DAT ); - crc = CRC32( 0, crc ); - *crc_p = crc; -} - - -/* - * if frame tail not needed (incorrect number or received twice), - * it won't store, but CRC will be calculated - */ - -static int -skip_tail( unsigned int ioaddr, unsigned int tail_len, u32 crc ) -{ - while( tail_len-- ) - crc = CRC32( inb( ioaddr + DAT ), crc ); - - return crc == CRC32_REMAINDER; -} - - -/* - * Preliminary checks if frame header is correct, calculates its CRC - * and split it to simple fields - */ - -static int -check_fhdr( u32 ioaddr, u32 *framelen, u32 *frameno, u32 *ack, - u32 *is_first, u32 *crc_p ) -{ - u32 crc = *crc_p; - u8 value; - - if( inb( ioaddr + DAT ) != SBNI_SIG ) - return 0; - - value = inb( ioaddr + DAT ); - *framelen = (u32)value; - crc = CRC32( value, crc ); - value = inb( ioaddr + DAT ); - *framelen |= ((u32)value) << 8; - crc = CRC32( value, crc ); - - *ack = *framelen & FRAME_ACK_MASK; - *is_first = (*framelen & FRAME_FIRST) != 0; - - if( (*framelen &= FRAME_LEN_MASK) < 6 || - *framelen > SBNI_MAX_FRAME - 3 ) - return 0; - - value = inb( ioaddr + DAT ); - *frameno = (u32)value; - crc = CRC32( value, crc ); - - crc = CRC32( inb( ioaddr + DAT ), crc ); /* reserved byte */ - *framelen -= 2; - - *crc_p = crc; - return 1; -} - - -static struct sk_buff * -get_rx_buf( struct net_device *dev ) -{ - /* +2 is to compensate for the alignment fixup below */ - struct sk_buff *skb = dev_alloc_skb( ETHER_MAX_LEN + 2 ); - if( !skb ) - return NULL; - - skb_reserve( skb, 2 ); /* Align IP on longword boundaries */ - return skb; -} - - -static void -indicate_pkt( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct sk_buff *skb = nl->rx_buf_p; - - skb_put( skb, nl->inppos ); - -#ifdef CONFIG_SBNI_MULTILINE - skb->protocol = eth_type_trans( skb, nl->master ); - netif_rx( skb ); - ++nl->master->stats.rx_packets; - nl->master->stats.rx_bytes += nl->inppos; -#else - skb->protocol = eth_type_trans( skb, dev ); - netif_rx( skb ); - ++dev->stats.rx_packets; - dev->stats.rx_bytes += nl->inppos; -#endif - nl->rx_buf_p = NULL; /* protocol driver will clear this sk_buff */ -} - - -/* -------------------------------------------------------------------------- */ - -/* - * Routine checks periodically wire activity and regenerates marker if - * connect was inactive for a long time. - */ - -static void -sbni_watchdog(struct timer_list *t) -{ - struct net_local *nl = from_timer(nl, t, watchdog); - struct net_device *dev = nl->watchdog_dev; - unsigned long flags; - unsigned char csr0; - - spin_lock_irqsave( &nl->lock, flags ); - - csr0 = inb( dev->base_addr + CSR0 ); - if( csr0 & RC_CHK ) { - - if( nl->timer_ticks ) { - if( csr0 & (RC_RDY | BU_EMP) ) - /* receiving not active */ - nl->timer_ticks--; - } else { - nl->in_stats.timeout_number++; - if( nl->delta_rxl ) - timeout_change_level( dev ); - - outb( *(u_char *)&nl->csr1 | PR_RES, - dev->base_addr + CSR1 ); - csr0 = inb( dev->base_addr + CSR0 ); - } - } else - nl->state &= ~FL_LINE_DOWN; - - outb( csr0 | RC_CHK, dev->base_addr + CSR0 ); - - mod_timer(t, jiffies + SBNI_TIMEOUT); - - spin_unlock_irqrestore( &nl->lock, flags ); -} - - -static unsigned char rxl_tab[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, - 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f -}; - -#define SIZE_OF_TIMEOUT_RXL_TAB 4 -static unsigned char timeout_rxl_tab[] = { - 0x03, 0x05, 0x08, 0x0b -}; - -/* -------------------------------------------------------------------------- */ - -static void -card_start( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - nl->timer_ticks = CHANGE_LEVEL_START_TICKS; - nl->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); - nl->state |= FL_PREV_OK; - - nl->inppos = nl->outpos = 0; - nl->wait_frameno = 0; - nl->tx_frameno = 0; - nl->framelen = 0; - - outb( *(u_char *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 ); - outb( EN_INT, dev->base_addr + CSR0 ); -} - -/* -------------------------------------------------------------------------- */ - -/* Receive level auto-selection */ - -static void -change_level( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->delta_rxl == 0 ) /* do not auto-negotiate RxL */ - return; - - if( nl->cur_rxl_index == 0 ) - nl->delta_rxl = 1; - else if( nl->cur_rxl_index == 15 ) - nl->delta_rxl = -1; - else if( nl->cur_rxl_rcvd < nl->prev_rxl_rcvd ) - nl->delta_rxl = -nl->delta_rxl; - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index += nl->delta_rxl ]; - inb( dev->base_addr + CSR0 ); /* needs for PCI cards */ - outb( *(u8 *)&nl->csr1, dev->base_addr + CSR1 ); - - nl->prev_rxl_rcvd = nl->cur_rxl_rcvd; - nl->cur_rxl_rcvd = 0; -} - - -static void -timeout_change_level( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - nl->cur_rxl_index = timeout_rxl_tab[ nl->timeout_rxl ]; - if( ++nl->timeout_rxl >= 4 ) - nl->timeout_rxl = 0; - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - inb( dev->base_addr + CSR0 ); - outb( *(unsigned char *)&nl->csr1, dev->base_addr + CSR1 ); - - nl->prev_rxl_rcvd = nl->cur_rxl_rcvd; - nl->cur_rxl_rcvd = 0; -} - -/* -------------------------------------------------------------------------- */ - -/* - * Open/initialize the board. - */ - -static int -sbni_open( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct timer_list *w = &nl->watchdog; - - /* - * For double ISA adapters within "common irq" mode, we have to - * determine whether primary or secondary channel is initialized, - * and set the irq handler only in first case. - */ - if( dev->base_addr < 0x400 ) { /* ISA only */ - struct net_device **p = sbni_cards; - for( ; *p && p < sbni_cards + SBNI_MAX_NUM_CARDS; ++p ) - if( (*p)->irq == dev->irq && - ((*p)->base_addr == dev->base_addr + 4 || - (*p)->base_addr == dev->base_addr - 4) && - (*p)->flags & IFF_UP ) { - - ((struct net_local *) (netdev_priv(*p))) - ->second = dev; - netdev_notice(dev, "using shared irq with %s\n", - (*p)->name); - nl->state |= FL_SECONDARY; - goto handler_attached; - } - } - - if( request_irq(dev->irq, sbni_interrupt, IRQF_SHARED, dev->name, dev) ) { - netdev_err(dev, "unable to get IRQ %d\n", dev->irq); - return -EAGAIN; - } - -handler_attached: - - spin_lock( &nl->lock ); - memset( &dev->stats, 0, sizeof(struct net_device_stats) ); - memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) ); - - card_start( dev ); - - netif_start_queue( dev ); - - /* set timer watchdog */ - nl->watchdog_dev = dev; - timer_setup(w, sbni_watchdog, 0); - w->expires = jiffies + SBNI_TIMEOUT; - add_timer( w ); - - spin_unlock( &nl->lock ); - return 0; -} - - -static int -sbni_close( struct net_device *dev ) -{ - struct net_local *nl = netdev_priv(dev); - - if( nl->second && nl->second->flags & IFF_UP ) { - netdev_notice(dev, "Secondary channel (%s) is active!\n", - nl->second->name); - return -EBUSY; - } - -#ifdef CONFIG_SBNI_MULTILINE - if( nl->state & FL_SLAVE ) - emancipate( dev ); - else - while( nl->link ) /* it's master device! */ - emancipate( nl->link ); -#endif - - spin_lock( &nl->lock ); - - nl->second = NULL; - drop_xmit_queue( dev ); - netif_stop_queue( dev ); - - del_timer( &nl->watchdog ); - - outb( 0, dev->base_addr + CSR0 ); - - if( !(nl->state & FL_SECONDARY) ) - free_irq( dev->irq, dev ); - nl->state &= FL_SECONDARY; - - spin_unlock( &nl->lock ); - return 0; -} - - -/* - Valid combinations in CSR0 (for probing): - - VALID_DECODER 0000,0011,1011,1010 - - ; 0 ; - - TR_REQ ; 1 ; + - TR_RDY ; 2 ; - - TR_RDY TR_REQ ; 3 ; + - BU_EMP ; 4 ; + - BU_EMP TR_REQ ; 5 ; + - BU_EMP TR_RDY ; 6 ; - - BU_EMP TR_RDY TR_REQ ; 7 ; + - RC_RDY ; 8 ; + - RC_RDY TR_REQ ; 9 ; + - RC_RDY TR_RDY ; 10 ; - - RC_RDY TR_RDY TR_REQ ; 11 ; - - RC_RDY BU_EMP ; 12 ; - - RC_RDY BU_EMP TR_REQ ; 13 ; - - RC_RDY BU_EMP TR_RDY ; 14 ; - - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - -*/ - -#define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) - - -static int -sbni_card_probe( unsigned long ioaddr ) -{ - unsigned char csr0; - - csr0 = inb( ioaddr + CSR0 ); - if( csr0 != 0xff && csr0 != 0x00 ) { - csr0 &= ~EN_INT; - if( csr0 & BU_EMP ) - csr0 |= EN_INT; - - if( VALID_DECODER & (1 << (csr0 >> 4)) ) - return 0; - } - - return -ENODEV; -} - -/* -------------------------------------------------------------------------- */ - -static int -sbni_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) -{ - struct net_local *nl = netdev_priv(dev); - struct sbni_flags flags; - int error = 0; - -#ifdef CONFIG_SBNI_MULTILINE - struct net_device *slave_dev; - char slave_name[ 8 ]; -#endif - - switch( cmd ) { - case SIOCDEVGETINSTATS : - if (copy_to_user(data, &nl->in_stats, - sizeof(struct sbni_in_stats))) - error = -EFAULT; - break; - - case SIOCDEVRESINSTATS : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - memset( &nl->in_stats, 0, sizeof(struct sbni_in_stats) ); - break; - - case SIOCDEVGHWSTATE : - flags.mac_addr = *(u32 *)(dev->dev_addr + 3); - flags.rate = nl->csr1.rate; - flags.slow_mode = (nl->state & FL_SLOW_MODE) != 0; - flags.rxl = nl->cur_rxl_index; - flags.fixed_rxl = nl->delta_rxl == 0; - - if (copy_to_user(data, &flags, sizeof(flags))) - error = -EFAULT; - break; - - case SIOCDEVSHWSTATE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - spin_lock( &nl->lock ); - flags = *(struct sbni_flags*) &ifr->ifr_ifru; - if( flags.fixed_rxl ) { - nl->delta_rxl = 0; - nl->cur_rxl_index = flags.rxl; - } else { - nl->delta_rxl = DEF_RXL_DELTA; - nl->cur_rxl_index = DEF_RXL; - } - - nl->csr1.rxl = rxl_tab[ nl->cur_rxl_index ]; - nl->csr1.rate = flags.rate; - outb( *(u8 *)&nl->csr1 | PR_RES, dev->base_addr + CSR1 ); - spin_unlock( &nl->lock ); - break; - -#ifdef CONFIG_SBNI_MULTILINE - - case SIOCDEVENSLAVE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (copy_from_user(slave_name, data, sizeof(slave_name))) - return -EFAULT; - slave_dev = dev_get_by_name(&init_net, slave_name ); - if( !slave_dev || !(slave_dev->flags & IFF_UP) ) { - netdev_err(dev, "trying to enslave non-active device %s\n", - slave_name); - if (slave_dev) - dev_put(slave_dev); - return -EPERM; - } - - return enslave( dev, slave_dev ); - - case SIOCDEVEMANSIPATE : - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - return emancipate( dev ); - -#endif /* CONFIG_SBNI_MULTILINE */ - - default : - return -EOPNOTSUPP; - } - - return error; -} - - -#ifdef CONFIG_SBNI_MULTILINE - -static int -enslave( struct net_device *dev, struct net_device *slave_dev ) -{ - struct net_local *nl = netdev_priv(dev); - struct net_local *snl = netdev_priv(slave_dev); - - if( nl->state & FL_SLAVE ) /* This isn't master or free device */ - return -EBUSY; - - if( snl->state & FL_SLAVE ) /* That was already enslaved */ - return -EBUSY; - - spin_lock( &nl->lock ); - spin_lock( &snl->lock ); - - /* append to list */ - snl->link = nl->link; - nl->link = slave_dev; - snl->master = dev; - snl->state |= FL_SLAVE; - - /* Summary statistics of MultiLine operation will be stored - in master's counters */ - memset( &slave_dev->stats, 0, sizeof(struct net_device_stats) ); - netif_stop_queue( slave_dev ); - netif_wake_queue( dev ); /* Now we are able to transmit */ - - spin_unlock( &snl->lock ); - spin_unlock( &nl->lock ); - netdev_notice(dev, "slave device (%s) attached\n", slave_dev->name); - return 0; -} - - -static int -emancipate( struct net_device *dev ) -{ - struct net_local *snl = netdev_priv(dev); - struct net_device *p = snl->master; - struct net_local *nl = netdev_priv(p); - - if( !(snl->state & FL_SLAVE) ) - return -EINVAL; - - spin_lock( &nl->lock ); - spin_lock( &snl->lock ); - drop_xmit_queue( dev ); - - /* exclude from list */ - for(;;) { /* must be in list */ - struct net_local *t = netdev_priv(p); - if( t->link == dev ) { - t->link = snl->link; - break; - } - p = t->link; - } - - snl->link = NULL; - snl->master = dev; - snl->state &= ~FL_SLAVE; - - netif_start_queue( dev ); - - spin_unlock( &snl->lock ); - spin_unlock( &nl->lock ); - - dev_put( dev ); - return 0; -} - -#endif - -static void -set_multicast_list( struct net_device *dev ) -{ - return; /* sbni always operate in promiscuos mode */ -} - - -#ifdef MODULE -module_param_hw_array(io, int, ioport, NULL, 0); -module_param_hw_array(irq, int, irq, NULL, 0); -module_param_array(baud, int, NULL, 0); -module_param_array(rxl, int, NULL, 0); -module_param_array(mac, int, NULL, 0); -module_param(skip_pci_probe, bool, 0); - -MODULE_LICENSE("GPL"); - - -int __init init_module( void ) -{ - struct net_device *dev; - int err; - - while( num < SBNI_MAX_NUM_CARDS ) { - dev = alloc_netdev(sizeof(struct net_local), "sbni%d", - NET_NAME_UNKNOWN, sbni_devsetup); - if( !dev) - break; - - sprintf( dev->name, "sbni%d", num ); - - err = sbni_init(dev); - if (err) { - free_netdev(dev); - break; - } - - if( register_netdev( dev ) ) { - release_region( dev->base_addr, SBNI_IO_EXTENT ); - free_netdev( dev ); - break; - } - } - - return *sbni_cards ? 0 : -ENODEV; -} - -void -cleanup_module(void) -{ - int i; - - for (i = 0; i < SBNI_MAX_NUM_CARDS; ++i) { - struct net_device *dev = sbni_cards[i]; - if (dev != NULL) { - unregister_netdev(dev); - release_region(dev->base_addr, SBNI_IO_EXTENT); - free_netdev(dev); - } - } -} - -#else /* MODULE */ - -static int __init -sbni_setup( char *p ) -{ - int n, parm; - - if( *p++ != '(' ) - goto bad_param; - - for( n = 0, parm = 0; *p && n < 8; ) { - (*dest[ parm ])[ n ] = simple_strtoul( p, &p, 0 ); - if( !*p || *p == ')' ) - return 1; - if( *p == ';' ) { - ++p; - ++n; - parm = 0; - } else if( *p++ != ',' ) { - break; - } else { - if( ++parm >= 5 ) - break; - } - } -bad_param: - pr_err("Error in sbni kernel parameter!\n"); - return 0; -} - -__setup( "sbni=", sbni_setup ); - -#endif /* MODULE */ - -/* -------------------------------------------------------------------------- */ - -static u32 -calc_crc32( u32 crc, u8 *p, u32 len ) -{ - while( len-- ) - crc = CRC32( *p++, crc ); - - return crc; -} - -static u32 crc32tab[] __attribute__ ((aligned(8))) = { - 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, - 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, - 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, - 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, - 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, - 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, - 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, - 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, - 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, - 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, - 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, - 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, - 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, - 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, - 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, - 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, - 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, - 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, - 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, - 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, - 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, - 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, - 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, - 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, - 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, - 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76, - 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, - 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, - 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, - 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, - 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, - 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, - 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, - 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, - 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, - 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, - 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, - 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A, - 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, - 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, - 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, - 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, - 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, - 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, - 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, - 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, - 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, - 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, - 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, - 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, - 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, - 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, - 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, - 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, - 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, - 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, - 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, - 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, - 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, - 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, - 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, - 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, - 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, - 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 -}; - diff --git a/drivers/net/wan/sbni.h b/drivers/net/wan/sbni.h deleted file mode 100644 index 84264510a8ed..000000000000 --- a/drivers/net/wan/sbni.h +++ /dev/null @@ -1,147 +0,0 @@ -/* sbni.h: definitions for a Granch SBNI12 driver, version 5.0.0 - * Written 2001 Denis I.Timofeev (timofeev@granch.ru) - * This file is distributed under the GNU GPL - */ - -#ifndef SBNI_H -#define SBNI_H - -#ifdef SBNI_DEBUG -#define DP( A ) A -#else -#define DP( A ) -#endif - - -/* We don't have official vendor id yet... */ -#define SBNI_PCI_VENDOR 0x55 -#define SBNI_PCI_DEVICE 0x9f - -#define ISA_MODE 0x00 -#define PCI_MODE 0x01 - -#define SBNI_IO_EXTENT 4 - -enum sbni_reg { - CSR0 = 0, - CSR1 = 1, - DAT = 2 -}; - -/* CSR0 mapping */ -enum { - BU_EMP = 0x02, - RC_CHK = 0x04, - CT_ZER = 0x08, - TR_REQ = 0x10, - TR_RDY = 0x20, - EN_INT = 0x40, - RC_RDY = 0x80 -}; - - -/* CSR1 mapping */ -#define PR_RES 0x80 - -struct sbni_csr1 { -#ifdef __LITTLE_ENDIAN_BITFIELD - u8 rxl : 5; - u8 rate : 2; - u8 : 1; -#else - u8 : 1; - u8 rate : 2; - u8 rxl : 5; -#endif -}; - -/* fields in frame header */ -#define FRAME_ACK_MASK (unsigned short)0x7000 -#define FRAME_LEN_MASK (unsigned short)0x03FF -#define FRAME_FIRST (unsigned short)0x8000 -#define FRAME_RETRY (unsigned short)0x0800 - -#define FRAME_SENT_BAD (unsigned short)0x4000 -#define FRAME_SENT_OK (unsigned short)0x3000 - - -/* state flags */ -enum { - FL_WAIT_ACK = 0x01, - FL_NEED_RESEND = 0x02, - FL_PREV_OK = 0x04, - FL_SLOW_MODE = 0x08, - FL_SECONDARY = 0x10, -#ifdef CONFIG_SBNI_MULTILINE - FL_SLAVE = 0x20, -#endif - FL_LINE_DOWN = 0x40 -}; - - -enum { - DEFAULT_IOBASEADDR = 0x210, - DEFAULT_INTERRUPTNUMBER = 5, - DEFAULT_RATE = 0, - DEFAULT_FRAME_LEN = 1012 -}; - -#define DEF_RXL_DELTA -1 -#define DEF_RXL 0xf - -#define SBNI_SIG 0x5a - -#define SBNI_MIN_LEN 60 /* Shortest Ethernet frame without FCS */ -#define SBNI_MAX_FRAME 1023 -#define ETHER_MAX_LEN 1518 - -#define SBNI_TIMEOUT (HZ/10) - -#define TR_ERROR_COUNT 32 -#define CHANGE_LEVEL_START_TICKS 4 - -#define SBNI_MAX_NUM_CARDS 16 - -/* internal SBNI-specific statistics */ -struct sbni_in_stats { - u32 all_rx_number; - u32 bad_rx_number; - u32 timeout_number; - u32 all_tx_number; - u32 resend_tx_number; -}; - -/* SBNI ioctl params */ -#define SIOCDEVGETINSTATS SIOCDEVPRIVATE -#define SIOCDEVRESINSTATS SIOCDEVPRIVATE+1 -#define SIOCDEVGHWSTATE SIOCDEVPRIVATE+2 -#define SIOCDEVSHWSTATE SIOCDEVPRIVATE+3 -#define SIOCDEVENSLAVE SIOCDEVPRIVATE+4 -#define SIOCDEVEMANSIPATE SIOCDEVPRIVATE+5 - - -/* data packet for SIOCDEVGHWSTATE/SIOCDEVSHWSTATE ioctl requests */ -struct sbni_flags { - u32 rxl : 4; - u32 rate : 2; - u32 fixed_rxl : 1; - u32 slow_mode : 1; - u32 mac_addr : 24; -}; - -/* - * CRC-32 stuff - */ -#define CRC32(c,crc) (crc32tab[((size_t)(crc) ^ (c)) & 0xff] ^ (((crc) >> 8) & 0x00FFFFFF)) - /* CRC generator 0xEDB88320 */ - /* CRC remainder 0x2144DF1C */ - /* CRC initial value 0x00000000 */ -#define CRC32_REMAINDER 0x2144DF1C -#define CRC32_INITIAL 0x00000000 - -#ifndef __initdata -#define __initdata -#endif - -#endif - diff --git a/drivers/net/wireless/virt_wifi.c b/drivers/net/wireless/virt_wifi.c index 1df959532c7d..514f2c1124b6 100644 --- a/drivers/net/wireless/virt_wifi.c +++ b/drivers/net/wireless/virt_wifi.c @@ -136,6 +136,29 @@ static struct ieee80211_supported_band band_5ghz = { /* Assigned at module init. Guaranteed locally-administered and unicast. */ static u8 fake_router_bssid[ETH_ALEN] __ro_after_init = {}; +static void virt_wifi_inform_bss(struct wiphy *wiphy) +{ + u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); + struct cfg80211_bss *informed_bss; + static const struct { + u8 tag; + u8 len; + u8 ssid[8]; + } __packed ssid = { + .tag = WLAN_EID_SSID, + .len = 8, + .ssid = "VirtWifi", + }; + + informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, + CFG80211_BSS_FTYPE_PRESP, + fake_router_bssid, tsf, + WLAN_CAPABILITY_ESS, 0, + (void *)&ssid, sizeof(ssid), + DBM_TO_MBM(-50), GFP_KERNEL); + cfg80211_put_bss(wiphy, informed_bss); +} + /* Called with the rtnl lock held. */ static int virt_wifi_scan(struct wiphy *wiphy, struct cfg80211_scan_request *request) @@ -156,28 +179,13 @@ static int virt_wifi_scan(struct wiphy *wiphy, /* Acquires and releases the rdev BSS lock. */ static void virt_wifi_scan_result(struct work_struct *work) { - struct { - u8 tag; - u8 len; - u8 ssid[8]; - } __packed ssid = { - .tag = WLAN_EID_SSID, .len = 8, .ssid = "VirtWifi", - }; - struct cfg80211_bss *informed_bss; struct virt_wifi_wiphy_priv *priv = container_of(work, struct virt_wifi_wiphy_priv, scan_result.work); struct wiphy *wiphy = priv_to_wiphy(priv); struct cfg80211_scan_info scan_info = { .aborted = false }; - u64 tsf = div_u64(ktime_get_boottime_ns(), 1000); - informed_bss = cfg80211_inform_bss(wiphy, &channel_5ghz, - CFG80211_BSS_FTYPE_PRESP, - fake_router_bssid, tsf, - WLAN_CAPABILITY_ESS, 0, - (void *)&ssid, sizeof(ssid), - DBM_TO_MBM(-50), GFP_KERNEL); - cfg80211_put_bss(wiphy, informed_bss); + virt_wifi_inform_bss(wiphy); /* Schedules work which acquires and releases the rtnl lock. */ cfg80211_scan_done(priv->scan_request, &scan_info); @@ -225,10 +233,12 @@ static int virt_wifi_connect(struct wiphy *wiphy, struct net_device *netdev, if (!could_schedule) return -EBUSY; - if (sme->bssid) + if (sme->bssid) { ether_addr_copy(priv->connect_requested_bss, sme->bssid); - else + } else { + virt_wifi_inform_bss(wiphy); eth_zero_addr(priv->connect_requested_bss); + } wiphy_debug(wiphy, "connect\n"); @@ -241,11 +251,13 @@ static void virt_wifi_connect_complete(struct work_struct *work) struct virt_wifi_netdev_priv *priv = container_of(work, struct virt_wifi_netdev_priv, connect.work); u8 *requested_bss = priv->connect_requested_bss; - bool has_addr = !is_zero_ether_addr(requested_bss); bool right_addr = ether_addr_equal(requested_bss, fake_router_bssid); u16 status = WLAN_STATUS_SUCCESS; - if (!priv->is_up || (has_addr && !right_addr)) + if (is_zero_ether_addr(requested_bss)) + requested_bss = NULL; + + if (!priv->is_up || (requested_bss && !right_addr)) status = WLAN_STATUS_UNSPECIFIED_FAILURE; else priv->is_connected = true; diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig index de9384326bc8..77dbfc418bce 100644 --- a/drivers/net/wwan/Kconfig +++ b/drivers/net/wwan/Kconfig @@ -38,6 +38,18 @@ config MHI_WWAN_CTRL To compile this driver as a module, choose M here: the module will be called mhi_wwan_ctrl. +config MHI_WWAN_MBIM + tristate "MHI WWAN MBIM network driver for QCOM-based PCIe modems" + depends on MHI_BUS + help + MHI WWAN MBIM is a WWAN network driver for QCOM-based PCIe modems. + It implements MBIM over MHI, for IP data aggregation and muxing. + A default wwan0 network interface is created for MBIM data session + ID 0. Additional links can be created via wwan rtnetlink type. + + To compile this driver as a module, choose M here: the module will be + called mhi_wwan_mbim. + config RPMSG_WWAN_CTRL tristate "RPMSG WWAN control driver" depends on RPMSG diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile index d90ac33abaef..fe51feedac21 100644 --- a/drivers/net/wwan/Makefile +++ b/drivers/net/wwan/Makefile @@ -9,5 +9,6 @@ wwan-objs += wwan_core.o obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o +obj-$(CONFIG_MHI_WWAN_MBIM) += mhi_wwan_mbim.o obj-$(CONFIG_RPMSG_WWAN_CTRL) += rpmsg_wwan_ctrl.o obj-$(CONFIG_IOSM) += iosm/ diff --git a/drivers/net/wwan/iosm/iosm_ipc_mmio.h b/drivers/net/wwan/iosm/iosm_ipc_mmio.h index 45e6923da78f..f861994a6d90 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mmio.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mmio.h @@ -10,10 +10,10 @@ #define IOSM_CP_VERSION 0x0100UL /* DL dir Aggregation support mask */ -#define DL_AGGR BIT(23) +#define DL_AGGR BIT(9) /* UL dir Aggregation support mask */ -#define UL_AGGR BIT(22) +#define UL_AGGR BIT(8) /* UL flow credit support mask */ #define UL_FLOW_CREDIT BIT(21) diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c index 562de275797a..bdb2d32cdb6d 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.c @@ -320,7 +320,7 @@ static void ipc_mux_dl_fcth_decode(struct iosm_mux *ipc_mux, return; } - ul_credits = fct->vfl.nr_of_bytes; + ul_credits = le32_to_cpu(fct->vfl.nr_of_bytes); dev_dbg(ipc_mux->dev, "Flow_Credit:: if_id[%d] Old: %d Grants: %d", if_id, ipc_mux->session[if_id].ul_flow_credits, ul_credits); @@ -586,7 +586,7 @@ static bool ipc_mux_lite_send_qlt(struct iosm_mux *ipc_mux) qlt->reserved[0] = 0; qlt->reserved[1] = 0; - qlt->vfl.nr_of_bytes = session->ul_list.qlen; + qlt->vfl.nr_of_bytes = cpu_to_le32(session->ul_list.qlen); /* Add QLT to the transfer list. */ skb_queue_tail(&ipc_mux->channel->ul_list, diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h index 4a74e3c9457f..aae83db5cbb8 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h +++ b/drivers/net/wwan/iosm/iosm_ipc_mux_codec.h @@ -106,7 +106,7 @@ struct mux_lite_cmdh { * @nr_of_bytes: Number of bytes available to transmit in the queue. */ struct mux_lite_vfl { - u32 nr_of_bytes; + __le32 nr_of_bytes; }; /** diff --git a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c index 91109e27efd3..35d590743d3a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c +++ b/drivers/net/wwan/iosm/iosm_ipc_protocol_ops.c @@ -412,8 +412,8 @@ struct sk_buff *ipc_protocol_dl_td_process(struct iosm_protocol *ipc_protocol, } if (p_td->buffer.address != IPC_CB(skb)->mapping) { - dev_err(ipc_protocol->dev, "invalid buf=%p or skb=%p", - (void *)p_td->buffer.address, skb->data); + dev_err(ipc_protocol->dev, "invalid buf=%llx or skb=%p", + (unsigned long long)p_td->buffer.address, skb->data); ipc_pcie_kfree_skb(ipc_protocol->pcie, skb); skb = NULL; goto ret; diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index b2357ad5d517..b571d9cedba4 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -228,7 +228,7 @@ static void ipc_wwan_dellink(void *ctxt, struct net_device *dev, RCU_INIT_POINTER(ipc_wwan->sub_netlist[if_id], NULL); /* unregistering includes synchronize_net() */ - unregister_netdevice(dev); + unregister_netdevice_queue(dev, head); unlock: mutex_unlock(&ipc_wwan->if_mutex); diff --git a/drivers/net/wwan/mhi_wwan_ctrl.c b/drivers/net/wwan/mhi_wwan_ctrl.c index 1bc6b69aa530..1e18420ce404 100644 --- a/drivers/net/wwan/mhi_wwan_ctrl.c +++ b/drivers/net/wwan/mhi_wwan_ctrl.c @@ -110,7 +110,7 @@ static int mhi_wwan_ctrl_start(struct wwan_port *port) int ret; /* Start mhi device's channel(s) */ - ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev); + ret = mhi_prepare_for_transfer(mhiwwan->mhi_dev, 0); if (ret) return ret; diff --git a/drivers/net/wwan/mhi_wwan_mbim.c b/drivers/net/wwan/mhi_wwan_mbim.c new file mode 100644 index 000000000000..377529bbf124 --- /dev/null +++ b/drivers/net/wwan/mhi_wwan_mbim.c @@ -0,0 +1,658 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* MHI MBIM Network driver - Network/MBIM over MHI bus + * + * Copyright (C) 2021 Linaro Ltd <loic.poulain@linaro.org> + * + * This driver copy some code from cdc_ncm, which is: + * Copyright (C) ST-Ericsson 2010-2012 + * and cdc_mbim, which is: + * Copyright (c) 2012 Smith Micro Software, Inc. + * Copyright (c) 2012 Bjørn Mork <bjorn@mork.no> + * + */ + +#include <linux/ethtool.h> +#include <linux/if_arp.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/mhi.h> +#include <linux/mii.h> +#include <linux/mod_devicetable.h> +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/u64_stats_sync.h> +#include <linux/usb.h> +#include <linux/usb/cdc.h> +#include <linux/usb/usbnet.h> +#include <linux/usb/cdc_ncm.h> +#include <linux/wwan.h> + +/* 3500 allows to optimize skb allocation, the skbs will basically fit in + * one 4K page. Large MBIM packets will simply be split over several MHI + * transfers and chained by the MHI net layer (zerocopy). + */ +#define MHI_DEFAULT_MRU 3500 + +#define MHI_MBIM_DEFAULT_MTU 1500 +#define MHI_MAX_BUF_SZ 0xffff + +#define MBIM_NDP16_SIGN_MASK 0x00ffffff + +#define MHI_MBIM_LINK_HASH_SIZE 8 +#define LINK_HASH(session) ((session) % MHI_MBIM_LINK_HASH_SIZE) + +struct mhi_mbim_link { + struct mhi_mbim_context *mbim; + struct net_device *ndev; + unsigned int session; + + /* stats */ + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_errors; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_errors; + u64_stats_t tx_dropped; + struct u64_stats_sync tx_syncp; + struct u64_stats_sync rx_syncp; + + struct hlist_node hlnode; +}; + +struct mhi_mbim_context { + struct mhi_device *mdev; + struct sk_buff *skbagg_head; + struct sk_buff *skbagg_tail; + unsigned int mru; + u32 rx_queue_sz; + u16 rx_seq; + u16 tx_seq; + struct delayed_work rx_refill; + spinlock_t tx_lock; + struct hlist_head link_list[MHI_MBIM_LINK_HASH_SIZE]; +}; + +struct mbim_tx_hdr { + struct usb_cdc_ncm_nth16 nth16; + struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16[2]; +} __packed; + +static struct mhi_mbim_link *mhi_mbim_get_link_rcu(struct mhi_mbim_context *mbim, + unsigned int session) +{ + struct mhi_mbim_link *link; + + hlist_for_each_entry_rcu(link, &mbim->link_list[LINK_HASH(session)], hlnode) { + if (link->session == session) + return link; + } + + return NULL; +} + +static struct sk_buff *mbim_tx_fixup(struct sk_buff *skb, unsigned int session, + u16 tx_seq) +{ + unsigned int dgram_size = skb->len; + struct usb_cdc_ncm_nth16 *nth16; + struct usb_cdc_ncm_ndp16 *ndp16; + struct mbim_tx_hdr *mbim_hdr; + + /* Only one NDP is sent, containing the IP packet (no aggregation) */ + + /* Ensure we have enough headroom for crafting MBIM header */ + if (skb_cow_head(skb, sizeof(struct mbim_tx_hdr))) { + dev_kfree_skb_any(skb); + return NULL; + } + + mbim_hdr = skb_push(skb, sizeof(struct mbim_tx_hdr)); + + /* Fill NTB header */ + nth16 = &mbim_hdr->nth16; + nth16->dwSignature = cpu_to_le32(USB_CDC_NCM_NTH16_SIGN); + nth16->wHeaderLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + nth16->wSequence = cpu_to_le16(tx_seq); + nth16->wBlockLength = cpu_to_le16(skb->len); + nth16->wNdpIndex = cpu_to_le16(sizeof(struct usb_cdc_ncm_nth16)); + + /* Fill the unique NDP */ + ndp16 = &mbim_hdr->ndp16; + ndp16->dwSignature = cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN | (session << 24)); + ndp16->wLength = cpu_to_le16(sizeof(struct usb_cdc_ncm_ndp16) + + sizeof(struct usb_cdc_ncm_dpe16) * 2); + ndp16->wNextNdpIndex = 0; + + /* Datagram follows the mbim header */ + ndp16->dpe16[0].wDatagramIndex = cpu_to_le16(sizeof(struct mbim_tx_hdr)); + ndp16->dpe16[0].wDatagramLength = cpu_to_le16(dgram_size); + + /* null termination */ + ndp16->dpe16[1].wDatagramIndex = 0; + ndp16->dpe16[1].wDatagramLength = 0; + + return skb; +} + +static netdev_tx_t mhi_mbim_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + struct mhi_mbim_context *mbim = link->mbim; + unsigned long flags; + int err = -ENOMEM; + + /* Serialize MHI channel queuing and MBIM seq */ + spin_lock_irqsave(&mbim->tx_lock, flags); + + skb = mbim_tx_fixup(skb, link->session, mbim->tx_seq); + if (unlikely(!skb)) + goto exit_unlock; + + err = mhi_queue_skb(mbim->mdev, DMA_TO_DEVICE, skb, skb->len, MHI_EOT); + + if (mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE)) + netif_stop_queue(ndev); + + if (!err) + mbim->tx_seq++; + +exit_unlock: + spin_unlock_irqrestore(&mbim->tx_lock, flags); + + if (unlikely(err)) { + net_err_ratelimited("%s: Failed to queue TX buf (%d)\n", + ndev->name, err); + dev_kfree_skb_any(skb); + goto exit_drop; + } + + return NETDEV_TX_OK; + +exit_drop: + u64_stats_update_begin(&link->tx_syncp); + u64_stats_inc(&link->tx_dropped); + u64_stats_update_end(&link->tx_syncp); + + return NETDEV_TX_OK; +} + +static int mbim_rx_verify_nth16(struct mhi_mbim_context *mbim, struct sk_buff *skb) +{ + struct usb_cdc_ncm_nth16 *nth16; + int len; + + if (skb->len < sizeof(struct usb_cdc_ncm_nth16) + + sizeof(struct usb_cdc_ncm_ndp16)) { + net_err_ratelimited("frame too short\n"); + return -EINVAL; + } + + nth16 = (struct usb_cdc_ncm_nth16 *)skb->data; + + if (nth16->dwSignature != cpu_to_le32(USB_CDC_NCM_NTH16_SIGN)) { + net_err_ratelimited("invalid NTH16 signature <%#010x>\n", + le32_to_cpu(nth16->dwSignature)); + return -EINVAL; + } + + /* No limit on the block length, except the size of the data pkt */ + len = le16_to_cpu(nth16->wBlockLength); + if (len > skb->len) { + net_err_ratelimited("NTB does not fit into the skb %u/%u\n", + len, skb->len); + return -EINVAL; + } + + if (mbim->rx_seq + 1 != le16_to_cpu(nth16->wSequence) && + (mbim->rx_seq || le16_to_cpu(nth16->wSequence)) && + !(mbim->rx_seq == 0xffff && !le16_to_cpu(nth16->wSequence))) { + net_err_ratelimited("sequence number glitch prev=%d curr=%d\n", + mbim->rx_seq, le16_to_cpu(nth16->wSequence)); + } + mbim->rx_seq = le16_to_cpu(nth16->wSequence); + + return le16_to_cpu(nth16->wNdpIndex); +} + +static int mbim_rx_verify_ndp16(struct sk_buff *skb, struct usb_cdc_ncm_ndp16 *ndp16) +{ + int ret; + + if (le16_to_cpu(ndp16->wLength) < USB_CDC_NCM_NDP16_LENGTH_MIN) { + net_err_ratelimited("invalid DPT16 length <%u>\n", + le16_to_cpu(ndp16->wLength)); + return -EINVAL; + } + + ret = ((le16_to_cpu(ndp16->wLength) - sizeof(struct usb_cdc_ncm_ndp16)) + / sizeof(struct usb_cdc_ncm_dpe16)); + ret--; /* Last entry is always a NULL terminator */ + + if (sizeof(struct usb_cdc_ncm_ndp16) + + ret * sizeof(struct usb_cdc_ncm_dpe16) > skb->len) { + net_err_ratelimited("Invalid nframes = %d\n", ret); + return -EINVAL; + } + + return ret; +} + +static void mhi_mbim_rx(struct mhi_mbim_context *mbim, struct sk_buff *skb) +{ + int ndpoffset; + + /* Check NTB header and retrieve first NDP offset */ + ndpoffset = mbim_rx_verify_nth16(mbim, skb); + if (ndpoffset < 0) { + net_err_ratelimited("mbim: Incorrect NTB header\n"); + goto error; + } + + /* Process each NDP */ + while (1) { + struct usb_cdc_ncm_ndp16 ndp16; + struct usb_cdc_ncm_dpe16 dpe16; + struct mhi_mbim_link *link; + int nframes, n, dpeoffset; + unsigned int session; + + if (skb_copy_bits(skb, ndpoffset, &ndp16, sizeof(ndp16))) { + net_err_ratelimited("mbim: Incorrect NDP offset (%u)\n", + ndpoffset); + goto error; + } + + /* Check NDP header and retrieve number of datagrams */ + nframes = mbim_rx_verify_ndp16(skb, &ndp16); + if (nframes < 0) { + net_err_ratelimited("mbim: Incorrect NDP16\n"); + goto error; + } + + /* Only IP data type supported, no DSS in MHI context */ + if ((ndp16.dwSignature & cpu_to_le32(MBIM_NDP16_SIGN_MASK)) + != cpu_to_le32(USB_CDC_MBIM_NDP16_IPS_SIGN)) { + net_err_ratelimited("mbim: Unsupported NDP type\n"); + goto next_ndp; + } + + session = (le32_to_cpu(ndp16.dwSignature) & ~MBIM_NDP16_SIGN_MASK) >> 24; + + rcu_read_lock(); + + link = mhi_mbim_get_link_rcu(mbim, session); + if (!link) { + net_err_ratelimited("mbim: bad packet session (%u)\n", session); + goto unlock; + } + + /* de-aggregate and deliver IP packets */ + dpeoffset = ndpoffset + sizeof(struct usb_cdc_ncm_ndp16); + for (n = 0; n < nframes; n++, dpeoffset += sizeof(dpe16)) { + u16 dgram_offset, dgram_len; + struct sk_buff *skbn; + + if (skb_copy_bits(skb, dpeoffset, &dpe16, sizeof(dpe16))) + break; + + dgram_offset = le16_to_cpu(dpe16.wDatagramIndex); + dgram_len = le16_to_cpu(dpe16.wDatagramLength); + + if (!dgram_offset || !dgram_len) + break; /* null terminator */ + + skbn = netdev_alloc_skb(link->ndev, dgram_len); + if (!skbn) + continue; + + skb_put(skbn, dgram_len); + skb_copy_bits(skb, dgram_offset, skbn->data, dgram_len); + + switch (skbn->data[0] & 0xf0) { + case 0x40: + skbn->protocol = htons(ETH_P_IP); + break; + case 0x60: + skbn->protocol = htons(ETH_P_IPV6); + break; + default: + net_err_ratelimited("%s: unknown protocol\n", + link->ndev->name); + dev_kfree_skb_any(skbn); + u64_stats_update_begin(&link->rx_syncp); + u64_stats_inc(&link->rx_errors); + u64_stats_update_end(&link->rx_syncp); + continue; + } + + u64_stats_update_begin(&link->rx_syncp); + u64_stats_inc(&link->rx_packets); + u64_stats_add(&link->rx_bytes, skbn->len); + u64_stats_update_end(&link->rx_syncp); + + netif_rx(skbn); + } +unlock: + rcu_read_unlock(); +next_ndp: + /* Other NDP to process? */ + ndpoffset = (int)le16_to_cpu(ndp16.wNextNdpIndex); + if (!ndpoffset) + break; + } + + /* free skb */ + dev_consume_skb_any(skb); + return; +error: + dev_kfree_skb_any(skb); +} + +static struct sk_buff *mhi_net_skb_agg(struct mhi_mbim_context *mbim, + struct sk_buff *skb) +{ + struct sk_buff *head = mbim->skbagg_head; + struct sk_buff *tail = mbim->skbagg_tail; + + /* This is non-paged skb chaining using frag_list */ + if (!head) { + mbim->skbagg_head = skb; + return skb; + } + + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = skb; + else + tail->next = skb; + + head->len += skb->len; + head->data_len += skb->len; + head->truesize += skb->truesize; + + mbim->skbagg_tail = skb; + + return mbim->skbagg_head; +} + +static void mhi_net_rx_refill_work(struct work_struct *work) +{ + struct mhi_mbim_context *mbim = container_of(work, struct mhi_mbim_context, + rx_refill.work); + struct mhi_device *mdev = mbim->mdev; + int err; + + while (!mhi_queue_is_full(mdev, DMA_FROM_DEVICE)) { + struct sk_buff *skb = alloc_skb(MHI_DEFAULT_MRU, GFP_KERNEL); + + if (unlikely(!skb)) + break; + + err = mhi_queue_skb(mdev, DMA_FROM_DEVICE, skb, + MHI_DEFAULT_MRU, MHI_EOT); + if (unlikely(err)) { + kfree_skb(skb); + break; + } + + /* Do not hog the CPU if rx buffers are consumed faster than + * queued (unlikely). + */ + cond_resched(); + } + + /* If we're still starved of rx buffers, reschedule later */ + if (mhi_get_free_desc_count(mdev, DMA_FROM_DEVICE) == mbim->rx_queue_sz) + schedule_delayed_work(&mbim->rx_refill, HZ / 2); +} + +static void mhi_mbim_dl_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct sk_buff *skb = mhi_res->buf_addr; + int free_desc_count; + + free_desc_count = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + + if (unlikely(mhi_res->transaction_status)) { + switch (mhi_res->transaction_status) { + case -EOVERFLOW: + /* Packet has been split over multiple transfers */ + skb_put(skb, mhi_res->bytes_xferd); + mhi_net_skb_agg(mbim, skb); + break; + case -ENOTCONN: + /* MHI layer stopping/resetting the DL channel */ + dev_kfree_skb_any(skb); + return; + default: + /* Unknown error, simply drop */ + dev_kfree_skb_any(skb); + } + } else { + skb_put(skb, mhi_res->bytes_xferd); + + if (mbim->skbagg_head) { + /* Aggregate the final fragment */ + skb = mhi_net_skb_agg(mbim, skb); + mbim->skbagg_head = NULL; + } + + mhi_mbim_rx(mbim, skb); + } + + /* Refill if RX buffers queue becomes low */ + if (free_desc_count >= mbim->rx_queue_sz / 2) + schedule_delayed_work(&mbim->rx_refill, 0); +} + +static void mhi_mbim_ndo_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(&link->rx_syncp); + stats->rx_packets = u64_stats_read(&link->rx_packets); + stats->rx_bytes = u64_stats_read(&link->rx_bytes); + stats->rx_errors = u64_stats_read(&link->rx_errors); + } while (u64_stats_fetch_retry_irq(&link->rx_syncp, start)); + + do { + start = u64_stats_fetch_begin_irq(&link->tx_syncp); + stats->tx_packets = u64_stats_read(&link->tx_packets); + stats->tx_bytes = u64_stats_read(&link->tx_bytes); + stats->tx_errors = u64_stats_read(&link->tx_errors); + stats->tx_dropped = u64_stats_read(&link->tx_dropped); + } while (u64_stats_fetch_retry_irq(&link->tx_syncp, start)); +} + +static void mhi_mbim_ul_callback(struct mhi_device *mhi_dev, + struct mhi_result *mhi_res) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct sk_buff *skb = mhi_res->buf_addr; + struct net_device *ndev = skb->dev; + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + /* Hardware has consumed the buffer, so free the skb (which is not + * freed by the MHI stack) and perform accounting. + */ + dev_consume_skb_any(skb); + + u64_stats_update_begin(&link->tx_syncp); + if (unlikely(mhi_res->transaction_status)) { + /* MHI layer stopping/resetting the UL channel */ + if (mhi_res->transaction_status == -ENOTCONN) { + u64_stats_update_end(&link->tx_syncp); + return; + } + + u64_stats_inc(&link->tx_errors); + } else { + u64_stats_inc(&link->tx_packets); + u64_stats_add(&link->tx_bytes, mhi_res->bytes_xferd); + } + u64_stats_update_end(&link->tx_syncp); + + if (netif_queue_stopped(ndev) && !mhi_queue_is_full(mbim->mdev, DMA_TO_DEVICE)) + netif_wake_queue(ndev); +} + +static int mhi_mbim_ndo_open(struct net_device *ndev) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + /* Feed the MHI rx buffer pool */ + schedule_delayed_work(&link->mbim->rx_refill, 0); + + /* Carrier is established via out-of-band channel (e.g. qmi) */ + netif_carrier_on(ndev); + + netif_start_queue(ndev); + + return 0; +} + +static int mhi_mbim_ndo_stop(struct net_device *ndev) +{ + netif_stop_queue(ndev); + netif_carrier_off(ndev); + + return 0; +} + +static const struct net_device_ops mhi_mbim_ndo = { + .ndo_open = mhi_mbim_ndo_open, + .ndo_stop = mhi_mbim_ndo_stop, + .ndo_start_xmit = mhi_mbim_ndo_xmit, + .ndo_get_stats64 = mhi_mbim_ndo_get_stats64, +}; + +static int mhi_mbim_newlink(void *ctxt, struct net_device *ndev, u32 if_id, + struct netlink_ext_ack *extack) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + struct mhi_mbim_context *mbim = ctxt; + + link->session = if_id; + link->mbim = mbim; + link->ndev = ndev; + u64_stats_init(&link->rx_syncp); + u64_stats_init(&link->tx_syncp); + + rcu_read_lock(); + if (mhi_mbim_get_link_rcu(mbim, if_id)) { + rcu_read_unlock(); + return -EEXIST; + } + rcu_read_unlock(); + + /* Already protected by RTNL lock */ + hlist_add_head_rcu(&link->hlnode, &mbim->link_list[LINK_HASH(if_id)]); + + return register_netdevice(ndev); +} + +static void mhi_mbim_dellink(void *ctxt, struct net_device *ndev, + struct list_head *head) +{ + struct mhi_mbim_link *link = wwan_netdev_drvpriv(ndev); + + hlist_del_init_rcu(&link->hlnode); + synchronize_rcu(); + + unregister_netdevice_queue(ndev, head); +} + +static void mhi_mbim_setup(struct net_device *ndev) +{ + ndev->header_ops = NULL; /* No header */ + ndev->type = ARPHRD_RAWIP; + ndev->needed_headroom = sizeof(struct mbim_tx_hdr); + ndev->hard_header_len = 0; + ndev->addr_len = 0; + ndev->flags = IFF_POINTOPOINT | IFF_NOARP; + ndev->netdev_ops = &mhi_mbim_ndo; + ndev->mtu = MHI_MBIM_DEFAULT_MTU; + ndev->min_mtu = ETH_MIN_MTU; + ndev->max_mtu = MHI_MAX_BUF_SZ - ndev->needed_headroom; + ndev->tx_queue_len = 1000; +} + +static const struct wwan_ops mhi_mbim_wwan_ops = { + .priv_size = sizeof(struct mhi_mbim_link), + .setup = mhi_mbim_setup, + .newlink = mhi_mbim_newlink, + .dellink = mhi_mbim_dellink, +}; + +static int mhi_mbim_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id) +{ + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + struct mhi_mbim_context *mbim; + int err; + + mbim = devm_kzalloc(&mhi_dev->dev, sizeof(*mbim), GFP_KERNEL); + if (!mbim) + return -ENOMEM; + + spin_lock_init(&mbim->tx_lock); + dev_set_drvdata(&mhi_dev->dev, mbim); + mbim->mdev = mhi_dev; + mbim->mru = mhi_dev->mhi_cntrl->mru ? mhi_dev->mhi_cntrl->mru : MHI_DEFAULT_MRU; + + INIT_DELAYED_WORK(&mbim->rx_refill, mhi_net_rx_refill_work); + + /* Start MHI channels */ + err = mhi_prepare_for_transfer(mhi_dev, 0); + if (err) + return err; + + /* Number of transfer descriptors determines size of the queue */ + mbim->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE); + + /* Register wwan link ops with MHI controller representing WWAN instance */ + return wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_mbim_wwan_ops, mbim, 0); +} + +static void mhi_mbim_remove(struct mhi_device *mhi_dev) +{ + struct mhi_mbim_context *mbim = dev_get_drvdata(&mhi_dev->dev); + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + + mhi_unprepare_from_transfer(mhi_dev); + cancel_delayed_work_sync(&mbim->rx_refill); + wwan_unregister_ops(&cntrl->mhi_dev->dev); + kfree_skb(mbim->skbagg_head); + dev_set_drvdata(&mhi_dev->dev, NULL); +} + +static const struct mhi_device_id mhi_mbim_id_table[] = { + /* Hardware accelerated data PATH (to modem IPA), MBIM protocol */ + { .chan = "IP_HW0_MBIM", .driver_data = 0 }, + {} +}; +MODULE_DEVICE_TABLE(mhi, mhi_mbim_id_table); + +static struct mhi_driver mhi_mbim_driver = { + .probe = mhi_mbim_probe, + .remove = mhi_mbim_remove, + .dl_xfer_cb = mhi_mbim_dl_callback, + .ul_xfer_cb = mhi_mbim_ul_callback, + .id_table = mhi_mbim_id_table, + .driver = { + .name = "mhi_wwan_mbim", + .owner = THIS_MODULE, + }, +}; + +module_mhi_driver(mhi_mbim_driver); + +MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>"); +MODULE_DESCRIPTION("Network/MBIM over MHI"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/nfc/nfcsim.c b/drivers/nfc/nfcsim.c index 15754671eb4d..85bf8d586c70 100644 --- a/drivers/nfc/nfcsim.c +++ b/drivers/nfc/nfcsim.c @@ -192,8 +192,7 @@ static void nfcsim_recv_wq(struct work_struct *work) if (!IS_ERR(skb)) dev_kfree_skb(skb); - - skb = ERR_PTR(-ENODEV); + return; } dev->cb(dev->nfc_digital_dev, dev->arg, skb); diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c index 1421ffd46d9a..1af7a1e632cf 100644 --- a/drivers/nfc/s3fwrn5/firmware.c +++ b/drivers/nfc/s3fwrn5/firmware.c @@ -422,7 +422,7 @@ int s3fwrn5_fw_download(struct s3fwrn5_fw_info *fw_info) tfm = crypto_alloc_shash("sha1", 0, 0); if (IS_ERR(tfm)) { dev_err(&fw_info->ndev->nfc_dev->dev, - "Cannot allocate shash (code=%d)\n", ret); + "Cannot allocate shash (code=%pe)\n", tfm); return PTR_ERR(tfm); } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 11779be42186..dfd9dec0c1f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -900,7 +900,10 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); - cmnd->write_zeroes.control = 0; + if (nvme_ns_has_pi(ns)) + cmnd->write_zeroes.control = cpu_to_le16(NVME_RW_PRINFO_PRACT); + else + cmnd->write_zeroes.control = 0; return BLK_STS_OK; } @@ -3807,6 +3810,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid, static void nvme_ns_remove(struct nvme_ns *ns) { + bool last_path = false; + if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) return; @@ -3815,8 +3820,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); - if (list_empty(&ns->head->list)) - list_del_init(&ns->head->entry); mutex_unlock(&ns->ctrl->subsys->lock); synchronize_rcu(); /* guarantee not available in head->list */ @@ -3836,7 +3839,15 @@ static void nvme_ns_remove(struct nvme_ns *ns) list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - nvme_mpath_check_last_path(ns); + /* Synchronize with nvme_init_ns_head() */ + mutex_lock(&ns->head->subsys->lock); + if (list_empty(&ns->head->list)) { + list_del_init(&ns->head->entry); + last_path = true; + } + mutex_unlock(&ns->head->subsys->lock); + if (last_path) + nvme_mpath_shutdown_disk(ns->head); nvme_put_ns(ns); } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 0ea5298469c3..3f32c5e86bfc 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -760,14 +760,21 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) #endif } -void nvme_mpath_remove_disk(struct nvme_ns_head *head) +void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) { if (!head->disk) return; + kblockd_schedule_work(&head->requeue_work); if (head->disk->flags & GENHD_FL_UP) { nvme_cdev_del(&head->cdev, &head->cdev_device); del_gendisk(head->disk); } +} + +void nvme_mpath_remove_disk(struct nvme_ns_head *head) +{ + if (!head->disk) + return; blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 18ef8dd03a90..5cd1fa3b8464 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -716,14 +716,7 @@ void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl); - -static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) -{ - struct nvme_ns_head *head = ns->head; - - if (head->disk && list_empty(&head->list)) - kblockd_schedule_work(&head->requeue_work); -} +void nvme_mpath_shutdown_disk(struct nvme_ns_head *head); static inline void nvme_trace_bio_complete(struct request *req) { @@ -772,7 +765,7 @@ static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { } -static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) +static inline void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) { } static inline void nvme_trace_bio_complete(struct request *req) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 320051f5a3dd..51852085239e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2631,7 +2631,9 @@ static void nvme_reset_work(struct work_struct *work) bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL); int result; - if (WARN_ON(dev->ctrl.state != NVME_CTRL_RESETTING)) { + if (dev->ctrl.state != NVME_CTRL_RESETTING) { + dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n", + dev->ctrl.state); result = -ENODEV; goto out; } diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index daaf700eae79..35bac7a25422 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -56,7 +56,7 @@ TRACE_EVENT(nvme_setup_cmd, __field(u8, fctype) __field(u16, cid) __field(u32, nsid) - __field(u64, metadata) + __field(bool, metadata) __array(u8, cdw10, 24) ), TP_fast_assign( @@ -66,13 +66,13 @@ TRACE_EVENT(nvme_setup_cmd, __entry->flags = cmd->common.flags; __entry->cid = cmd->common.command_id; __entry->nsid = le32_to_cpu(cmd->common.nsid); - __entry->metadata = le64_to_cpu(cmd->common.metadata); + __entry->metadata = !!blk_integrity_rq(req); __entry->fctype = cmd->fabrics.fctype; __assign_disk_name(__entry->disk, req->rq_disk); memcpy(__entry->cdw10, &cmd->common.cdw10, sizeof(__entry->cdw10)); ), - TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%llx, cmd=(%s %s)", + TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)", __entry->ctrl_id, __print_disk_name(__entry->disk), __entry->qid, __entry->cid, __entry->nsid, __entry->flags, __entry->metadata, diff --git a/drivers/pcmcia/i82092.c b/drivers/pcmcia/i82092.c index 85887d885b5f..192c9049d654 100644 --- a/drivers/pcmcia/i82092.c +++ b/drivers/pcmcia/i82092.c @@ -112,6 +112,7 @@ static int i82092aa_pci_probe(struct pci_dev *dev, for (i = 0; i < socket_count; i++) { sockets[i].card_state = 1; /* 1 = present but empty */ sockets[i].io_base = pci_resource_start(dev, 0); + sockets[i].dev = dev; sockets[i].socket.features |= SS_CAP_PCCARD; sockets[i].socket.map_size = 0x1000; sockets[i].socket.irq_mask = 0; diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index b9da58ee9b1e..3481479a2942 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -46,34 +46,79 @@ #define AMD_PMC_RESULT_CMD_UNKNOWN 0xFE #define AMD_PMC_RESULT_FAILED 0xFF +/* FCH SSC Registers */ +#define FCH_S0I3_ENTRY_TIME_L_OFFSET 0x30 +#define FCH_S0I3_ENTRY_TIME_H_OFFSET 0x34 +#define FCH_S0I3_EXIT_TIME_L_OFFSET 0x38 +#define FCH_S0I3_EXIT_TIME_H_OFFSET 0x3C +#define FCH_SSC_MAPPING_SIZE 0x800 +#define FCH_BASE_PHY_ADDR_LOW 0xFED81100 +#define FCH_BASE_PHY_ADDR_HIGH 0x00000000 + +/* SMU Message Definations */ +#define SMU_MSG_GETSMUVERSION 0x02 +#define SMU_MSG_LOG_GETDRAM_ADDR_HI 0x04 +#define SMU_MSG_LOG_GETDRAM_ADDR_LO 0x05 +#define SMU_MSG_LOG_START 0x06 +#define SMU_MSG_LOG_RESET 0x07 +#define SMU_MSG_LOG_DUMP_DATA 0x08 +#define SMU_MSG_GET_SUP_CONSTRAINTS 0x09 /* List of supported CPU ids */ #define AMD_CPU_ID_RV 0x15D0 #define AMD_CPU_ID_RN 0x1630 #define AMD_CPU_ID_PCO AMD_CPU_ID_RV #define AMD_CPU_ID_CZN AMD_CPU_ID_RN +#define AMD_CPU_ID_YC 0x14B5 -#define AMD_SMU_FW_VERSION 0x0 #define PMC_MSG_DELAY_MIN_US 100 #define RESPONSE_REGISTER_LOOP_MAX 200 +#define SOC_SUBSYSTEM_IP_MAX 12 +#define DELAY_MIN_US 2000 +#define DELAY_MAX_US 3000 enum amd_pmc_def { MSG_TEST = 0x01, MSG_OS_HINT_PCO, MSG_OS_HINT_RN, }; +struct amd_pmc_bit_map { + const char *name; + u32 bit_mask; +}; + +static const struct amd_pmc_bit_map soc15_ip_blk[] = { + {"DISPLAY", BIT(0)}, + {"CPU", BIT(1)}, + {"GFX", BIT(2)}, + {"VDD", BIT(3)}, + {"ACP", BIT(4)}, + {"VCN", BIT(5)}, + {"ISP", BIT(6)}, + {"NBIO", BIT(7)}, + {"DF", BIT(8)}, + {"USB0", BIT(9)}, + {"USB1", BIT(10)}, + {"LAPIC", BIT(11)}, + {} +}; + struct amd_pmc_dev { void __iomem *regbase; - void __iomem *smu_base; + void __iomem *smu_virt_addr; + void __iomem *fch_virt_addr; u32 base_addr; u32 cpu_id; + u32 active_ips; struct device *dev; + struct mutex lock; /* generic mutex lock */ #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbgfs_dir; #endif /* CONFIG_DEBUG_FS */ }; static struct amd_pmc_dev pmc; +static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret); static inline u32 amd_pmc_reg_read(struct amd_pmc_dev *dev, int reg_offset) { @@ -85,18 +130,77 @@ static inline void amd_pmc_reg_write(struct amd_pmc_dev *dev, int reg_offset, u3 iowrite32(val, dev->regbase + reg_offset); } +struct smu_metrics { + u32 table_version; + u32 hint_count; + u32 s0i3_cyclecount; + u32 timein_s0i2; + u64 timeentering_s0i3_lastcapture; + u64 timeentering_s0i3_totaltime; + u64 timeto_resume_to_os_lastcapture; + u64 timeto_resume_to_os_totaltime; + u64 timein_s0i3_lastcapture; + u64 timein_s0i3_totaltime; + u64 timein_swdrips_lastcapture; + u64 timein_swdrips_totaltime; + u64 timecondition_notmet_lastcapture[SOC_SUBSYSTEM_IP_MAX]; + u64 timecondition_notmet_totaltime[SOC_SUBSYSTEM_IP_MAX]; +} __packed; + #ifdef CONFIG_DEBUG_FS static int smu_fw_info_show(struct seq_file *s, void *unused) { struct amd_pmc_dev *dev = s->private; - u32 value; + struct smu_metrics table; + int idx; + + if (dev->cpu_id == AMD_CPU_ID_PCO) + return -EINVAL; + + memcpy_fromio(&table, dev->smu_virt_addr, sizeof(struct smu_metrics)); + + seq_puts(s, "\n=== SMU Statistics ===\n"); + seq_printf(s, "Table Version: %d\n", table.table_version); + seq_printf(s, "Hint Count: %d\n", table.hint_count); + seq_printf(s, "S0i3 Cycle Count: %d\n", table.s0i3_cyclecount); + seq_printf(s, "Time (in us) to S0i3: %lld\n", table.timeentering_s0i3_lastcapture); + seq_printf(s, "Time (in us) in S0i3: %lld\n", table.timein_s0i3_lastcapture); + + seq_puts(s, "\n=== Active time (in us) ===\n"); + for (idx = 0 ; idx < SOC_SUBSYSTEM_IP_MAX ; idx++) { + if (soc15_ip_blk[idx].bit_mask & dev->active_ips) + seq_printf(s, "%-8s : %lld\n", soc15_ip_blk[idx].name, + table.timecondition_notmet_lastcapture[idx]); + } - value = ioread32(dev->smu_base + AMD_SMU_FW_VERSION); - seq_printf(s, "SMU FW Info: %x\n", value); return 0; } DEFINE_SHOW_ATTRIBUTE(smu_fw_info); +static int s0ix_stats_show(struct seq_file *s, void *unused) +{ + struct amd_pmc_dev *dev = s->private; + u64 entry_time, exit_time, residency; + + entry_time = ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_H_OFFSET); + entry_time = entry_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_ENTRY_TIME_L_OFFSET); + + exit_time = ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_H_OFFSET); + exit_time = exit_time << 32 | ioread32(dev->fch_virt_addr + FCH_S0I3_EXIT_TIME_L_OFFSET); + + /* It's in 48MHz. We need to convert it */ + residency = exit_time - entry_time; + do_div(residency, 48); + + seq_puts(s, "=== S0ix statistics ===\n"); + seq_printf(s, "S0ix Entry Time: %lld\n", entry_time); + seq_printf(s, "S0ix Exit Time: %lld\n", exit_time); + seq_printf(s, "Residency Time: %lld\n", residency); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(s0ix_stats); + static void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev) { debugfs_remove_recursive(dev->dbgfs_dir); @@ -107,6 +211,8 @@ static void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) dev->dbgfs_dir = debugfs_create_dir("amd_pmc", NULL); debugfs_create_file("smu_fw_info", 0644, dev->dbgfs_dir, dev, &smu_fw_info_fops); + debugfs_create_file("s0ix_stats", 0644, dev->dbgfs_dir, dev, + &s0ix_stats_fops); } #else static inline void amd_pmc_dbgfs_register(struct amd_pmc_dev *dev) @@ -118,6 +224,32 @@ static inline void amd_pmc_dbgfs_unregister(struct amd_pmc_dev *dev) } #endif /* CONFIG_DEBUG_FS */ +static int amd_pmc_setup_smu_logging(struct amd_pmc_dev *dev) +{ + u32 phys_addr_low, phys_addr_hi; + u64 smu_phys_addr; + + if (dev->cpu_id == AMD_CPU_ID_PCO) + return -EINVAL; + + /* Get Active devices list from SMU */ + amd_pmc_send_cmd(dev, 0, &dev->active_ips, SMU_MSG_GET_SUP_CONSTRAINTS, 1); + + /* Get dram address */ + amd_pmc_send_cmd(dev, 0, &phys_addr_low, SMU_MSG_LOG_GETDRAM_ADDR_LO, 1); + amd_pmc_send_cmd(dev, 0, &phys_addr_hi, SMU_MSG_LOG_GETDRAM_ADDR_HI, 1); + smu_phys_addr = ((u64)phys_addr_hi << 32 | phys_addr_low); + + dev->smu_virt_addr = devm_ioremap(dev->dev, smu_phys_addr, sizeof(struct smu_metrics)); + if (!dev->smu_virt_addr) + return -ENOMEM; + + /* Start the logging */ + amd_pmc_send_cmd(dev, 0, NULL, SMU_MSG_LOG_START, 0); + + return 0; +} + static void amd_pmc_dump_registers(struct amd_pmc_dev *dev) { u32 value; @@ -132,19 +264,19 @@ static void amd_pmc_dump_registers(struct amd_pmc_dev *dev) dev_dbg(dev->dev, "AMD_PMC_REGISTER_MESSAGE:%x\n", value); } -static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) +static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set, u32 *data, u8 msg, bool ret) { int rc; - u8 msg; u32 val; + mutex_lock(&dev->lock); /* Wait until we get a valid response */ rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE, - val, val > 0, PMC_MSG_DELAY_MIN_US, + val, val != 0, PMC_MSG_DELAY_MIN_US, PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX); if (rc) { dev_err(dev->dev, "failed to talk to SMU\n"); - return rc; + goto out_unlock; } /* Write zero to response register */ @@ -154,34 +286,91 @@ static int amd_pmc_send_cmd(struct amd_pmc_dev *dev, bool set) amd_pmc_reg_write(dev, AMD_PMC_REGISTER_ARGUMENT, set); /* Write message ID to message ID register */ - msg = (dev->cpu_id == AMD_CPU_ID_RN) ? MSG_OS_HINT_RN : MSG_OS_HINT_PCO; amd_pmc_reg_write(dev, AMD_PMC_REGISTER_MESSAGE, msg); - return 0; + + /* Wait until we get a valid response */ + rc = readx_poll_timeout(ioread32, dev->regbase + AMD_PMC_REGISTER_RESPONSE, + val, val != 0, PMC_MSG_DELAY_MIN_US, + PMC_MSG_DELAY_MIN_US * RESPONSE_REGISTER_LOOP_MAX); + if (rc) { + dev_err(dev->dev, "SMU response timed out\n"); + goto out_unlock; + } + + switch (val) { + case AMD_PMC_RESULT_OK: + if (ret) { + /* PMFW may take longer time to return back the data */ + usleep_range(DELAY_MIN_US, 10 * DELAY_MAX_US); + *data = amd_pmc_reg_read(dev, AMD_PMC_REGISTER_ARGUMENT); + } + break; + case AMD_PMC_RESULT_CMD_REJECT_BUSY: + dev_err(dev->dev, "SMU not ready. err: 0x%x\n", val); + rc = -EBUSY; + goto out_unlock; + case AMD_PMC_RESULT_CMD_UNKNOWN: + dev_err(dev->dev, "SMU cmd unknown. err: 0x%x\n", val); + rc = -EINVAL; + goto out_unlock; + case AMD_PMC_RESULT_CMD_REJECT_PREREQ: + case AMD_PMC_RESULT_FAILED: + default: + dev_err(dev->dev, "SMU cmd failed. err: 0x%x\n", val); + rc = -EIO; + goto out_unlock; + } + +out_unlock: + mutex_unlock(&dev->lock); + amd_pmc_dump_registers(dev); + return rc; +} + +static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) +{ + switch (dev->cpu_id) { + case AMD_CPU_ID_PCO: + return MSG_OS_HINT_PCO; + case AMD_CPU_ID_RN: + case AMD_CPU_ID_YC: + return MSG_OS_HINT_RN; + } + return -EINVAL; } static int __maybe_unused amd_pmc_suspend(struct device *dev) { struct amd_pmc_dev *pdev = dev_get_drvdata(dev); int rc; + u8 msg; + + /* Reset and Start SMU logging - to monitor the s0i3 stats */ + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_RESET, 0); + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_START, 0); - rc = amd_pmc_send_cmd(pdev, 1); + msg = amd_pmc_get_os_hint(pdev); + rc = amd_pmc_send_cmd(pdev, 1, NULL, msg, 0); if (rc) dev_err(pdev->dev, "suspend failed\n"); - amd_pmc_dump_registers(pdev); - return 0; + return rc; } static int __maybe_unused amd_pmc_resume(struct device *dev) { struct amd_pmc_dev *pdev = dev_get_drvdata(dev); int rc; + u8 msg; + + /* Let SMU know that we are looking for stats */ + amd_pmc_send_cmd(pdev, 0, NULL, SMU_MSG_LOG_DUMP_DATA, 0); - rc = amd_pmc_send_cmd(pdev, 0); + msg = amd_pmc_get_os_hint(pdev); + rc = amd_pmc_send_cmd(pdev, 0, NULL, msg, 0); if (rc) dev_err(pdev->dev, "resume failed\n"); - amd_pmc_dump_registers(pdev); return 0; } @@ -190,6 +379,7 @@ static const struct dev_pm_ops amd_pmc_pm_ops = { }; static const struct pci_device_id pmc_pci_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PCO) }, @@ -201,9 +391,8 @@ static int amd_pmc_probe(struct platform_device *pdev) { struct amd_pmc_dev *dev = &pmc; struct pci_dev *rdev; - u32 base_addr_lo; - u32 base_addr_hi; - u64 base_addr; + u32 base_addr_lo, base_addr_hi; + u64 base_addr, fch_phys_addr; int err; u32 val; @@ -248,16 +437,25 @@ static int amd_pmc_probe(struct platform_device *pdev) pci_dev_put(rdev); base_addr = ((u64)base_addr_hi << 32 | base_addr_lo); - dev->smu_base = devm_ioremap(dev->dev, base_addr, AMD_PMC_MAPPING_SIZE); - if (!dev->smu_base) - return -ENOMEM; - dev->regbase = devm_ioremap(dev->dev, base_addr + AMD_PMC_BASE_ADDR_OFFSET, AMD_PMC_MAPPING_SIZE); if (!dev->regbase) return -ENOMEM; - amd_pmc_dump_registers(dev); + mutex_init(&dev->lock); + + /* Use FCH registers to get the S0ix stats */ + base_addr_lo = FCH_BASE_PHY_ADDR_LOW; + base_addr_hi = FCH_BASE_PHY_ADDR_HIGH; + fch_phys_addr = ((u64)base_addr_hi << 32 | base_addr_lo); + dev->fch_virt_addr = devm_ioremap(dev->dev, fch_phys_addr, FCH_SSC_MAPPING_SIZE); + if (!dev->fch_virt_addr) + return -ENOMEM; + + /* Use SMU to get the s0i3 debug stats */ + err = amd_pmc_setup_smu_logging(dev); + if (err) + dev_err(dev->dev, "SMU debugging info not supported on this platform\n"); platform_set_drvdata(pdev, dev); amd_pmc_dbgfs_register(dev); @@ -269,11 +467,14 @@ static int amd_pmc_remove(struct platform_device *pdev) struct amd_pmc_dev *dev = platform_get_drvdata(pdev); amd_pmc_dbgfs_unregister(dev); + mutex_destroy(&dev->lock); return 0; } static const struct acpi_device_id amd_pmc_acpi_ids[] = { {"AMDI0005", 0}, + {"AMDI0006", 0}, + {"AMDI0007", 0}, {"AMD0004", 0}, { } }; diff --git a/drivers/platform/x86/gigabyte-wmi.c b/drivers/platform/x86/gigabyte-wmi.c index 5529d7b0abea..fbb224a82e34 100644 --- a/drivers/platform/x86/gigabyte-wmi.c +++ b/drivers/platform/x86/gigabyte-wmi.c @@ -141,6 +141,7 @@ static u8 gigabyte_wmi_detect_sensor_usability(struct wmi_device *wdev) static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = { DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"), + DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 GAMING X V2"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"), DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"), diff --git a/drivers/platform/x86/intel-hid.c b/drivers/platform/x86/intel-hid.c index 078648a9201b..e5fbe017f8e1 100644 --- a/drivers/platform/x86/intel-hid.c +++ b/drivers/platform/x86/intel-hid.c @@ -25,6 +25,7 @@ static const struct acpi_device_id intel_hid_ids[] = { {"INT33D5", 0}, {"INTC1051", 0}, {"INTC1054", 0}, + {"INTC1070", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, intel_hid_ids); diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c index 3671b5d20613..6cfed4427fb0 100644 --- a/drivers/platform/x86/think-lmi.c +++ b/drivers/platform/x86/think-lmi.c @@ -571,6 +571,11 @@ static ssize_t current_value_store(struct kobject *kobj, else ret = tlmi_save_bios_settings(""); + if (!ret && !tlmi_priv.pending_changes) { + tlmi_priv.pending_changes = true; + /* let userland know it may need to check reboot pending again */ + kobject_uevent(&tlmi_priv.class_dev->kobj, KOBJ_CHANGE); + } out: kfree(auth_str); kfree(set_str); @@ -647,6 +652,14 @@ static struct kobj_type tlmi_pwd_setting_ktype = { .sysfs_ops = &tlmi_kobj_sysfs_ops, }; +static ssize_t pending_reboot_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", tlmi_priv.pending_changes); +} + +static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot); + /* ---- Initialisation --------------------------------------------------------- */ static void tlmi_release_attr(void) { @@ -659,6 +672,7 @@ static void tlmi_release_attr(void) kobject_put(&tlmi_priv.setting[i]->kobj); } } + sysfs_remove_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); kset_unregister(tlmi_priv.attribute_kset); /* Authentication structures */ @@ -709,8 +723,8 @@ static int tlmi_sysfs_init(void) /* Build attribute */ tlmi_priv.setting[i]->kobj.kset = tlmi_priv.attribute_kset; - ret = kobject_init_and_add(&tlmi_priv.setting[i]->kobj, &tlmi_attr_setting_ktype, - NULL, "%s", tlmi_priv.setting[i]->display_name); + ret = kobject_add(&tlmi_priv.setting[i]->kobj, NULL, + "%s", tlmi_priv.setting[i]->display_name); if (ret) goto fail_create_attr; @@ -719,6 +733,10 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; } + ret = sysfs_create_file(&tlmi_priv.attribute_kset->kobj, &pending_reboot.attr); + if (ret) + goto fail_create_attr; + /* Create authentication entries */ tlmi_priv.authentication_kset = kset_create_and_add("authentication", NULL, &tlmi_priv.class_dev->kobj); @@ -727,8 +745,7 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; } tlmi_priv.pwd_admin->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_init_and_add(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype, - NULL, "%s", "Admin"); + ret = kobject_add(&tlmi_priv.pwd_admin->kobj, NULL, "%s", "Admin"); if (ret) goto fail_create_attr; @@ -737,8 +754,7 @@ static int tlmi_sysfs_init(void) goto fail_create_attr; tlmi_priv.pwd_power->kobj.kset = tlmi_priv.authentication_kset; - ret = kobject_init_and_add(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype, - NULL, "%s", "System"); + ret = kobject_add(&tlmi_priv.pwd_power->kobj, NULL, "%s", "System"); if (ret) goto fail_create_attr; @@ -818,6 +834,7 @@ static int tlmi_analyze(void) pr_info("Error retrieving possible values for %d : %s\n", i, setting->display_name); } + kobject_init(&setting->kobj, &tlmi_attr_setting_ktype); tlmi_priv.setting[i] = setting; tlmi_priv.settings_count++; kfree(item); @@ -844,10 +861,12 @@ static int tlmi_analyze(void) if (pwdcfg.password_state & TLMI_PAP_PWD) tlmi_priv.pwd_admin->valid = true; + kobject_init(&tlmi_priv.pwd_admin->kobj, &tlmi_pwd_setting_ktype); + tlmi_priv.pwd_power = kzalloc(sizeof(struct tlmi_pwd_setting), GFP_KERNEL); if (!tlmi_priv.pwd_power) { ret = -ENOMEM; - goto fail_clear_attr; + goto fail_free_pwd_admin; } strscpy(tlmi_priv.pwd_power->kbdlang, "us", TLMI_LANG_MAXLEN); tlmi_priv.pwd_power->encoding = TLMI_ENCODING_ASCII; @@ -859,11 +878,19 @@ static int tlmi_analyze(void) if (pwdcfg.password_state & TLMI_POP_PWD) tlmi_priv.pwd_power->valid = true; + kobject_init(&tlmi_priv.pwd_power->kobj, &tlmi_pwd_setting_ktype); + return 0; +fail_free_pwd_admin: + kfree(tlmi_priv.pwd_admin); fail_clear_attr: - for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) - kfree(tlmi_priv.setting[i]); + for (i = 0; i < TLMI_SETTINGS_COUNT; ++i) { + if (tlmi_priv.setting[i]) { + kfree(tlmi_priv.setting[i]->possible_values); + kfree(tlmi_priv.setting[i]); + } + } return ret; } diff --git a/drivers/platform/x86/think-lmi.h b/drivers/platform/x86/think-lmi.h index 6fa8da7af6c7..eb598846628a 100644 --- a/drivers/platform/x86/think-lmi.h +++ b/drivers/platform/x86/think-lmi.h @@ -60,6 +60,7 @@ struct think_lmi { bool can_get_bios_selections; bool can_set_bios_password; bool can_get_password_settings; + bool pending_changes; struct tlmi_attr_setting *setting[TLMI_SETTINGS_COUNT]; struct device *class_dev; diff --git a/drivers/platform/x86/wireless-hotkey.c b/drivers/platform/x86/wireless-hotkey.c index b010e4ca3383..11c60a273446 100644 --- a/drivers/platform/x86/wireless-hotkey.c +++ b/drivers/platform/x86/wireless-hotkey.c @@ -78,7 +78,7 @@ static int wl_add(struct acpi_device *device) err = wireless_input_setup(); if (err) - pr_err("Failed to setup hp wireless hotkeys\n"); + pr_err("Failed to setup wireless hotkeys\n"); return err; } diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig index 8c20e524e9ad..8b08745e1ca1 100644 --- a/drivers/ptp/Kconfig +++ b/drivers/ptp/Kconfig @@ -157,6 +157,13 @@ config PTP_1588_CLOCK_OCP tristate "OpenCompute TimeCard as PTP clock" depends on PTP_1588_CLOCK depends on HAS_IOMEM && PCI + depends on SPI && I2C && MTD + imply SPI_MEM + imply SPI_XILINX + imply MTD_SPI_NOR + imply I2C_XILINX + select SERIAL_8250 + default n help This driver adds support for an OpenCompute time card. diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c index 0d1034e3ed0f..92edf772feed 100644 --- a/drivers/ptp/ptp_ocp.c +++ b/drivers/ptp/ptp_ocp.c @@ -6,15 +6,29 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/pci.h> +#include <linux/serial_8250.h> +#include <linux/clkdev.h> +#include <linux/clk-provider.h> +#include <linux/platform_device.h> #include <linux/ptp_clock_kernel.h> +#include <linux/spi/spi.h> +#include <linux/spi/xilinx_spi.h> +#include <net/devlink.h> +#include <linux/i2c.h> +#include <linux/mtd/mtd.h> -static const struct pci_device_id ptp_ocp_pcidev_id[] = { - { PCI_DEVICE(0x1d9b, 0x0400) }, - { 0 } -}; -MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id); +#ifndef PCI_VENDOR_ID_FACEBOOK +#define PCI_VENDOR_ID_FACEBOOK 0x1d9b +#endif -#define OCP_REGISTER_OFFSET 0x01000000 +#ifndef PCI_DEVICE_ID_FACEBOOK_TIMECARD +#define PCI_DEVICE_ID_FACEBOOK_TIMECARD 0x0400 +#endif + +static struct class timecard_class = { + .owner = THIS_MODULE, + .name = "timecard", +}; struct ocp_reg { u32 ctrl; @@ -29,18 +43,29 @@ struct ocp_reg { u32 __pad1[2]; u32 offset_ns; u32 offset_window_ns; + u32 __pad2[2]; + u32 drift_ns; + u32 drift_window_ns; + u32 __pad3[6]; + u32 servo_offset_p; + u32 servo_offset_i; + u32 servo_drift_p; + u32 servo_drift_i; }; #define OCP_CTRL_ENABLE BIT(0) #define OCP_CTRL_ADJUST_TIME BIT(1) #define OCP_CTRL_ADJUST_OFFSET BIT(2) +#define OCP_CTRL_ADJUST_DRIFT BIT(3) +#define OCP_CTRL_ADJUST_SERVO BIT(8) #define OCP_CTRL_READ_TIME_REQ BIT(30) #define OCP_CTRL_READ_TIME_DONE BIT(31) #define OCP_STATUS_IN_SYNC BIT(0) +#define OCP_STATUS_IN_HOLDOVER BIT(1) #define OCP_SELECT_CLK_NONE 0 -#define OCP_SELECT_CLK_REG 6 +#define OCP_SELECT_CLK_REG 0xfe struct tod_reg { u32 ctrl; @@ -55,8 +80,6 @@ struct tod_reg { u32 leap; }; -#define TOD_REGISTER_OFFSET 0x01050000 - #define TOD_CTRL_PROTOCOL BIT(28) #define TOD_CTRL_DISABLE_FMT_A BIT(17) #define TOD_CTRL_DISABLE_FMT_B BIT(16) @@ -68,16 +91,264 @@ struct tod_reg { #define TOD_STATUS_UTC_VALID BIT(8) #define TOD_STATUS_LEAP_VALID BIT(16) +struct ts_reg { + u32 enable; + u32 error; + u32 polarity; + u32 version; + u32 __pad0[4]; + u32 cable_delay; + u32 __pad1[3]; + u32 intr; + u32 intr_mask; + u32 event_count; + u32 __pad2[1]; + u32 ts_count; + u32 time_ns; + u32 time_sec; + u32 data_width; + u32 data; +}; + +struct pps_reg { + u32 ctrl; + u32 status; + u32 __pad0[6]; + u32 cable_delay; +}; + +#define PPS_STATUS_FILTER_ERR BIT(0) +#define PPS_STATUS_SUPERV_ERR BIT(1) + +struct img_reg { + u32 version; +}; + +struct ptp_ocp_flash_info { + const char *name; + int pci_offset; + int data_size; + void *data; +}; + +struct ptp_ocp_ext_info { + const char *name; + int index; + irqreturn_t (*irq_fcn)(int irq, void *priv); + int (*enable)(void *priv, bool enable); +}; + +struct ptp_ocp_ext_src { + void __iomem *mem; + struct ptp_ocp *bp; + struct ptp_ocp_ext_info *info; + int irq_vec; +}; + struct ptp_ocp { struct pci_dev *pdev; + struct device dev; spinlock_t lock; - void __iomem *base; struct ocp_reg __iomem *reg; struct tod_reg __iomem *tod; + struct pps_reg __iomem *pps_to_ext; + struct pps_reg __iomem *pps_to_clk; + struct ptp_ocp_ext_src *pps; + struct ptp_ocp_ext_src *ts0; + struct ptp_ocp_ext_src *ts1; + struct img_reg __iomem *image; struct ptp_clock *ptp; struct ptp_clock_info ptp_info; + struct platform_device *i2c_ctrl; + struct platform_device *spi_flash; + struct clk_hw *i2c_clk; + struct timer_list watchdog; + time64_t gnss_lost; + int id; + int n_irqs; + int gnss_port; + int mac_port; /* miniature atomic clock */ + u8 serial[6]; + int flash_start; + bool has_serial; }; +struct ocp_resource { + unsigned long offset; + int size; + int irq_vec; + int (*setup)(struct ptp_ocp *bp, struct ocp_resource *r); + void *extra; + unsigned long bp_offset; +}; + +static int ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r); +static int ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r); +static irqreturn_t ptp_ocp_ts_irq(int irq, void *priv); +static int ptp_ocp_ts_enable(void *priv, bool enable); + +#define bp_assign_entry(bp, res, val) ({ \ + uintptr_t addr = (uintptr_t)(bp) + (res)->bp_offset; \ + *(typeof(val) *)addr = val; \ +}) + +#define OCP_RES_LOCATION(member) \ + .bp_offset = offsetof(struct ptp_ocp, member) + +#define OCP_MEM_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_mem + +#define OCP_SERIAL_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_serial + +#define OCP_I2C_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_i2c + +#define OCP_SPI_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_spi + +#define OCP_EXT_RESOURCE(member) \ + OCP_RES_LOCATION(member), .setup = ptp_ocp_register_ext + +/* This is the MSI vector mapping used. + * 0: N/C + * 1: TS0 + * 2: TS1 + * 3: GPS + * 4: GPS2 (n/c) + * 5: MAC + * 6: SPI IMU (inertial measurement unit) + * 7: I2C oscillator + * 8: HWICAP + * 9: SPI Flash + */ + +static struct ocp_resource ocp_fb_resource[] = { + { + OCP_MEM_RESOURCE(reg), + .offset = 0x01000000, .size = 0x10000, + }, + { + OCP_EXT_RESOURCE(ts0), + .offset = 0x01010000, .size = 0x10000, .irq_vec = 1, + .extra = &(struct ptp_ocp_ext_info) { + .name = "ts0", .index = 0, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_EXT_RESOURCE(ts1), + .offset = 0x01020000, .size = 0x10000, .irq_vec = 2, + .extra = &(struct ptp_ocp_ext_info) { + .name = "ts1", .index = 1, + .irq_fcn = ptp_ocp_ts_irq, + .enable = ptp_ocp_ts_enable, + }, + }, + { + OCP_MEM_RESOURCE(pps_to_ext), + .offset = 0x01030000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(pps_to_clk), + .offset = 0x01040000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(tod), + .offset = 0x01050000, .size = 0x10000, + }, + { + OCP_MEM_RESOURCE(image), + .offset = 0x00020000, .size = 0x1000, + }, + { + OCP_I2C_RESOURCE(i2c_ctrl), + .offset = 0x00150000, .size = 0x10000, .irq_vec = 7, + }, + { + OCP_SERIAL_RESOURCE(gnss_port), + .offset = 0x00160000 + 0x1000, .irq_vec = 3, + }, + { + OCP_SERIAL_RESOURCE(mac_port), + .offset = 0x00180000 + 0x1000, .irq_vec = 5, + }, + { + OCP_SPI_RESOURCE(spi_flash), + .offset = 0x00310000, .size = 0x10000, .irq_vec = 9, + .extra = &(struct ptp_ocp_flash_info) { + .name = "xilinx_spi", .pci_offset = 0, + .data_size = sizeof(struct xspi_platform_data), + .data = &(struct xspi_platform_data) { + .num_chipselect = 1, + .bits_per_word = 8, + .num_devices = 1, + .devices = &(struct spi_board_info) { + .modalias = "spi-nor", + }, + }, + }, + }, + { + .setup = ptp_ocp_fb_board_init, + }, + { } +}; + +static const struct pci_device_id ptp_ocp_pcidev_id[] = { + { PCI_DEVICE_DATA(FACEBOOK, TIMECARD, &ocp_fb_resource) }, + { 0 } +}; +MODULE_DEVICE_TABLE(pci, ptp_ocp_pcidev_id); + +static DEFINE_MUTEX(ptp_ocp_lock); +static DEFINE_IDR(ptp_ocp_idr); + +static struct { + const char *name; + int value; +} ptp_ocp_clock[] = { + { .name = "NONE", .value = 0 }, + { .name = "TOD", .value = 1 }, + { .name = "IRIG", .value = 2 }, + { .name = "PPS", .value = 3 }, + { .name = "PTP", .value = 4 }, + { .name = "RTC", .value = 5 }, + { .name = "DCF", .value = 6 }, + { .name = "REGS", .value = 0xfe }, + { .name = "EXT", .value = 0xff }, +}; + +static const char * +ptp_ocp_clock_name_from_val(int val) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) + if (ptp_ocp_clock[i].value == val) + return ptp_ocp_clock[i].name; + return NULL; +} + +static int +ptp_ocp_clock_val_from_name(const char *name) +{ + const char *clk; + int i; + + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) { + clk = ptp_ocp_clock[i].name; + if (!strncasecmp(name, clk, strlen(clk))) + return ptp_ocp_clock[i].value; + } + return -EINVAL; +} + static int __ptp_ocp_gettime_locked(struct ptp_ocp *bp, struct timespec64 *ts, struct ptp_system_timestamp *sts) @@ -192,6 +463,45 @@ ptp_ocp_null_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm) return -EOPNOTSUPP; } +static int +ptp_ocp_adjphase(struct ptp_clock_info *ptp_info, s32 phase_ns) +{ + return -EOPNOTSUPP; +} + +static int +ptp_ocp_enable(struct ptp_clock_info *ptp_info, struct ptp_clock_request *rq, + int on) +{ + struct ptp_ocp *bp = container_of(ptp_info, struct ptp_ocp, ptp_info); + struct ptp_ocp_ext_src *ext = NULL; + int err; + + switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + switch (rq->extts.index) { + case 0: + ext = bp->ts0; + break; + case 1: + ext = bp->ts1; + break; + } + break; + case PTP_CLK_REQ_PPS: + ext = bp->pps; + break; + default: + return -EOPNOTSUPP; + } + + err = -ENXIO; + if (ext) + err = ext->info->enable(ext, on); + + return err; +} + static const struct ptp_clock_info ptp_ocp_clock_info = { .owner = THIS_MODULE, .name = KBUILD_MODNAME, @@ -200,10 +510,57 @@ static const struct ptp_clock_info ptp_ocp_clock_info = { .settime64 = ptp_ocp_settime, .adjtime = ptp_ocp_adjtime, .adjfine = ptp_ocp_null_adjfine, + .adjphase = ptp_ocp_adjphase, + .enable = ptp_ocp_enable, + .pps = true, + .n_ext_ts = 2, }; +static void +__ptp_ocp_clear_drift_locked(struct ptp_ocp *bp) +{ + u32 ctrl, select; + + select = ioread32(&bp->reg->select); + iowrite32(OCP_SELECT_CLK_REG, &bp->reg->select); + + iowrite32(0, &bp->reg->drift_ns); + + ctrl = ioread32(&bp->reg->ctrl); + ctrl |= OCP_CTRL_ADJUST_DRIFT; + iowrite32(ctrl, &bp->reg->ctrl); + + /* restore clock selection */ + iowrite32(select >> 16, &bp->reg->select); +} + +static void +ptp_ocp_watchdog(struct timer_list *t) +{ + struct ptp_ocp *bp = from_timer(bp, t, watchdog); + unsigned long flags; + u32 status; + + status = ioread32(&bp->pps_to_clk->status); + + if (status & PPS_STATUS_SUPERV_ERR) { + iowrite32(status, &bp->pps_to_clk->status); + if (!bp->gnss_lost) { + spin_lock_irqsave(&bp->lock, flags); + __ptp_ocp_clear_drift_locked(bp); + spin_unlock_irqrestore(&bp->lock, flags); + bp->gnss_lost = ktime_get_real_seconds(); + } + + } else if (bp->gnss_lost) { + bp->gnss_lost = 0; + } + + mod_timer(&bp->watchdog, jiffies + HZ); +} + static int -ptp_ocp_check_clock(struct ptp_ocp *bp) +ptp_ocp_init_clock(struct ptp_ocp *bp) { struct timespec64 ts; bool sync; @@ -214,6 +571,17 @@ ptp_ocp_check_clock(struct ptp_ocp *bp) ctrl |= OCP_CTRL_ENABLE; iowrite32(ctrl, &bp->reg->ctrl); + /* NO DRIFT Correction */ + /* offset_p:i 1/8, offset_i: 1/16, drift_p: 0, drift_i: 0 */ + iowrite32(0x2000, &bp->reg->servo_offset_p); + iowrite32(0x1000, &bp->reg->servo_offset_i); + iowrite32(0, &bp->reg->servo_drift_p); + iowrite32(0, &bp->reg->servo_drift_i); + + /* latch servo values */ + ctrl |= OCP_CTRL_ADJUST_SERVO; + iowrite32(ctrl, &bp->reg->ctrl); + if ((ioread32(&bp->reg->ctrl) & OCP_CTRL_ENABLE) == 0) { dev_err(&bp->pdev->dev, "clock not enabled\n"); return -ENODEV; @@ -229,6 +597,9 @@ ptp_ocp_check_clock(struct ptp_ocp *bp) ts.tv_sec, ts.tv_nsec, sync ? "in-sync" : "UNSYNCED"); + timer_setup(&bp->watchdog, ptp_ocp_watchdog, 0); + mod_timer(&bp->watchdog, jiffies + HZ); + return 0; } @@ -278,82 +649,839 @@ ptp_ocp_tod_info(struct ptp_ocp *bp) reg & TOD_STATUS_LEAP_VALID ? 1 : 0); } +static int +ptp_ocp_firstchild(struct device *dev, void *data) +{ + return 1; +} + +static int +ptp_ocp_read_i2c(struct i2c_adapter *adap, u8 addr, u8 reg, u8 sz, u8 *data) +{ + struct i2c_msg msgs[2] = { + { + .addr = addr, + .len = 1, + .buf = ®, + }, + { + .addr = addr, + .flags = I2C_M_RD, + .len = 2, + .buf = data, + }, + }; + int err; + u8 len; + + /* xiic-i2c for some stupid reason only does 2 byte reads. */ + while (sz) { + len = min_t(u8, sz, 2); + msgs[1].len = len; + err = i2c_transfer(adap, msgs, 2); + if (err != msgs[1].len) + return err; + msgs[1].buf += len; + reg += len; + sz -= len; + } + return 0; +} + +static void +ptp_ocp_get_serial_number(struct ptp_ocp *bp) +{ + struct i2c_adapter *adap; + struct device *dev; + int err; + + dev = device_find_child(&bp->i2c_ctrl->dev, NULL, ptp_ocp_firstchild); + if (!dev) { + dev_err(&bp->pdev->dev, "Can't find I2C adapter\n"); + return; + } + + adap = i2c_verify_adapter(dev); + if (!adap) { + dev_err(&bp->pdev->dev, "device '%s' isn't an I2C adapter\n", + dev_name(dev)); + goto out; + } + + err = ptp_ocp_read_i2c(adap, 0x58, 0x9A, 6, bp->serial); + if (err) { + dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", err); + goto out; + } + + bp->has_serial = true; + +out: + put_device(dev); +} + static void ptp_ocp_info(struct ptp_ocp *bp) { - static const char * const clock_name[] = { - "NO", "TOD", "IRIG", "PPS", "PTP", "RTC", "REGS", "EXT" - }; u32 version, select; version = ioread32(&bp->reg->version); select = ioread32(&bp->reg->select); dev_info(&bp->pdev->dev, "Version %d.%d.%d, clock %s, device ptp%d\n", version >> 24, (version >> 16) & 0xff, version & 0xffff, - clock_name[select & 7], + ptp_ocp_clock_name_from_val(select >> 16), ptp_clock_index(bp->ptp)); ptp_ocp_tod_info(bp); } +static struct device * +ptp_ocp_find_flash(struct ptp_ocp *bp) +{ + struct device *dev, *last; + + last = NULL; + dev = &bp->spi_flash->dev; + + while ((dev = device_find_child(dev, NULL, ptp_ocp_firstchild))) { + if (!strcmp("mtd", dev_bus_name(dev))) + break; + put_device(last); + last = dev; + } + put_device(last); + + return dev; +} + static int -ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) +ptp_ocp_devlink_flash(struct devlink *devlink, struct device *dev, + const struct firmware *fw) { - struct ptp_ocp *bp; + struct mtd_info *mtd = dev_get_drvdata(dev); + struct ptp_ocp *bp = devlink_priv(devlink); + size_t off, len, resid, wrote; + struct erase_info erase; + size_t base, blksz; + int err; + + off = 0; + base = bp->flash_start; + blksz = 4096; + resid = fw->size; + + while (resid) { + devlink_flash_update_status_notify(devlink, "Flashing", + NULL, off, fw->size); + + len = min_t(size_t, resid, blksz); + erase.addr = base + off; + erase.len = blksz; + + err = mtd_erase(mtd, &erase); + if (err) + goto out; + + err = mtd_write(mtd, base + off, len, &wrote, &fw->data[off]); + if (err) + goto out; + + off += blksz; + resid -= len; + } +out: + return err; +} + +static int +ptp_ocp_devlink_flash_update(struct devlink *devlink, + struct devlink_flash_update_params *params, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = devlink_priv(devlink); + struct device *dev; + const char *msg; + int err; + + dev = ptp_ocp_find_flash(bp); + if (!dev) { + dev_err(&bp->pdev->dev, "Can't find Flash SPI adapter\n"); + return -ENODEV; + } + + devlink_flash_update_status_notify(devlink, "Preparing to flash", + NULL, 0, 0); + + err = ptp_ocp_devlink_flash(devlink, dev, params->fw); + + msg = err ? "Flash error" : "Flash complete"; + devlink_flash_update_status_notify(devlink, msg, NULL, 0, 0); + + put_device(dev); + return err; +} + +static int +ptp_ocp_devlink_info_get(struct devlink *devlink, struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct ptp_ocp *bp = devlink_priv(devlink); + char buf[32]; + int err; + + err = devlink_info_driver_name_put(req, KBUILD_MODNAME); + if (err) + return err; + + if (bp->image) { + u32 ver = ioread32(&bp->image->version); + + if (ver & 0xffff) { + sprintf(buf, "%d", ver); + err = devlink_info_version_running_put(req, + "fw", + buf); + } else { + sprintf(buf, "%d", ver >> 16); + err = devlink_info_version_running_put(req, + "loader", + buf); + } + if (err) + return err; + } + + if (!bp->has_serial) + ptp_ocp_get_serial_number(bp); + + if (bp->has_serial) { + sprintf(buf, "%pM", bp->serial); + err = devlink_info_serial_number_put(req, buf); + if (err) + return err; + } + + return 0; +} + +static const struct devlink_ops ptp_ocp_devlink_ops = { + .flash_update = ptp_ocp_devlink_flash_update, + .info_get = ptp_ocp_devlink_info_get, +}; + +static void __iomem * +__ptp_ocp_get_mem(struct ptp_ocp *bp, unsigned long start, int size) +{ + struct resource res = DEFINE_RES_MEM_NAMED(start, size, "ptp_ocp"); + + return devm_ioremap_resource(&bp->pdev->dev, &res); +} + +static void __iomem * +ptp_ocp_get_mem(struct ptp_ocp *bp, struct ocp_resource *r) +{ + unsigned long start; + + start = pci_resource_start(bp->pdev, 0) + r->offset; + return __ptp_ocp_get_mem(bp, start, r->size); +} + +static void +ptp_ocp_set_irq_resource(struct resource *res, int irq) +{ + struct resource r = DEFINE_RES_IRQ(irq); + *res = r; +} + +static void +ptp_ocp_set_mem_resource(struct resource *res, unsigned long start, int size) +{ + struct resource r = DEFINE_RES_MEM(start, size); + *res = r; +} + +static int +ptp_ocp_register_spi(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct ptp_ocp_flash_info *info; + struct pci_dev *pdev = bp->pdev; + struct platform_device *p; + struct resource res[2]; + unsigned long start; + int id; + + /* XXX hack to work around old FPGA */ + if (bp->n_irqs < 10) { + dev_err(&bp->pdev->dev, "FPGA does not have SPI devices\n"); + return 0; + } + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "spi device irq %d out of range\n", + r->irq_vec); + return 0; + } + + start = pci_resource_start(pdev, 0) + r->offset; + ptp_ocp_set_mem_resource(&res[0], start, r->size); + ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec)); + + info = r->extra; + id = pci_dev_id(pdev) << 1; + id += info->pci_offset; + + p = platform_device_register_resndata(&pdev->dev, info->name, id, + res, 2, info->data, + info->data_size); + if (IS_ERR(p)) + return PTR_ERR(p); + + bp_assign_entry(bp, r, p); + + return 0; +} + +static struct platform_device * +ptp_ocp_i2c_bus(struct pci_dev *pdev, struct ocp_resource *r, int id) +{ + struct resource res[2]; + unsigned long start; + + start = pci_resource_start(pdev, 0) + r->offset; + ptp_ocp_set_mem_resource(&res[0], start, r->size); + ptp_ocp_set_irq_resource(&res[1], pci_irq_vector(pdev, r->irq_vec)); + + return platform_device_register_resndata(&pdev->dev, "xiic-i2c", + id, res, 2, NULL, 0); +} + +static int +ptp_ocp_register_i2c(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct platform_device *p; + struct clk_hw *clk; + char buf[32]; + int id; + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "i2c device irq %d out of range\n", + r->irq_vec); + return 0; + } + + id = pci_dev_id(bp->pdev); + + sprintf(buf, "AXI.%d", id); + clk = clk_hw_register_fixed_rate(&pdev->dev, buf, NULL, 0, 50000000); + if (IS_ERR(clk)) + return PTR_ERR(clk); + bp->i2c_clk = clk; + + sprintf(buf, "xiic-i2c.%d", id); + devm_clk_hw_register_clkdev(&pdev->dev, clk, NULL, buf); + p = ptp_ocp_i2c_bus(bp->pdev, r, id); + if (IS_ERR(p)) + return PTR_ERR(p); + + bp_assign_entry(bp, r, p); + + return 0; +} + +static irqreturn_t +ptp_ocp_ts_irq(int irq, void *priv) +{ + struct ptp_ocp_ext_src *ext = priv; + struct ts_reg __iomem *reg = ext->mem; + struct ptp_clock_event ev; + u32 sec, nsec; + + /* XXX should fix API - this converts s/ns -> ts -> s/ns */ + sec = ioread32(®->time_sec); + nsec = ioread32(®->time_ns); + + ev.type = PTP_CLOCK_EXTTS; + ev.index = ext->info->index; + ev.timestamp = sec * 1000000000ULL + nsec; + + ptp_clock_event(ext->bp->ptp, &ev); + + iowrite32(1, ®->intr); /* write 1 to ack */ + + return IRQ_HANDLED; +} + +static int +ptp_ocp_ts_enable(void *priv, bool enable) +{ + struct ptp_ocp_ext_src *ext = priv; + struct ts_reg __iomem *reg = ext->mem; + + if (enable) { + iowrite32(1, ®->enable); + iowrite32(1, ®->intr_mask); + iowrite32(1, ®->intr); + } else { + iowrite32(0, ®->intr_mask); + iowrite32(0, ®->enable); + } + + return 0; +} + +static void +ptp_ocp_unregister_ext(struct ptp_ocp_ext_src *ext) +{ + ext->info->enable(ext, false); + pci_free_irq(ext->bp->pdev, ext->irq_vec, ext); + kfree(ext); +} + +static int +ptp_ocp_register_ext(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct ptp_ocp_ext_src *ext; int err; - bp = kzalloc(sizeof(*bp), GFP_KERNEL); - if (!bp) + ext = kzalloc(sizeof(*ext), GFP_KERNEL); + if (!ext) return -ENOMEM; - bp->pdev = pdev; - pci_set_drvdata(pdev, bp); - err = pci_enable_device(pdev); + err = -EINVAL; + ext->mem = ptp_ocp_get_mem(bp, r); + if (!ext->mem) + goto out; + + ext->bp = bp; + ext->info = r->extra; + ext->irq_vec = r->irq_vec; + + err = pci_request_irq(pdev, r->irq_vec, ext->info->irq_fcn, NULL, + ext, "ocp%d.%s", bp->id, ext->info->name); if (err) { - dev_err(&pdev->dev, "pci_enable_device\n"); - goto out_free; + dev_err(&pdev->dev, "Could not get irq %d\n", r->irq_vec); + goto out; } - err = pci_request_regions(pdev, KBUILD_MODNAME); - if (err) { - dev_err(&pdev->dev, "pci_request_region\n"); - goto out_disable; + bp_assign_entry(bp, r, ext); + + return 0; + +out: + kfree(ext); + return err; +} + +static int +ptp_ocp_serial_line(struct ptp_ocp *bp, struct ocp_resource *r) +{ + struct pci_dev *pdev = bp->pdev; + struct uart_8250_port uart; + + /* Setting UPF_IOREMAP and leaving port.membase unspecified lets + * the serial port device claim and release the pci resource. + */ + memset(&uart, 0, sizeof(uart)); + uart.port.dev = &pdev->dev; + uart.port.iotype = UPIO_MEM; + uart.port.regshift = 2; + uart.port.mapbase = pci_resource_start(pdev, 0) + r->offset; + uart.port.irq = pci_irq_vector(pdev, r->irq_vec); + uart.port.uartclk = 50000000; + uart.port.flags = UPF_FIXED_TYPE | UPF_IOREMAP; + uart.port.type = PORT_16550A; + + return serial8250_register_8250_port(&uart); +} + +static int +ptp_ocp_register_serial(struct ptp_ocp *bp, struct ocp_resource *r) +{ + int port; + + if (r->irq_vec > bp->n_irqs) { + dev_err(&bp->pdev->dev, "serial device irq %d out of range\n", + r->irq_vec); + return 0; } - bp->base = pci_ioremap_bar(pdev, 0); - if (!bp->base) { - dev_err(&pdev->dev, "io_remap bar0\n"); - err = -ENOMEM; - goto out_release_regions; + port = ptp_ocp_serial_line(bp, r); + if (port < 0) + return port; + + bp_assign_entry(bp, r, port); + + return 0; +} + +static int +ptp_ocp_register_mem(struct ptp_ocp *bp, struct ocp_resource *r) +{ + void __iomem *mem; + + mem = ptp_ocp_get_mem(bp, r); + if (!mem) + return -EINVAL; + + bp_assign_entry(bp, r, mem); + + return 0; +} + +/* FB specific board initializers; last "resource" registered. */ +static int +ptp_ocp_fb_board_init(struct ptp_ocp *bp, struct ocp_resource *r) +{ + bp->flash_start = 1024 * 4096; + + return ptp_ocp_init_clock(bp); +} + +static int +ptp_ocp_register_resources(struct ptp_ocp *bp, kernel_ulong_t driver_data) +{ + struct ocp_resource *r, *table; + int err = 0; + + table = (struct ocp_resource *)driver_data; + for (r = table; r->setup; r++) { + err = r->setup(bp, r); + if (err) + break; + } + return err; +} + +static ssize_t +serialnum_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + + if (!bp->has_serial) + ptp_ocp_get_serial_number(bp); + + return sysfs_emit(buf, "%pM\n", bp->serial); +} +static DEVICE_ATTR_RO(serialnum); + +static ssize_t +gnss_sync_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + ssize_t ret; + + if (bp->gnss_lost) + ret = sysfs_emit(buf, "LOST @ %ptT\n", &bp->gnss_lost); + else + ret = sysfs_emit(buf, "SYNC\n"); + + return ret; +} +static DEVICE_ATTR_RO(gnss_sync); + +static ssize_t +clock_source_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + const char *p; + u32 select; + + select = ioread32(&bp->reg->select); + p = ptp_ocp_clock_name_from_val(select >> 16); + + return sysfs_emit(buf, "%s\n", p); +} + +static ssize_t +clock_source_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + unsigned long flags; + int val; + + val = ptp_ocp_clock_val_from_name(buf); + if (val < 0) + return val; + + spin_lock_irqsave(&bp->lock, flags); + iowrite32(val, &bp->reg->select); + spin_unlock_irqrestore(&bp->lock, flags); + + return count; +} +static DEVICE_ATTR_RW(clock_source); + +static ssize_t +available_clock_sources_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const char *clk; + ssize_t count; + int i; + + count = 0; + for (i = 0; i < ARRAY_SIZE(ptp_ocp_clock); i++) { + clk = ptp_ocp_clock[i].name; + count += sysfs_emit_at(buf, count, "%s ", clk); + } + if (count) + count--; + count += sysfs_emit_at(buf, count, "\n"); + return count; +} +static DEVICE_ATTR_RO(available_clock_sources); + +static struct attribute *timecard_attrs[] = { + &dev_attr_serialnum.attr, + &dev_attr_gnss_sync.attr, + &dev_attr_clock_source.attr, + &dev_attr_available_clock_sources.attr, + NULL, +}; +ATTRIBUTE_GROUPS(timecard); + +static void +ptp_ocp_dev_release(struct device *dev) +{ + struct ptp_ocp *bp = dev_get_drvdata(dev); + + mutex_lock(&ptp_ocp_lock); + idr_remove(&ptp_ocp_idr, bp->id); + mutex_unlock(&ptp_ocp_lock); +} + +static int +ptp_ocp_device_init(struct ptp_ocp *bp, struct pci_dev *pdev) +{ + int err; + + mutex_lock(&ptp_ocp_lock); + err = idr_alloc(&ptp_ocp_idr, bp, 0, 0, GFP_KERNEL); + mutex_unlock(&ptp_ocp_lock); + if (err < 0) { + dev_err(&pdev->dev, "idr_alloc failed: %d\n", err); + return err; } - bp->reg = bp->base + OCP_REGISTER_OFFSET; - bp->tod = bp->base + TOD_REGISTER_OFFSET; + bp->id = err; + bp->ptp_info = ptp_ocp_clock_info; spin_lock_init(&bp->lock); + bp->gnss_port = -1; + bp->mac_port = -1; + bp->pdev = pdev; + + device_initialize(&bp->dev); + dev_set_name(&bp->dev, "ocp%d", bp->id); + bp->dev.class = &timecard_class; + bp->dev.parent = &pdev->dev; + bp->dev.release = ptp_ocp_dev_release; + dev_set_drvdata(&bp->dev, bp); + + err = device_add(&bp->dev); + if (err) { + dev_err(&bp->dev, "device add failed: %d\n", err); + goto out; + } + + pci_set_drvdata(pdev, bp); + + return 0; + +out: + ptp_ocp_dev_release(&bp->dev); + put_device(&bp->dev); + return err; +} + +static void +ptp_ocp_symlink(struct ptp_ocp *bp, struct device *child, const char *link) +{ + struct device *dev = &bp->dev; + + if (sysfs_create_link(&dev->kobj, &child->kobj, link)) + dev_err(dev, "%s symlink failed\n", link); +} + +static void +ptp_ocp_link_child(struct ptp_ocp *bp, const char *name, const char *link) +{ + struct device *dev, *child; + + dev = &bp->pdev->dev; + + child = device_find_child_by_name(dev, name); + if (!child) { + dev_err(dev, "Could not find device %s\n", name); + return; + } + + ptp_ocp_symlink(bp, child, link); + put_device(child); +} + +static int +ptp_ocp_complete(struct ptp_ocp *bp) +{ + struct pps_device *pps; + char buf[32]; + + if (bp->gnss_port != -1) { + sprintf(buf, "ttyS%d", bp->gnss_port); + ptp_ocp_link_child(bp, buf, "ttyGNSS"); + } + if (bp->mac_port != -1) { + sprintf(buf, "ttyS%d", bp->mac_port); + ptp_ocp_link_child(bp, buf, "ttyMAC"); + } + sprintf(buf, "ptp%d", ptp_clock_index(bp->ptp)); + ptp_ocp_link_child(bp, buf, "ptp"); + + pps = pps_lookup_dev(bp->ptp); + if (pps) + ptp_ocp_symlink(bp, pps->dev, "pps"); + + if (device_add_groups(&bp->dev, timecard_groups)) + pr_err("device add groups failed\n"); - err = ptp_ocp_check_clock(bp); + return 0; +} + +static void +ptp_ocp_resource_summary(struct ptp_ocp *bp) +{ + struct device *dev = &bp->pdev->dev; + + if (bp->image) { + u32 ver = ioread32(&bp->image->version); + + dev_info(dev, "version %x\n", ver); + if (ver & 0xffff) + dev_info(dev, "regular image, version %d\n", + ver & 0xffff); + else + dev_info(dev, "golden image, version %d\n", + ver >> 16); + } + if (bp->gnss_port != -1) + dev_info(dev, "GNSS @ /dev/ttyS%d 115200\n", bp->gnss_port); + if (bp->mac_port != -1) + dev_info(dev, "MAC @ /dev/ttyS%d 57600\n", bp->mac_port); +} + +static void +ptp_ocp_detach_sysfs(struct ptp_ocp *bp) +{ + struct device *dev = &bp->dev; + + sysfs_remove_link(&dev->kobj, "ttyGNSS"); + sysfs_remove_link(&dev->kobj, "ttyMAC"); + sysfs_remove_link(&dev->kobj, "ptp"); + sysfs_remove_link(&dev->kobj, "pps"); + device_remove_groups(dev, timecard_groups); +} + +static void +ptp_ocp_detach(struct ptp_ocp *bp) +{ + ptp_ocp_detach_sysfs(bp); + if (timer_pending(&bp->watchdog)) + del_timer_sync(&bp->watchdog); + if (bp->ts0) + ptp_ocp_unregister_ext(bp->ts0); + if (bp->ts1) + ptp_ocp_unregister_ext(bp->ts1); + if (bp->pps) + ptp_ocp_unregister_ext(bp->pps); + if (bp->gnss_port != -1) + serial8250_unregister_port(bp->gnss_port); + if (bp->mac_port != -1) + serial8250_unregister_port(bp->mac_port); + if (bp->spi_flash) + platform_device_unregister(bp->spi_flash); + if (bp->i2c_ctrl) + platform_device_unregister(bp->i2c_ctrl); + if (bp->i2c_clk) + clk_hw_unregister_fixed_rate(bp->i2c_clk); + if (bp->n_irqs) + pci_free_irq_vectors(bp->pdev); + if (bp->ptp) + ptp_clock_unregister(bp->ptp); + device_unregister(&bp->dev); +} + +static int +ptp_ocp_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct devlink *devlink; + struct ptp_ocp *bp; + int err; + + devlink = devlink_alloc(&ptp_ocp_devlink_ops, sizeof(*bp), &pdev->dev); + if (!devlink) { + dev_err(&pdev->dev, "devlink_alloc failed\n"); + return -ENOMEM; + } + + err = devlink_register(devlink); + if (err) + goto out_free; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "pci_enable_device\n"); + goto out_unregister; + } + + bp = devlink_priv(devlink); + err = ptp_ocp_device_init(bp, pdev); + if (err) + goto out_unregister; + + /* compat mode. + * Older FPGA firmware only returns 2 irq's. + * allow this - if not all of the IRQ's are returned, skip the + * extra devices and just register the clock. + */ + err = pci_alloc_irq_vectors(pdev, 1, 10, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (err < 0) { + dev_err(&pdev->dev, "alloc_irq_vectors err: %d\n", err); + goto out; + } + bp->n_irqs = err; + pci_set_master(pdev); + + err = ptp_ocp_register_resources(bp, id->driver_data); if (err) goto out; bp->ptp = ptp_clock_register(&bp->ptp_info, &pdev->dev); if (IS_ERR(bp->ptp)) { - dev_err(&pdev->dev, "ptp_clock_register\n"); err = PTR_ERR(bp->ptp); + dev_err(&pdev->dev, "ptp_clock_register: %d\n", err); + bp->ptp = NULL; goto out; } + err = ptp_ocp_complete(bp); + if (err) + goto out; + ptp_ocp_info(bp); + ptp_ocp_resource_summary(bp); return 0; out: - pci_iounmap(pdev, bp->base); -out_release_regions: - pci_release_regions(pdev); -out_disable: + ptp_ocp_detach(bp); pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +out_unregister: + devlink_unregister(devlink); out_free: - kfree(bp); + devlink_free(devlink); return err; } @@ -362,13 +1490,14 @@ static void ptp_ocp_remove(struct pci_dev *pdev) { struct ptp_ocp *bp = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(bp); - ptp_clock_unregister(bp->ptp); - pci_iounmap(pdev, bp->base); - pci_release_regions(pdev); + ptp_ocp_detach(bp); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); - kfree(bp); + + devlink_unregister(devlink); + devlink_free(devlink); } static struct pci_driver ptp_ocp_driver = { @@ -378,19 +1507,84 @@ static struct pci_driver ptp_ocp_driver = { .remove = ptp_ocp_remove, }; +static int +ptp_ocp_i2c_notifier_call(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct device *dev, *child = data; + struct ptp_ocp *bp; + bool add; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + case BUS_NOTIFY_DEL_DEVICE: + add = action == BUS_NOTIFY_ADD_DEVICE; + break; + default: + return 0; + } + + if (!i2c_verify_adapter(child)) + return 0; + + dev = child; + while ((dev = dev->parent)) + if (dev->driver && !strcmp(dev->driver->name, KBUILD_MODNAME)) + goto found; + return 0; + +found: + bp = dev_get_drvdata(dev); + if (add) + ptp_ocp_symlink(bp, child, "i2c"); + else + sysfs_remove_link(&bp->dev.kobj, "i2c"); + + return 0; +} + +static struct notifier_block ptp_ocp_i2c_notifier = { + .notifier_call = ptp_ocp_i2c_notifier_call, +}; + static int __init ptp_ocp_init(void) { + const char *what; int err; + what = "timecard class"; + err = class_register(&timecard_class); + if (err) + goto out; + + what = "i2c notifier"; + err = bus_register_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); + if (err) + goto out_notifier; + + what = "ptp_ocp driver"; err = pci_register_driver(&ptp_ocp_driver); + if (err) + goto out_register; + + return 0; + +out_register: + bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); +out_notifier: + class_unregister(&timecard_class); +out: + pr_err(KBUILD_MODNAME ": failed to register %s: %d\n", what, err); return err; } static void __exit ptp_ocp_fini(void) { + bus_unregister_notifier(&i2c_bus_type, &ptp_ocp_i2c_notifier); pci_unregister_driver(&ptp_ocp_driver); + class_unregister(&timecard_class); } module_init(ptp_ocp_init); diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index cff91b4f1a76..9c67b97faba2 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -74,6 +74,7 @@ config QETH_L2 def_tristate y prompt "qeth layer 2 device support" depends on QETH + depends on BRIDGE || BRIDGE=n help Select this option to be able to run qeth devices in layer 2 mode. To compile as a module, choose M. The module name is qeth_l2. diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 69afc0311dd1..4871f712b874 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -717,6 +717,227 @@ static int qeth_l2_dev2br_an_set(struct qeth_card *card, bool enable) return rc; } +struct qeth_l2_br2dev_event_work { + struct work_struct work; + struct net_device *br_dev; + struct net_device *lsync_dev; + struct net_device *dst_dev; + unsigned long event; + unsigned char addr[ETH_ALEN]; +}; + +static const struct net_device_ops qeth_l2_netdev_ops; + +static bool qeth_l2_must_learn(struct net_device *netdev, + struct net_device *dstdev) +{ + struct qeth_priv *priv; + + priv = netdev_priv(netdev); + return (netdev != dstdev && + (priv->brport_features & BR_LEARNING_SYNC) && + !(br_port_flag_is_set(netdev, BR_ISOLATED) && + br_port_flag_is_set(dstdev, BR_ISOLATED)) && + netdev->netdev_ops == &qeth_l2_netdev_ops); +} + +/** + * qeth_l2_br2dev_worker() - update local MACs + * @work: bridge to device FDB update + * + * Update local MACs of a learning_sync bridgeport so it can receive + * messages for a destination port. + * In case of an isolated learning_sync port, also update its isolated + * siblings. + */ +static void qeth_l2_br2dev_worker(struct work_struct *work) +{ + struct qeth_l2_br2dev_event_work *br2dev_event_work = + container_of(work, struct qeth_l2_br2dev_event_work, work); + struct net_device *lsyncdev = br2dev_event_work->lsync_dev; + struct net_device *dstdev = br2dev_event_work->dst_dev; + struct net_device *brdev = br2dev_event_work->br_dev; + unsigned long event = br2dev_event_work->event; + unsigned char *addr = br2dev_event_work->addr; + struct qeth_card *card = lsyncdev->ml_priv; + struct net_device *lowerdev; + struct list_head *iter; + int err = 0; + + kfree(br2dev_event_work); + QETH_CARD_TEXT_(card, 4, "b2dw%04x", event); + QETH_CARD_TEXT_(card, 4, "ma%012lx", ether_addr_to_u64(addr)); + + rcu_read_lock(); + /* Verify preconditions are still valid: */ + if (!netif_is_bridge_port(lsyncdev) || + brdev != netdev_master_upper_dev_get_rcu(lsyncdev)) + goto unlock; + if (!qeth_l2_must_learn(lsyncdev, dstdev)) + goto unlock; + + if (br_port_flag_is_set(lsyncdev, BR_ISOLATED)) { + /* Update lsyncdev and its isolated sibling(s): */ + iter = &brdev->adj_list.lower; + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + while (lowerdev) { + if (br_port_flag_is_set(lowerdev, BR_ISOLATED)) { + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = dev_uc_add(lowerdev, addr); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = dev_uc_del(lowerdev, addr); + break; + default: + break; + } + if (err) { + QETH_CARD_TEXT(card, 2, "b2derris"); + QETH_CARD_TEXT_(card, 2, + "err%02x%03d", event, + lowerdev->ifindex); + } + } + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + } + } else { + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + err = dev_uc_add(lsyncdev, addr); + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + err = dev_uc_del(lsyncdev, addr); + break; + default: + break; + } + if (err) + QETH_CARD_TEXT_(card, 2, "b2derr%02x", event); + } + +unlock: + rcu_read_unlock(); + dev_put(brdev); + dev_put(lsyncdev); + dev_put(dstdev); +} + +static int qeth_l2_br2dev_queue_work(struct net_device *brdev, + struct net_device *lsyncdev, + struct net_device *dstdev, + unsigned long event, + const unsigned char *addr) +{ + struct qeth_l2_br2dev_event_work *worker_data; + struct qeth_card *card; + + worker_data = kzalloc(sizeof(*worker_data), GFP_ATOMIC); + if (!worker_data) + return -ENOMEM; + INIT_WORK(&worker_data->work, qeth_l2_br2dev_worker); + worker_data->br_dev = brdev; + worker_data->lsync_dev = lsyncdev; + worker_data->dst_dev = dstdev; + worker_data->event = event; + ether_addr_copy(worker_data->addr, addr); + + card = lsyncdev->ml_priv; + /* Take a reference on the sw port devices and the bridge */ + dev_hold(brdev); + dev_hold(lsyncdev); + dev_hold(dstdev); + queue_work(card->event_wq, &worker_data->work); + return 0; +} + +/* Called under rtnl_lock */ +static int qeth_l2_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dstdev, *brdev, *lowerdev; + struct switchdev_notifier_fdb_info *fdb_info; + struct switchdev_notifier_info *info = ptr; + struct list_head *iter; + struct qeth_card *card; + int rc; + + if (!(event == SWITCHDEV_FDB_ADD_TO_DEVICE || + event == SWITCHDEV_FDB_DEL_TO_DEVICE)) + return NOTIFY_DONE; + + dstdev = switchdev_notifier_info_to_dev(info); + brdev = netdev_master_upper_dev_get_rcu(dstdev); + if (!brdev || !netif_is_bridge_master(brdev)) + return NOTIFY_DONE; + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + iter = &brdev->adj_list.lower; + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + while (lowerdev) { + if (qeth_l2_must_learn(lowerdev, dstdev)) { + card = lowerdev->ml_priv; + QETH_CARD_TEXT_(card, 4, "b2dqw%03x", event); + rc = qeth_l2_br2dev_queue_work(brdev, lowerdev, + dstdev, event, + fdb_info->addr); + if (rc) { + QETH_CARD_TEXT(card, 2, "b2dqwerr"); + return NOTIFY_BAD; + } + } + lowerdev = netdev_next_lower_dev_rcu(brdev, &iter); + } + return NOTIFY_DONE; +} + +static struct notifier_block qeth_l2_sw_notifier = { + .notifier_call = qeth_l2_switchdev_event, +}; + +static refcount_t qeth_l2_switchdev_notify_refcnt; + +/* Called under rtnl_lock */ +static void qeth_l2_br2dev_get(void) +{ + int rc; + + if (!refcount_inc_not_zero(&qeth_l2_switchdev_notify_refcnt)) { + rc = register_switchdev_notifier(&qeth_l2_sw_notifier); + if (rc) { + QETH_DBF_MESSAGE(2, + "failed to register qeth_l2_sw_notifier: %d\n", + rc); + } else { + refcount_set(&qeth_l2_switchdev_notify_refcnt, 1); + QETH_DBF_MESSAGE(2, "qeth_l2_sw_notifier registered\n"); + } + } + QETH_DBF_TEXT_(SETUP, 2, "b2d+%04d", + qeth_l2_switchdev_notify_refcnt.refs.counter); +} + +/* Called under rtnl_lock */ +static void qeth_l2_br2dev_put(void) +{ + int rc; + + if (refcount_dec_and_test(&qeth_l2_switchdev_notify_refcnt)) { + rc = unregister_switchdev_notifier(&qeth_l2_sw_notifier); + if (rc) { + QETH_DBF_MESSAGE(2, + "failed to unregister qeth_l2_sw_notifier: %d\n", + rc); + } else { + QETH_DBF_MESSAGE(2, + "qeth_l2_sw_notifier unregistered\n"); + } + } + QETH_DBF_TEXT_(SETUP, 2, "b2d-%04d", + qeth_l2_switchdev_notify_refcnt.refs.counter); +} + static int qeth_l2_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags) @@ -810,16 +1031,19 @@ static int qeth_l2_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, } else if (enable) { qeth_l2_set_pnso_mode(card, QETH_PNSO_ADDR_INFO); rc = qeth_l2_dev2br_an_set(card, true); - if (rc) + if (rc) { qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE); - else + } else { priv->brport_features |= BR_LEARNING_SYNC; + qeth_l2_br2dev_get(); + } } else { rc = qeth_l2_dev2br_an_set(card, false); if (!rc) { qeth_l2_set_pnso_mode(card, QETH_PNSO_NONE); priv->brport_features ^= BR_LEARNING_SYNC; qeth_l2_dev2br_fdb_flush(card); + qeth_l2_br2dev_put(); } } mutex_unlock(&card->sbp_lock); @@ -2072,6 +2296,7 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) static void qeth_l2_remove_device(struct ccwgroup_device *gdev) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct qeth_priv *priv; if (gdev->dev.type != &qeth_l2_devtype) device_remove_groups(&gdev->dev, qeth_l2_attr_groups); @@ -2083,8 +2308,15 @@ static void qeth_l2_remove_device(struct ccwgroup_device *gdev) qeth_set_offline(card, card->discipline, false); cancel_work_sync(&card->close_dev_work); - if (card->dev->reg_state == NETREG_REGISTERED) + if (card->dev->reg_state == NETREG_REGISTERED) { + priv = netdev_priv(card->dev); + if (priv->brport_features & BR_LEARNING_SYNC) { + rtnl_lock(); + qeth_l2_br2dev_put(); + rtnl_unlock(); + } unregister_netdev(card->dev); + } } static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok) @@ -2207,6 +2439,7 @@ EXPORT_SYMBOL_GPL(qeth_l2_discipline); static int __init qeth_l2_init(void) { pr_info("register layer 2 discipline\n"); + refcount_set(&qeth_l2_switchdev_notify_refcnt, 0); return 0; } diff --git a/drivers/scsi/arm/acornscsi.c b/drivers/scsi/arm/acornscsi.c index 84fc7a0c6ff4..4a84599ff491 100644 --- a/drivers/scsi/arm/acornscsi.c +++ b/drivers/scsi/arm/acornscsi.c @@ -2642,6 +2642,7 @@ int acornscsi_abort(struct scsi_cmnd *SCpnt) //#endif clear_bit(SCpnt->device->id * 8 + (u8)(SCpnt->device->lun & 0x7), host->busyluns); + fallthrough; /* * We found the command, and cleared it out. Either diff --git a/drivers/scsi/arm/fas216.c b/drivers/scsi/arm/fas216.c index 6baa9b36367d..9c4458a99025 100644 --- a/drivers/scsi/arm/fas216.c +++ b/drivers/scsi/arm/fas216.c @@ -1375,6 +1375,7 @@ static void fas216_busservice_intr(FAS216_Info *info, unsigned int stat, unsigne case IS_COMPLETE: break; } + break; default: break; diff --git a/drivers/scsi/device_handler/scsi_dh_rdac.c b/drivers/scsi/device_handler/scsi_dh_rdac.c index 25f6e1ac9e7b..66652ab409cc 100644 --- a/drivers/scsi/device_handler/scsi_dh_rdac.c +++ b/drivers/scsi/device_handler/scsi_dh_rdac.c @@ -453,8 +453,8 @@ static int initialize_controller(struct scsi_device *sdev, if (!h->ctlr) err = SCSI_DH_RES_TEMP_UNAVAIL; else { - list_add_rcu(&h->node, &h->ctlr->dh_list); h->sdev = sdev; + list_add_rcu(&h->node, &h->ctlr->dh_list); } spin_unlock(&list_lock); err = SCSI_DH_OK; @@ -778,11 +778,11 @@ static void rdac_bus_detach( struct scsi_device *sdev ) spin_lock(&list_lock); if (h->ctlr) { list_del_rcu(&h->node); - h->sdev = NULL; kref_put(&h->ctlr->kref, release_controller); } spin_unlock(&list_lock); sdev->handler_data = NULL; + synchronize_rcu(); kfree(h); } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index bee1bec49c09..935b01ee44b7 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -807,6 +807,13 @@ static int ibmvfc_init_event_pool(struct ibmvfc_host *vhost, for (i = 0; i < size; ++i) { struct ibmvfc_event *evt = &pool->events[i]; + /* + * evt->active states + * 1 = in flight + * 0 = being completed + * -1 = free/freed + */ + atomic_set(&evt->active, -1); atomic_set(&evt->free, 1); evt->crq.valid = 0x80; evt->crq.ioba = cpu_to_be64(pool->iu_token + (sizeof(*evt->xfer_iu) * i)); @@ -1017,6 +1024,7 @@ static void ibmvfc_free_event(struct ibmvfc_event *evt) BUG_ON(!ibmvfc_valid_event(pool, evt)); BUG_ON(atomic_inc_return(&evt->free) != 1); + BUG_ON(atomic_dec_and_test(&evt->active)); spin_lock_irqsave(&evt->queue->l_lock, flags); list_add_tail(&evt->queue_list, &evt->queue->free); @@ -1072,6 +1080,12 @@ static void ibmvfc_complete_purge(struct list_head *purge_list) **/ static void ibmvfc_fail_request(struct ibmvfc_event *evt, int error_code) { + /* + * Anything we are failing should still be active. Otherwise, it + * implies we already got a response for the command and are doing + * something bad like double completing it. + */ + BUG_ON(!atomic_dec_and_test(&evt->active)); if (evt->cmnd) { evt->cmnd->result = (error_code << 16); evt->done = ibmvfc_scsi_eh_done; @@ -1723,6 +1737,7 @@ static int ibmvfc_send_event(struct ibmvfc_event *evt, evt->done(evt); } else { + atomic_set(&evt->active, 1); spin_unlock_irqrestore(&evt->queue->l_lock, flags); ibmvfc_trc_start(evt); } @@ -3251,7 +3266,7 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost, return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; @@ -3778,7 +3793,7 @@ static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost return; } - if (unlikely(atomic_read(&evt->free))) { + if (unlikely(atomic_dec_if_positive(&evt->active))) { dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index 4f0f3baefae4..92fb889d7eb0 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -745,6 +745,7 @@ struct ibmvfc_event { struct ibmvfc_target *tgt; struct scsi_cmnd *cmnd; atomic_t free; + atomic_t active; union ibmvfc_iu *xfer_iu; void (*done)(struct ibmvfc_event *evt); void (*_done)(struct ibmvfc_event *evt); diff --git a/drivers/scsi/megaraid/megaraid_mm.c b/drivers/scsi/megaraid/megaraid_mm.c index abf7b401f5b9..c509440bd161 100644 --- a/drivers/scsi/megaraid/megaraid_mm.c +++ b/drivers/scsi/megaraid/megaraid_mm.c @@ -238,7 +238,7 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) mimd_t mimd; uint32_t adapno; int iterator; - + bool is_found; if (copy_from_user(&mimd, umimd, sizeof(mimd_t))) { *rval = -EFAULT; @@ -254,12 +254,16 @@ mraid_mm_get_adapter(mimd_t __user *umimd, int *rval) adapter = NULL; iterator = 0; + is_found = false; list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } - if (!adapter) { + if (!is_found) { *rval = -ENODEV; return NULL; } @@ -725,6 +729,7 @@ ioctl_done(uioc_t *kioc) uint32_t adapno; int iterator; mraid_mmadp_t* adapter; + bool is_found; /* * When the kioc returns from driver, make sure it still doesn't @@ -747,19 +752,23 @@ ioctl_done(uioc_t *kioc) iterator = 0; adapter = NULL; adapno = kioc->adapno; + is_found = false; con_log(CL_ANN, ( KERN_WARNING "megaraid cmm: completed " "ioctl that was timedout before\n")); list_for_each_entry(adapter, &adapters_list_g, list) { - if (iterator++ == adapno) break; + if (iterator++ == adapno) { + is_found = true; + break; + } } kioc->timedout = 0; - if (adapter) { + if (is_found) mraid_mm_dealloc_kioc( adapter, kioc ); - } + } else { wake_up(&wait_q); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index c39955239d1c..19b1c0cf5f2a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2983,13 +2983,13 @@ _base_check_enable_msix(struct MPT3SAS_ADAPTER *ioc) } /** - * _base_free_irq - free irq + * mpt3sas_base_free_irq - free irq * @ioc: per adapter object * * Freeing respective reply_queue from the list. */ -static void -_base_free_irq(struct MPT3SAS_ADAPTER *ioc) +void +mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc) { struct adapter_reply_queue *reply_q, *next; @@ -3191,12 +3191,12 @@ _base_check_and_enable_high_iops_queues(struct MPT3SAS_ADAPTER *ioc, } /** - * _base_disable_msix - disables msix + * mpt3sas_base_disable_msix - disables msix * @ioc: per adapter object * */ -static void -_base_disable_msix(struct MPT3SAS_ADAPTER *ioc) +void +mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc) { if (!ioc->msix_enable) return; @@ -3304,8 +3304,8 @@ _base_enable_msix(struct MPT3SAS_ADAPTER *ioc) for (i = 0; i < ioc->reply_queue_count; i++) { r = _base_request_irq(ioc, i); if (r) { - _base_free_irq(ioc); - _base_disable_msix(ioc); + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); goto try_ioapic; } } @@ -3342,8 +3342,8 @@ mpt3sas_base_unmap_resources(struct MPT3SAS_ADAPTER *ioc) dexitprintk(ioc, ioc_info(ioc, "%s\n", __func__)); - _base_free_irq(ioc); - _base_disable_msix(ioc); + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); kfree(ioc->replyPostRegisterIndex); ioc->replyPostRegisterIndex = NULL; @@ -7613,14 +7613,14 @@ _base_diag_reset(struct MPT3SAS_ADAPTER *ioc) } /** - * _base_make_ioc_ready - put controller in READY state + * mpt3sas_base_make_ioc_ready - put controller in READY state * @ioc: per adapter object * @type: FORCE_BIG_HAMMER or SOFT_RESET * * Return: 0 for success, non-zero for failure. */ -static int -_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type) +int +mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type) { u32 ioc_state; int rc; @@ -7897,7 +7897,7 @@ mpt3sas_base_free_resources(struct MPT3SAS_ADAPTER *ioc) if (ioc->chip_phys && ioc->chip) { mpt3sas_base_mask_interrupts(ioc); ioc->shost_recovery = 1; - _base_make_ioc_ready(ioc, SOFT_RESET); + mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); ioc->shost_recovery = 0; } @@ -8017,7 +8017,7 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->build_sg_mpi = &_base_build_sg; ioc->build_zero_len_sge_mpi = &_base_build_zero_len_sge; - r = _base_make_ioc_ready(ioc, SOFT_RESET); + r = mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); if (r) goto out_free_resources; @@ -8471,7 +8471,7 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc, _base_pre_reset_handler(ioc); mpt3sas_wait_for_commands_to_complete(ioc); mpt3sas_base_mask_interrupts(ioc); - r = _base_make_ioc_ready(ioc, type); + r = mpt3sas_base_make_ioc_ready(ioc, type); if (r) goto out; _base_clear_outstanding_commands(ioc); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index d4834c8ee9c0..0c6c3df0038d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -1730,6 +1730,10 @@ do { ioc_err(ioc, "In func: %s\n", __func__); \ status, mpi_request, sz); } while (0) int mpt3sas_wait_for_ioc(struct MPT3SAS_ADAPTER *ioc, int wait_count); +int +mpt3sas_base_make_ioc_ready(struct MPT3SAS_ADAPTER *ioc, enum reset_type type); +void mpt3sas_base_free_irq(struct MPT3SAS_ADAPTER *ioc); +void mpt3sas_base_disable_msix(struct MPT3SAS_ADAPTER *ioc); /* scsih shared API */ struct scsi_cmnd *mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 866d118f7931..8e64a6f14542 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -11295,7 +11295,12 @@ scsih_shutdown(struct pci_dev *pdev) _scsih_ir_shutdown(ioc); _scsih_nvme_shutdown(ioc); - mpt3sas_base_detach(ioc); + mpt3sas_base_mask_interrupts(ioc); + ioc->shost_recovery = 1; + mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET); + ioc->shost_recovery = 0; + mpt3sas_base_free_irq(ioc); + mpt3sas_base_disable_msix(ioc); } diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index 48548a95327b..32e60f0c3b14 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -684,8 +684,7 @@ int pm8001_dev_found(struct domain_device *dev) void pm8001_task_done(struct sas_task *task) { - if (!del_timer(&task->slow_task->timer)) - return; + del_timer(&task->slow_task->timer); complete(&task->slow_task->completion); } @@ -693,9 +692,14 @@ static void pm8001_tmf_timedout(struct timer_list *t) { struct sas_task_slow *slow = from_timer(slow, t, timer); struct sas_task *task = slow->task; + unsigned long flags; - task->task_state_flags |= SAS_TASK_STATE_ABORTED; - complete(&task->slow_task->completion); + spin_lock_irqsave(&task->task_state_lock, flags); + if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { + task->task_state_flags |= SAS_TASK_STATE_ABORTED; + complete(&task->slow_task->completion); + } + spin_unlock_irqrestore(&task->task_state_lock, flags); } #define PM8001_TASK_TIMEOUT 20 @@ -748,13 +752,10 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, } res = -TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task[%x]timeout.\n", - tmf->tmf); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n", + tmf->tmf); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && @@ -834,12 +835,9 @@ pm8001_exec_internal_task_abort(struct pm8001_hba_info *pm8001_ha, wait_for_completion(&task->slow_task->completion); res = TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ - if ((task->task_state_flags & SAS_TASK_STATE_ABORTED)) { - if (!(task->task_state_flags & SAS_TASK_STATE_DONE)) { - pm8001_dbg(pm8001_ha, FAIL, - "TMF task timeout.\n"); - goto ex_err; - } + if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + pm8001_dbg(pm8001_ha, FAIL, "TMF task timeout.\n"); + goto ex_err; } if (task->task_status.resp == SAS_TASK_COMPLETE && diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index b059bf2b61d4..5b6996a2401b 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -475,7 +475,8 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, error = shost->hostt->target_alloc(starget); if(error) { - dev_printk(KERN_ERR, dev, "target allocation failed, error %d\n", error); + if (error != -ENXIO) + dev_err(dev, "target allocation failed, error %d\n", error); /* don't want scsi_target_reap to do the final * put because it will be under the host lock */ scsi_target_destroy(starget); diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 32489d25158f..ae9bfc658203 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -807,11 +807,14 @@ store_state_field(struct device *dev, struct device_attribute *attr, mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, state); /* - * If the device state changes to SDEV_RUNNING, we need to run - * the queue to avoid I/O hang. + * If the device state changes to SDEV_RUNNING, we need to + * rescan the device to revalidate it, and run the queue to + * avoid I/O hang. */ - if (ret == 0 && state == SDEV_RUNNING) + if (ret == 0 && state == SDEV_RUNNING) { + scsi_rescan_device(dev); blk_mq_run_hw_queues(sdev->request_queue, true); + } mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index b07105ae7c91..d8b05d8b5470 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -439,39 +439,10 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, struct device *dev = container_of(kobj, struct device, kobj); struct iscsi_iface *iface = iscsi_dev_to_iface(dev); struct iscsi_transport *t = iface->transport; - int param; - int param_type; + int param = -1; if (attr == &dev_attr_iface_enabled.attr) param = ISCSI_NET_PARAM_IFACE_ENABLE; - else if (attr == &dev_attr_iface_vlan_id.attr) - param = ISCSI_NET_PARAM_VLAN_ID; - else if (attr == &dev_attr_iface_vlan_priority.attr) - param = ISCSI_NET_PARAM_VLAN_PRIORITY; - else if (attr == &dev_attr_iface_vlan_enabled.attr) - param = ISCSI_NET_PARAM_VLAN_ENABLED; - else if (attr == &dev_attr_iface_mtu.attr) - param = ISCSI_NET_PARAM_MTU; - else if (attr == &dev_attr_iface_port.attr) - param = ISCSI_NET_PARAM_PORT; - else if (attr == &dev_attr_iface_ipaddress_state.attr) - param = ISCSI_NET_PARAM_IPADDR_STATE; - else if (attr == &dev_attr_iface_delayed_ack_en.attr) - param = ISCSI_NET_PARAM_DELAYED_ACK_EN; - else if (attr == &dev_attr_iface_tcp_nagle_disable.attr) - param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE; - else if (attr == &dev_attr_iface_tcp_wsf_disable.attr) - param = ISCSI_NET_PARAM_TCP_WSF_DISABLE; - else if (attr == &dev_attr_iface_tcp_wsf.attr) - param = ISCSI_NET_PARAM_TCP_WSF; - else if (attr == &dev_attr_iface_tcp_timer_scale.attr) - param = ISCSI_NET_PARAM_TCP_TIMER_SCALE; - else if (attr == &dev_attr_iface_tcp_timestamp_en.attr) - param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN; - else if (attr == &dev_attr_iface_cache_id.attr) - param = ISCSI_NET_PARAM_CACHE_ID; - else if (attr == &dev_attr_iface_redirect_en.attr) - param = ISCSI_NET_PARAM_REDIRECT_EN; else if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr) param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO; else if (attr == &dev_attr_iface_header_digest.attr) @@ -508,6 +479,38 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, param = ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN; else if (attr == &dev_attr_iface_initiator_name.attr) param = ISCSI_IFACE_PARAM_INITIATOR_NAME; + + if (param != -1) + return t->attr_is_visible(ISCSI_IFACE_PARAM, param); + + if (attr == &dev_attr_iface_vlan_id.attr) + param = ISCSI_NET_PARAM_VLAN_ID; + else if (attr == &dev_attr_iface_vlan_priority.attr) + param = ISCSI_NET_PARAM_VLAN_PRIORITY; + else if (attr == &dev_attr_iface_vlan_enabled.attr) + param = ISCSI_NET_PARAM_VLAN_ENABLED; + else if (attr == &dev_attr_iface_mtu.attr) + param = ISCSI_NET_PARAM_MTU; + else if (attr == &dev_attr_iface_port.attr) + param = ISCSI_NET_PARAM_PORT; + else if (attr == &dev_attr_iface_ipaddress_state.attr) + param = ISCSI_NET_PARAM_IPADDR_STATE; + else if (attr == &dev_attr_iface_delayed_ack_en.attr) + param = ISCSI_NET_PARAM_DELAYED_ACK_EN; + else if (attr == &dev_attr_iface_tcp_nagle_disable.attr) + param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE; + else if (attr == &dev_attr_iface_tcp_wsf_disable.attr) + param = ISCSI_NET_PARAM_TCP_WSF_DISABLE; + else if (attr == &dev_attr_iface_tcp_wsf.attr) + param = ISCSI_NET_PARAM_TCP_WSF; + else if (attr == &dev_attr_iface_tcp_timer_scale.attr) + param = ISCSI_NET_PARAM_TCP_TIMER_SCALE; + else if (attr == &dev_attr_iface_tcp_timestamp_en.attr) + param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN; + else if (attr == &dev_attr_iface_cache_id.attr) + param = ISCSI_NET_PARAM_CACHE_ID; + else if (attr == &dev_attr_iface_redirect_en.attr) + param = ISCSI_NET_PARAM_REDIRECT_EN; else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV4) { if (attr == &dev_attr_ipv4_iface_ipaddress.attr) param = ISCSI_NET_PARAM_IPV4_ADDR; @@ -598,32 +601,7 @@ static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, return 0; } - switch (param) { - case ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO: - case ISCSI_IFACE_PARAM_HDRDGST_EN: - case ISCSI_IFACE_PARAM_DATADGST_EN: - case ISCSI_IFACE_PARAM_IMM_DATA_EN: - case ISCSI_IFACE_PARAM_INITIAL_R2T_EN: - case ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN: - case ISCSI_IFACE_PARAM_PDU_INORDER_EN: - case ISCSI_IFACE_PARAM_ERL: - case ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH: - case ISCSI_IFACE_PARAM_FIRST_BURST: - case ISCSI_IFACE_PARAM_MAX_R2T: - case ISCSI_IFACE_PARAM_MAX_BURST: - case ISCSI_IFACE_PARAM_CHAP_AUTH_EN: - case ISCSI_IFACE_PARAM_BIDI_CHAP_EN: - case ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL: - case ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN: - case ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN: - case ISCSI_IFACE_PARAM_INITIATOR_NAME: - param_type = ISCSI_IFACE_PARAM; - break; - default: - param_type = ISCSI_NET_PARAM; - } - - return t->attr_is_visible(param_type, param); + return t->attr_is_visible(ISCSI_NET_PARAM, param); } static struct attribute *iscsi_iface_attrs[] = { diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 94c254e9012e..a6d3ac0a6cbc 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -221,7 +221,7 @@ static unsigned int sr_get_events(struct scsi_device *sdev) else if (med->media_event_code == 2) return DISK_EVENT_MEDIA_CHANGE; else if (med->media_event_code == 3) - return DISK_EVENT_EJECT_REQUEST; + return DISK_EVENT_MEDIA_CHANGE; return 0; } diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c index 19a02e958865..8fcdf89da8aa 100644 --- a/drivers/staging/qlge/qlge_main.c +++ b/drivers/staging/qlge/qlge_main.c @@ -4547,7 +4547,8 @@ static int qlge_probe(struct pci_dev *pdev, static int cards_found; int err; - devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter)); + devlink = devlink_alloc(&qlge_devlink_ops, sizeof(struct qlge_adapter), + &pdev->dev); if (!devlink) return -ENOMEM; @@ -4613,7 +4614,7 @@ static int qlge_probe(struct pci_dev *pdev, goto netdev_free; } - err = devlink_register(devlink, &pdev->dev); + err = devlink_register(devlink); if (err) goto netdev_free; diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index b32f4ee88e79..ca1b2312d6e7 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -25,7 +25,7 @@ #include "target_core_alua.h" static sense_reason_t -sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char *, u32, bool); +sbc_check_prot(struct se_device *, struct se_cmd *, unsigned char, u32, bool); static sense_reason_t sbc_execute_unmap(struct se_cmd *cmd); static sense_reason_t @@ -279,14 +279,14 @@ static inline unsigned long long transport_lba_64_ext(unsigned char *cdb) } static sense_reason_t -sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *ops) +sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *ops) { struct se_device *dev = cmd->se_dev; sector_t end_lba = dev->transport->get_blocks(dev) + 1; unsigned int sectors = sbc_get_write_same_sectors(cmd); sense_reason_t ret; - if ((flags[0] & 0x04) || (flags[0] & 0x02)) { + if ((flags & 0x04) || (flags & 0x02)) { pr_err("WRITE_SAME PBDATA and LBDATA" " bits not supported for Block Discard" " Emulation\n"); @@ -308,7 +308,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o } /* We always have ANC_SUP == 0 so setting ANCHOR is always an error */ - if (flags[0] & 0x10) { + if (flags & 0x10) { pr_warn("WRITE SAME with ANCHOR not supported\n"); return TCM_INVALID_CDB_FIELD; } @@ -316,7 +316,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o * Special case for WRITE_SAME w/ UNMAP=1 that ends up getting * translated into block discard requests within backend code. */ - if (flags[0] & 0x08) { + if (flags & 0x08) { if (!ops->execute_unmap) return TCM_UNSUPPORTED_SCSI_OPCODE; @@ -331,7 +331,7 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o if (!ops->execute_write_same) return TCM_UNSUPPORTED_SCSI_OPCODE; - ret = sbc_check_prot(dev, cmd, &cmd->t_task_cdb[0], sectors, true); + ret = sbc_check_prot(dev, cmd, flags >> 5, sectors, true); if (ret) return ret; @@ -717,10 +717,9 @@ sbc_set_prot_op_checks(u8 protect, bool fabric_prot, enum target_prot_type prot_ } static sense_reason_t -sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb, +sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char protect, u32 sectors, bool is_write) { - u8 protect = cdb[1] >> 5; int sp_ops = cmd->se_sess->sup_prot_ops; int pi_prot_type = dev->dev_attrib.pi_prot_type; bool fabric_prot = false; @@ -768,7 +767,7 @@ sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb, fallthrough; default: pr_err("Unable to determine pi_prot_type for CDB: 0x%02x " - "PROTECT: 0x%02x\n", cdb[0], protect); + "PROTECT: 0x%02x\n", cmd->t_task_cdb[0], protect); return TCM_INVALID_CDB_FIELD; } @@ -843,7 +842,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false); if (ret) return ret; @@ -857,7 +856,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false); if (ret) return ret; @@ -871,7 +870,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, false); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, false); if (ret) return ret; @@ -892,7 +891,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -906,7 +905,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -921,7 +920,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) if (sbc_check_dpofua(dev, cmd, cdb)) return TCM_INVALID_CDB_FIELD; - ret = sbc_check_prot(dev, cmd, cdb, sectors, true); + ret = sbc_check_prot(dev, cmd, cdb[1] >> 5, sectors, true); if (ret) return ret; @@ -980,7 +979,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) size = sbc_get_size(cmd, 1); cmd->t_task_lba = get_unaligned_be64(&cdb[12]); - ret = sbc_setup_write_same(cmd, &cdb[10], ops); + ret = sbc_setup_write_same(cmd, cdb[10], ops); if (ret) return ret; break; @@ -1079,7 +1078,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) size = sbc_get_size(cmd, 1); cmd->t_task_lba = get_unaligned_be64(&cdb[2]); - ret = sbc_setup_write_same(cmd, &cdb[1], ops); + ret = sbc_setup_write_same(cmd, cdb[1], ops); if (ret) return ret; break; @@ -1097,7 +1096,7 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) * Follow sbcr26 with WRITE_SAME (10) and check for the existence * of byte 1 bit 3 UNMAP instead of original reserved field */ - ret = sbc_setup_write_same(cmd, &cdb[1], ops); + ret = sbc_setup_write_same(cmd, cdb[1], ops); if (ret) return ret; break; diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 7e35eddd9eb7..26ceabe34de5 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -886,7 +886,7 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) INIT_WORK(&cmd->work, success ? target_complete_ok_work : target_complete_failure_work); - if (wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID) + if (!wwn || wwn->cmd_compl_affinity == SE_COMPL_AFFINITY_CPUID) cpu = cmd->cpuid; else cpu = wwn->cmd_compl_affinity; diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index fdf79bcf7eb0..35d5908b5478 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -824,7 +824,7 @@ static struct usb_class_driver wdm_class = { }; /* --- WWAN framework integration --- */ -#ifdef CONFIG_WWAN +#ifdef CONFIG_WWAN_CORE static int wdm_wwan_port_start(struct wwan_port *port) { struct wdm_device *desc = wwan_port_get_drvdata(port); @@ -963,11 +963,11 @@ static void wdm_wwan_rx(struct wdm_device *desc, int length) /* inbuf has been copied, it is safe to check for outstanding data */ schedule_work(&desc->service_outs_intr); } -#else /* CONFIG_WWAN */ +#else /* CONFIG_WWAN_CORE */ static void wdm_wwan_init(struct wdm_device *desc) {} static void wdm_wwan_deinit(struct wdm_device *desc) {} static void wdm_wwan_rx(struct wdm_device *desc, int length) {} -#endif /* CONFIG_WWAN */ +#endif /* CONFIG_WWAN_CORE */ /* --- error handling --- */ static void wdm_rxwork(struct work_struct *work) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index b97464498763..9618ba622a2d 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -1133,7 +1133,7 @@ static int do_proc_control(struct usb_dev_state *ps, "wIndex=%04x wLength=%04x\n", ctrl->bRequestType, ctrl->bRequest, ctrl->wValue, ctrl->wIndex, ctrl->wLength); - if (ctrl->bRequestType & 0x80) { + if ((ctrl->bRequestType & USB_DIR_IN) && ctrl->wLength) { pipe = usb_rcvctrlpipe(dev, 0); snoop_urb(dev, NULL, pipe, ctrl->wLength, tmo, SUBMIT, NULL, 0); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index d1efc7141333..86658a81d284 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -48,6 +48,7 @@ #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ +#define USB_PING_RESPONSE_TIME 400 /* ns */ /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can @@ -182,8 +183,9 @@ int usb_device_supports_lpm(struct usb_device *udev) } /* - * Set the Maximum Exit Latency (MEL) for the host to initiate a transition from - * either U1 or U2. + * Set the Maximum Exit Latency (MEL) for the host to wakup up the path from + * U1/U2, send a PING to the device and receive a PING_RESPONSE. + * See USB 3.1 section C.1.5.2 */ static void usb_set_lpm_mel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params, @@ -193,35 +195,37 @@ static void usb_set_lpm_mel(struct usb_device *udev, unsigned int hub_exit_latency) { unsigned int total_mel; - unsigned int device_mel; - unsigned int hub_mel; /* - * Calculate the time it takes to transition all links from the roothub - * to the parent hub into U0. The parent hub must then decode the - * packet (hub header decode latency) to figure out which port it was - * bound for. - * - * The Hub Header decode latency is expressed in 0.1us intervals (0x1 - * means 0.1us). Multiply that by 100 to get nanoseconds. + * tMEL1. time to transition path from host to device into U0. + * MEL for parent already contains the delay up to parent, so only add + * the exit latency for the last link (pick the slower exit latency), + * and the hub header decode latency. See USB 3.1 section C 2.2.1 + * Store MEL in nanoseconds */ total_mel = hub_lpm_params->mel + - (hub->descriptor->u.ss.bHubHdrDecLat * 100); + max(udev_exit_latency, hub_exit_latency) * 1000 + + hub->descriptor->u.ss.bHubHdrDecLat * 100; /* - * How long will it take to transition the downstream hub's port into - * U0? The greater of either the hub exit latency or the device exit - * latency. - * - * The BOS U1/U2 exit latencies are expressed in 1us intervals. - * Multiply that by 1000 to get nanoseconds. + * tMEL2. Time to submit PING packet. Sum of tTPTransmissionDelay for + * each link + wHubDelay for each hub. Add only for last link. + * tMEL4, the time for PING_RESPONSE to traverse upstream is similar. + * Multiply by 2 to include it as well. */ - device_mel = udev_exit_latency * 1000; - hub_mel = hub_exit_latency * 1000; - if (device_mel > hub_mel) - total_mel += device_mel; - else - total_mel += hub_mel; + total_mel += (__le16_to_cpu(hub->descriptor->u.ss.wHubDelay) + + USB_TP_TRANSMISSION_DELAY) * 2; + + /* + * tMEL3, tPingResponse. Time taken by device to generate PING_RESPONSE + * after receiving PING. Also add 2100ns as stated in USB 3.1 C 1.5.2.4 + * to cover the delay if the PING_RESPONSE is queued behind a Max Packet + * Size DP. + * Note these delays should be added only once for the entire path, so + * add them to the MEL of the device connected to the roothub. + */ + if (!hub->hdev->parent) + total_mel += USB_PING_RESPONSE_TIME + 2100; udev_lpm_params->mel = total_mel; } @@ -4113,6 +4117,47 @@ static int usb_set_lpm_timeout(struct usb_device *udev, } /* + * Don't allow device intiated U1/U2 if the system exit latency + one bus + * interval is greater than the minimum service interval of any active + * periodic endpoint. See USB 3.2 section 9.4.9 + */ +static bool usb_device_may_initiate_lpm(struct usb_device *udev, + enum usb3_link_state state) +{ + unsigned int sel; /* us */ + int i, j; + + if (state == USB3_LPM_U1) + sel = DIV_ROUND_UP(udev->u1_params.sel, 1000); + else if (state == USB3_LPM_U2) + sel = DIV_ROUND_UP(udev->u2_params.sel, 1000); + else + return false; + + for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { + struct usb_interface *intf; + struct usb_endpoint_descriptor *desc; + unsigned int interval; + + intf = udev->actconfig->interface[i]; + if (!intf) + continue; + + for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) { + desc = &intf->cur_altsetting->endpoint[j].desc; + + if (usb_endpoint_xfer_int(desc) || + usb_endpoint_xfer_isoc(desc)) { + interval = (1 << (desc->bInterval - 1)) * 125; + if (sel + 125 > interval) + return false; + } + } + } + return true; +} + +/* * Enable the hub-initiated U1/U2 idle timeouts, and enable device-initiated * U1/U2 entry. * @@ -4184,20 +4229,23 @@ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev, * U1/U2_ENABLE */ if (udev->actconfig && - usb_set_device_initiated_lpm(udev, state, true) == 0) { - if (state == USB3_LPM_U1) - udev->usb3_lpm_u1_enabled = 1; - else if (state == USB3_LPM_U2) - udev->usb3_lpm_u2_enabled = 1; - } else { - /* Don't request U1/U2 entry if the device - * cannot transition to U1/U2. - */ - usb_set_lpm_timeout(udev, state, 0); - hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); + usb_device_may_initiate_lpm(udev, state)) { + if (usb_set_device_initiated_lpm(udev, state, true)) { + /* + * Request to enable device initiated U1/U2 failed, + * better to turn off lpm in this case. + */ + usb_set_lpm_timeout(udev, state, 0); + hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); + return; + } } -} + if (state == USB3_LPM_U1) + udev->usb3_lpm_u1_enabled = 1; + else if (state == USB3_LPM_U2) + udev->usb3_lpm_u2_enabled = 1; +} /* * Disable the hub-initiated U1/U2 idle timeouts, and disable device-initiated * U1/U2 entry. diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 6114cf83bb44..8239fe7129dd 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -501,10 +501,6 @@ static const struct usb_device_id usb_quirk_list[] = { /* DJI CineSSD */ { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM }, - /* Fibocom L850-GL LTE Modem */ - { USB_DEVICE(0x2cb7, 0x0007), .driver_info = - USB_QUIRK_IGNORE_REMOTE_WAKEUP }, - /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index ab6b815e0089..483de2bbfaab 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -383,6 +383,9 @@ enum dwc2_ep0_state { * 0 - No (default) * 1 - Partial power down * 2 - Hibernation + * @no_clock_gating: Specifies whether to avoid clock gating feature. + * 0 - No (use clock gating) + * 1 - Yes (avoid it) * @lpm: Enable LPM support. * 0 - No * 1 - Yes @@ -480,6 +483,7 @@ struct dwc2_core_params { #define DWC2_POWER_DOWN_PARAM_NONE 0 #define DWC2_POWER_DOWN_PARAM_PARTIAL 1 #define DWC2_POWER_DOWN_PARAM_HIBERNATION 2 + bool no_clock_gating; bool lpm; bool lpm_clock_gating; diff --git a/drivers/usb/dwc2/core_intr.c b/drivers/usb/dwc2/core_intr.c index a5ab03808da6..a5c52b237e72 100644 --- a/drivers/usb/dwc2/core_intr.c +++ b/drivers/usb/dwc2/core_intr.c @@ -556,7 +556,8 @@ static void dwc2_handle_usb_suspend_intr(struct dwc2_hsotg *hsotg) * If neither hibernation nor partial power down are supported, * clock gating is used to save power. */ - dwc2_gadget_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_gadget_enter_clock_gating(hsotg); } /* diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index c581ee41ac81..3146df6e6510 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -2749,12 +2749,14 @@ static void dwc2_hsotg_complete_in(struct dwc2_hsotg *hsotg, return; } - /* Zlp for all endpoints, for ep0 only in DATA IN stage */ + /* Zlp for all endpoints in non DDMA, for ep0 only in DATA IN stage */ if (hs_ep->send_zlp) { - dwc2_hsotg_program_zlp(hsotg, hs_ep); hs_ep->send_zlp = 0; - /* transfer will be completed on next complete interrupt */ - return; + if (!using_desc_dma(hsotg)) { + dwc2_hsotg_program_zlp(hsotg, hs_ep); + /* transfer will be completed on next complete interrupt */ + return; + } } if (hs_ep->index == 0 && hsotg->ep0_state == DWC2_EP0_DATA_IN) { @@ -3900,9 +3902,27 @@ static void dwc2_hsotg_ep_stop_xfr(struct dwc2_hsotg *hsotg, __func__); } } else { + /* Mask GINTSTS_GOUTNAKEFF interrupt */ + dwc2_hsotg_disable_gsint(hsotg, GINTSTS_GOUTNAKEFF); + if (!(dwc2_readl(hsotg, GINTSTS) & GINTSTS_GOUTNAKEFF)) dwc2_set_bit(hsotg, DCTL, DCTL_SGOUTNAK); + if (!using_dma(hsotg)) { + /* Wait for GINTSTS_RXFLVL interrupt */ + if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, + GINTSTS_RXFLVL, 100)) { + dev_warn(hsotg->dev, "%s: timeout GINTSTS.RXFLVL\n", + __func__); + } else { + /* + * Pop GLOBAL OUT NAK status packet from RxFIFO + * to assert GOUTNAKEFF interrupt + */ + dwc2_readl(hsotg, GRXSTSP); + } + } + /* Wait for global nak to take effect */ if (dwc2_hsotg_wait_bit_set(hsotg, GINTSTS, GINTSTS_GOUTNAKEFF, 100)) @@ -4348,6 +4368,9 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now) epctl = dwc2_readl(hs, epreg); if (value) { + /* Unmask GOUTNAKEFF interrupt */ + dwc2_hsotg_en_gsint(hs, GINTSTS_GOUTNAKEFF); + if (!(dwc2_readl(hs, GINTSTS) & GINTSTS_GOUTNAKEFF)) dwc2_set_bit(hs, DCTL, DCTL_SGOUTNAK); // STALL bit will be set in GOUTNAKEFF interrupt handler diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 035d4911a3c3..2a7828971d05 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -3338,7 +3338,8 @@ int dwc2_port_suspend(struct dwc2_hsotg *hsotg, u16 windex) * If not hibernation nor partial power down are supported, * clock gating is used to save power. */ - dwc2_host_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_host_enter_clock_gating(hsotg); break; } @@ -4402,7 +4403,8 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd) * If not hibernation nor partial power down are supported, * clock gating is used to save power. */ - dwc2_host_enter_clock_gating(hsotg); + if (!hsotg->params.no_clock_gating) + dwc2_host_enter_clock_gating(hsotg); /* After entering suspend, hardware is not accessible */ clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags); diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c index 67c5eb140232..59e119345994 100644 --- a/drivers/usb/dwc2/params.c +++ b/drivers/usb/dwc2/params.c @@ -76,6 +76,7 @@ static void dwc2_set_s3c6400_params(struct dwc2_hsotg *hsotg) struct dwc2_core_params *p = &hsotg->params; p->power_down = DWC2_POWER_DOWN_PARAM_NONE; + p->no_clock_gating = true; p->phy_utmi_width = 8; } diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index dccdf13b5f9e..5991766239ba 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -1279,6 +1279,7 @@ struct dwc3 { unsigned dis_metastability_quirk:1; unsigned dis_split_quirk:1; + unsigned async_callbacks:1; u16 imod_interval; }; diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c index 3cd294264372..2f9e45eed228 100644 --- a/drivers/usb/dwc3/ep0.c +++ b/drivers/usb/dwc3/ep0.c @@ -597,11 +597,13 @@ static int dwc3_ep0_set_address(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) static int dwc3_ep0_delegate_req(struct dwc3 *dwc, struct usb_ctrlrequest *ctrl) { - int ret; + int ret = -EINVAL; - spin_unlock(&dwc->lock); - ret = dwc->gadget_driver->setup(dwc->gadget, ctrl); - spin_lock(&dwc->lock); + if (dwc->async_callbacks) { + spin_unlock(&dwc->lock); + ret = dwc->gadget_driver->setup(dwc->gadget, ctrl); + spin_lock(&dwc->lock); + } return ret; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index af6d7f157989..45f2bc0807e8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2585,6 +2585,16 @@ static int dwc3_gadget_vbus_draw(struct usb_gadget *g, unsigned int mA) return ret; } +static void dwc3_gadget_async_callbacks(struct usb_gadget *g, bool enable) +{ + struct dwc3 *dwc = gadget_to_dwc(g); + unsigned long flags; + + spin_lock_irqsave(&dwc->lock, flags); + dwc->async_callbacks = enable; + spin_unlock_irqrestore(&dwc->lock, flags); +} + static const struct usb_gadget_ops dwc3_gadget_ops = { .get_frame = dwc3_gadget_get_frame, .wakeup = dwc3_gadget_wakeup, @@ -2596,6 +2606,7 @@ static const struct usb_gadget_ops dwc3_gadget_ops = { .udc_set_ssp_rate = dwc3_gadget_set_ssp_rate, .get_config_params = dwc3_gadget_config_params, .vbus_draw = dwc3_gadget_vbus_draw, + .udc_async_callbacks = dwc3_gadget_async_callbacks, }; /* -------------------------------------------------------------------------- */ @@ -3231,7 +3242,7 @@ static void dwc3_endpoint_interrupt(struct dwc3 *dwc, static void dwc3_disconnect_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->disconnect) { + if (dwc->async_callbacks && dwc->gadget_driver->disconnect) { spin_unlock(&dwc->lock); dwc->gadget_driver->disconnect(dwc->gadget); spin_lock(&dwc->lock); @@ -3240,7 +3251,7 @@ static void dwc3_disconnect_gadget(struct dwc3 *dwc) static void dwc3_suspend_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->suspend) { + if (dwc->async_callbacks && dwc->gadget_driver->suspend) { spin_unlock(&dwc->lock); dwc->gadget_driver->suspend(dwc->gadget); spin_lock(&dwc->lock); @@ -3249,7 +3260,7 @@ static void dwc3_suspend_gadget(struct dwc3 *dwc) static void dwc3_resume_gadget(struct dwc3 *dwc) { - if (dwc->gadget_driver && dwc->gadget_driver->resume) { + if (dwc->async_callbacks && dwc->gadget_driver->resume) { spin_unlock(&dwc->lock); dwc->gadget_driver->resume(dwc->gadget); spin_lock(&dwc->lock); @@ -3261,7 +3272,7 @@ static void dwc3_reset_gadget(struct dwc3 *dwc) if (!dwc->gadget_driver) return; - if (dwc->gadget->speed != USB_SPEED_UNKNOWN) { + if (dwc->async_callbacks && dwc->gadget->speed != USB_SPEED_UNKNOWN) { spin_unlock(&dwc->lock); usb_gadget_udc_reset(dwc->gadget, dwc->gadget_driver); spin_lock(&dwc->lock); @@ -3585,7 +3596,7 @@ static void dwc3_gadget_wakeup_interrupt(struct dwc3 *dwc) * implemented. */ - if (dwc->gadget_driver && dwc->gadget_driver->resume) { + if (dwc->async_callbacks && dwc->gadget_driver->resume) { spin_unlock(&dwc->lock); dwc->gadget_driver->resume(dwc->gadget); spin_lock(&dwc->lock); diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index bffef8e47dac..281ca766698a 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1198,7 +1198,7 @@ void gserial_free_line(unsigned char port_num) struct gs_port *port; mutex_lock(&ports[port_num].lock); - if (WARN_ON(!ports[port_num].port)) { + if (!ports[port_num].port) { mutex_unlock(&ports[port_num].lock); return; } diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index a54d1cef17db..c0ca7144e512 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3853,6 +3853,7 @@ static int tegra_xudc_probe(struct platform_device *pdev) return 0; free_eps: + pm_runtime_disable(&pdev->dev); tegra_xudc_free_eps(xudc); free_event_ring: tegra_xudc_free_event_ring(xudc); diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 36f5bf6a0752..10b0365f3439 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -703,24 +703,28 @@ EXPORT_SYMBOL_GPL(ehci_setup); static irqreturn_t ehci_irq (struct usb_hcd *hcd) { struct ehci_hcd *ehci = hcd_to_ehci (hcd); - u32 status, masked_status, pcd_status = 0, cmd; + u32 status, current_status, masked_status, pcd_status = 0; + u32 cmd; int bh; spin_lock(&ehci->lock); - status = ehci_readl(ehci, &ehci->regs->status); + status = 0; + current_status = ehci_readl(ehci, &ehci->regs->status); +restart: /* e.g. cardbus physical eject */ - if (status == ~(u32) 0) { + if (current_status == ~(u32) 0) { ehci_dbg (ehci, "device removed\n"); goto dead; } + status |= current_status; /* * We don't use STS_FLR, but some controllers don't like it to * remain on, so mask it out along with the other status bits. */ - masked_status = status & (INTR_MASK | STS_FLR); + masked_status = current_status & (INTR_MASK | STS_FLR); /* Shared IRQ? */ if (!masked_status || unlikely(ehci->rh_state == EHCI_RH_HALTED)) { @@ -730,6 +734,12 @@ static irqreturn_t ehci_irq (struct usb_hcd *hcd) /* clear (just) interrupts */ ehci_writel(ehci, masked_status, &ehci->regs->status); + + /* For edge interrupts, don't race with an interrupt bit being raised */ + current_status = ehci_readl(ehci, &ehci->regs->status); + if (current_status & INTR_MASK) + goto restart; + cmd = ehci_readl(ehci, &ehci->regs->command); bh = 0; diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index e7a8e0609853..59cc1bc7f12f 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -153,8 +153,6 @@ struct max3421_hcd { */ struct urb *curr_urb; enum scheduling_pass sched_pass; - struct usb_device *loaded_dev; /* dev that's loaded into the chip */ - int loaded_epnum; /* epnum whose toggles are loaded */ int urb_done; /* > 0 -> no errors, < 0: errno */ size_t curr_len; u8 hien; @@ -492,39 +490,17 @@ max3421_set_speed(struct usb_hcd *hcd, struct usb_device *dev) * Caller must NOT hold HCD spinlock. */ static void -max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum, - int force_toggles) +max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum) { - struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd); - int old_epnum, same_ep, rcvtog, sndtog; - struct usb_device *old_dev; + int rcvtog, sndtog; u8 hctl; - old_dev = max3421_hcd->loaded_dev; - old_epnum = max3421_hcd->loaded_epnum; - - same_ep = (dev == old_dev && epnum == old_epnum); - if (same_ep && !force_toggles) - return; - - if (old_dev && !same_ep) { - /* save the old end-points toggles: */ - u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL); - - rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1; - sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1; - - /* no locking: HCD (i.e., we) own toggles, don't we? */ - usb_settoggle(old_dev, old_epnum, 0, rcvtog); - usb_settoggle(old_dev, old_epnum, 1, sndtog); - } /* setup new endpoint's toggle bits: */ rcvtog = usb_gettoggle(dev, epnum, 0); sndtog = usb_gettoggle(dev, epnum, 1); hctl = (BIT(rcvtog + MAX3421_HCTL_RCVTOG0_BIT) | BIT(sndtog + MAX3421_HCTL_SNDTOG0_BIT)); - max3421_hcd->loaded_epnum = epnum; spi_wr8(hcd, MAX3421_REG_HCTL, hctl); /* @@ -532,7 +508,6 @@ max3421_set_address(struct usb_hcd *hcd, struct usb_device *dev, int epnum, * address-assignment so it's best to just always load the * address whenever the end-point changed/was forced. */ - max3421_hcd->loaded_dev = dev; spi_wr8(hcd, MAX3421_REG_PERADDR, dev->devnum); } @@ -667,7 +642,7 @@ max3421_select_and_start_urb(struct usb_hcd *hcd) struct max3421_hcd *max3421_hcd = hcd_to_max3421(hcd); struct urb *urb, *curr_urb = NULL; struct max3421_ep *max3421_ep; - int epnum, force_toggles = 0; + int epnum; struct usb_host_endpoint *ep; struct list_head *pos; unsigned long flags; @@ -777,7 +752,6 @@ done: usb_settoggle(urb->dev, epnum, 0, 1); usb_settoggle(urb->dev, epnum, 1, 1); max3421_ep->pkt_state = PKT_STATE_SETUP; - force_toggles = 1; } else max3421_ep->pkt_state = PKT_STATE_TRANSFER; } @@ -785,7 +759,7 @@ done: spin_unlock_irqrestore(&max3421_hcd->lock, flags); max3421_ep->last_active = max3421_hcd->frame_number; - max3421_set_address(hcd, urb->dev, epnum, force_toggles); + max3421_set_address(hcd, urb->dev, epnum); max3421_set_speed(hcd, urb->dev); max3421_next_transfer(hcd, 0); return 1; @@ -1379,6 +1353,16 @@ max3421_urb_done(struct usb_hcd *hcd) status = 0; urb = max3421_hcd->curr_urb; if (urb) { + /* save the old end-points toggles: */ + u8 hrsl = spi_rd8(hcd, MAX3421_REG_HRSL); + int rcvtog = (hrsl >> MAX3421_HRSL_RCVTOGRD_BIT) & 1; + int sndtog = (hrsl >> MAX3421_HRSL_SNDTOGRD_BIT) & 1; + int epnum = usb_endpoint_num(&urb->ep->desc); + + /* no locking: HCD (i.e., we) own toggles, don't we? */ + usb_settoggle(urb->dev, epnum, 0, rcvtog); + usb_settoggle(urb->dev, epnum, 1, sndtog); + max3421_hcd->curr_urb = NULL; spin_lock_irqsave(&max3421_hcd->lock, flags); usb_hcd_unlink_urb_from_ep(hcd, urb); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index e9b18fc17617..151e93c4bd57 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1638,11 +1638,12 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf) * Inform the usbcore about resume-in-progress by returning * a non-zero value even if there are no status changes. */ + spin_lock_irqsave(&xhci->lock, flags); + status = bus_state->resuming_ports; mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC; - spin_lock_irqsave(&xhci->lock, flags); /* For each port, did anything change? If so, set that bit in buf. */ for (i = 0; i < max_ports; i++) { temp = readl(ports[i]->addr); diff --git a/drivers/usb/host/xhci-pci-renesas.c b/drivers/usb/host/xhci-pci-renesas.c index 1da647961c25..5923844ed821 100644 --- a/drivers/usb/host/xhci-pci-renesas.c +++ b/drivers/usb/host/xhci-pci-renesas.c @@ -207,8 +207,7 @@ static int renesas_check_rom_state(struct pci_dev *pdev) return 0; case RENESAS_ROM_STATUS_NO_RESULT: /* No result yet */ - dev_dbg(&pdev->dev, "Unknown ROM status ...\n"); - break; + return 0; case RENESAS_ROM_STATUS_ERROR: /* Error State */ default: /* All other states are marked as "Reserved states" */ @@ -225,12 +224,13 @@ static int renesas_fw_check_running(struct pci_dev *pdev) u8 fw_state; int err; - /* - * Only if device has ROM and loaded FW we can skip loading and - * return success. Otherwise (even unknown state), attempt to load FW. - */ - if (renesas_check_rom(pdev) && !renesas_check_rom_state(pdev)) - return 0; + /* Check if device has ROM and loaded, if so skip everything */ + err = renesas_check_rom(pdev); + if (err) { /* we have rom */ + err = renesas_check_rom_state(pdev); + if (!err) + return err; + } /* * Test if the device is actually needing the firmware. As most diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 18c2bbddf080..1c9a7957c45c 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -636,7 +636,14 @@ static const struct pci_device_id pci_ids[] = { { /* end: all zeroes */ } }; MODULE_DEVICE_TABLE(pci, pci_ids); + +/* + * Without CONFIG_USB_XHCI_PCI_RENESAS renesas_xhci_check_request_fw() won't + * load firmware, so don't encumber the xhci-pci driver with it. + */ +#if IS_ENABLED(CONFIG_USB_XHCI_PCI_RENESAS) MODULE_FIRMWARE("renesas_usb_fw.mem"); +#endif /* pci driver glue; this is a "new style" PCI driver module */ static struct pci_driver xhci_pci_driver = { diff --git a/drivers/usb/phy/phy.c b/drivers/usb/phy/phy.c index 83ed5089475a..1b24492bb4e5 100644 --- a/drivers/usb/phy/phy.c +++ b/drivers/usb/phy/phy.c @@ -86,10 +86,10 @@ static struct usb_phy *__device_to_usb_phy(struct device *dev) list_for_each_entry(usb_phy, &phy_list, head) { if (usb_phy->dev == dev) - break; + return usb_phy; } - return usb_phy; + return NULL; } static void usb_phy_set_default_current(struct usb_phy *usb_phy) @@ -150,8 +150,14 @@ static int usb_phy_uevent(struct device *dev, struct kobj_uevent_env *env) struct usb_phy *usb_phy; char uchger_state[50] = { 0 }; char uchger_type[50] = { 0 }; + unsigned long flags; + spin_lock_irqsave(&phy_lock, flags); usb_phy = __device_to_usb_phy(dev); + spin_unlock_irqrestore(&phy_lock, flags); + + if (!usb_phy) + return -ENODEV; snprintf(uchger_state, ARRAY_SIZE(uchger_state), "USB_CHARGER_STATE=%s", usb_chger_state[usb_phy->chg_state]); diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index b5e7991dc7d9..a3c2b01ccf7b 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -101,6 +101,8 @@ static struct dma_chan *usbhsf_dma_chan_get(struct usbhs_fifo *fifo, #define usbhsf_dma_map(p) __usbhsf_dma_map_ctrl(p, 1) #define usbhsf_dma_unmap(p) __usbhsf_dma_map_ctrl(p, 0) static int __usbhsf_dma_map_ctrl(struct usbhs_pkt *pkt, int map); +static void usbhsf_tx_irq_ctrl(struct usbhs_pipe *pipe, int enable); +static void usbhsf_rx_irq_ctrl(struct usbhs_pipe *pipe, int enable); struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt) { struct usbhs_priv *priv = usbhs_pipe_to_priv(pipe); @@ -123,6 +125,11 @@ struct usbhs_pkt *usbhs_pkt_pop(struct usbhs_pipe *pipe, struct usbhs_pkt *pkt) if (chan) { dmaengine_terminate_all(chan); usbhsf_dma_unmap(pkt); + } else { + if (usbhs_pipe_is_dir_in(pipe)) + usbhsf_rx_irq_ctrl(pipe, 0); + else + usbhsf_tx_irq_ctrl(pipe, 0); } usbhs_pipe_clear_without_sequence(pipe, 0, 0); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 09b845d0da41..3c80bfbf3bec 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -155,6 +155,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */ { USB_DEVICE(0x10C4, 0x89FB) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */ + { USB_DEVICE(0x10C4, 0x8A5B) }, /* CEL EM3588 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */ { USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */ @@ -202,8 +203,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */ { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */ - { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */ - { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */ + { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 M.2 Key E serial interface */ + { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 Display serial interface */ { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 7608584ef4fe..0fbe253dc570 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -238,6 +238,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_UC15 0x9090 /* These u-blox products use Qualcomm's vendor ID */ #define UBLOX_PRODUCT_R410M 0x90b2 +#define UBLOX_PRODUCT_R6XX 0x90fa /* These Yuga products use Qualcomm's vendor ID */ #define YUGA_PRODUCT_CLM920_NC5 0x9625 @@ -1101,6 +1102,8 @@ static const struct usb_device_id option_ids[] = { /* u-blox products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R410M), .driver_info = RSVD(1) | RSVD(3) }, + { USB_DEVICE(QUALCOMM_VENDOR_ID, UBLOX_PRODUCT_R6XX), + .driver_info = RSVD(3) }, /* Quectel products using Quectel vendor ID */ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC21, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, diff --git a/drivers/usb/storage/unusual_uas.h b/drivers/usb/storage/unusual_uas.h index f9677a5ec31b..c35a6db993f1 100644 --- a/drivers/usb/storage/unusual_uas.h +++ b/drivers/usb/storage/unusual_uas.h @@ -45,6 +45,13 @@ UNUSUAL_DEV(0x059f, 0x105f, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_REPORT_OPCODES | US_FL_NO_SAME), +/* Reported-by: Julian Sikorski <belegdol@gmail.com> */ +UNUSUAL_DEV(0x059f, 0x1061, 0x0000, 0x9999, + "LaCie", + "Rugged USB3-FW", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + /* * Apricorn USB3 dongle sometimes returns "USBSUSBSUSBS" in response to SCSI * commands in UAS mode. Observed with the 1.28 firmware; are there others? diff --git a/drivers/usb/typec/stusb160x.c b/drivers/usb/typec/stusb160x.c index 6eaeba9b096e..e7745d1c2a5c 100644 --- a/drivers/usb/typec/stusb160x.c +++ b/drivers/usb/typec/stusb160x.c @@ -686,6 +686,15 @@ static int stusb160x_probe(struct i2c_client *client) return -ENODEV; /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This it breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. + */ + fw_devlink_purge_absent_suppliers(fwnode); + + /* * When both VDD and VSYS power supplies are present, the low power * supply VSYS is selected when VSYS voltage is above 3.1 V. * Otherwise VDD is selected. @@ -739,10 +748,6 @@ static int stusb160x_probe(struct i2c_client *client) typec_set_pwr_opmode(chip->port, chip->pwr_opmode); if (client->irq) { - ret = stusb160x_irq_init(chip, client->irq); - if (ret) - goto port_unregister; - chip->role_sw = fwnode_usb_role_switch_get(fwnode); if (IS_ERR(chip->role_sw)) { ret = PTR_ERR(chip->role_sw); @@ -752,6 +757,10 @@ static int stusb160x_probe(struct i2c_client *client) ret); goto port_unregister; } + + ret = stusb160x_irq_init(chip, client->irq); + if (ret) + goto role_sw_put; } else { /* * If Source or Dual power role, need to enable VDD supply @@ -775,6 +784,9 @@ static int stusb160x_probe(struct i2c_client *client) return 0; +role_sw_put: + if (chip->role_sw) + usb_role_switch_put(chip->role_sw); port_unregister: typec_unregister_port(chip->port); all_reg_disable: diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 938219bc1b4b..21b3ae25c76d 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -629,6 +629,15 @@ static int tps6598x_probe(struct i2c_client *client) if (!fwnode) return -ENODEV; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. + */ + fw_devlink_purge_absent_suppliers(fwnode); + tps->role_sw = fwnode_usb_role_switch_get(fwnode); if (IS_ERR(tps->role_sw)) { ret = PTR_ERR(tps->role_sw); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 2a31467f7ac5..de8e8a1e13e7 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -573,7 +573,7 @@ static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent) cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent)); MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index); - MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma); err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out)); diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 06fb7a93a1bd..4d5ae61580aa 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -168,21 +168,6 @@ config OSF4_COMPAT with v4 shared libraries freely available from Compaq. If you're going to use shared libraries from Tru64 version 5.0 or later, say N. -config BINFMT_EM86 - tristate "Kernel support for Linux/Intel ELF binaries" - depends on ALPHA - help - Say Y here if you want to be able to execute Linux/Intel ELF - binaries just like native Alpha binaries on your Alpha machine. For - this to work, you need to have the emulator /usr/bin/em86 in place. - - You can get the same functionality by saying N here and saying Y to - "Kernel support for MISC binaries". - - You may answer M to compile the emulation support as a module and - later load the module when you want to use a Linux/Intel binary. The - module will be called binfmt_em86. If unsure, say Y. - config BINFMT_MISC tristate "Kernel support for MISC binaries" help diff --git a/fs/Makefile b/fs/Makefile index 9c708e1fbe8f..f98f3e691c37 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -39,7 +39,6 @@ obj-$(CONFIG_FS_ENCRYPTION) += crypto/ obj-$(CONFIG_FS_VERITY) += verity/ obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o -obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o obj-$(CONFIG_BINFMT_SCRIPT) += binfmt_script.o obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c deleted file mode 100644 index 06b9b9fddf70..000000000000 --- a/fs/binfmt_em86.c +++ /dev/null @@ -1,110 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * linux/fs/binfmt_em86.c - * - * Based on linux/fs/binfmt_script.c - * Copyright (C) 1996 Martin von Löwis - * original #!-checking implemented by tytso. - * - * em86 changes Copyright (C) 1997 Jim Paradis - */ - -#include <linux/module.h> -#include <linux/string.h> -#include <linux/stat.h> -#include <linux/binfmts.h> -#include <linux/elf.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/errno.h> - - -#define EM86_INTERP "/usr/bin/em86" -#define EM86_I_NAME "em86" - -static int load_em86(struct linux_binprm *bprm) -{ - const char *i_name, *i_arg; - char *interp; - struct file * file; - int retval; - struct elfhdr elf_ex; - - /* Make sure this is a Linux/Intel ELF executable... */ - elf_ex = *((struct elfhdr *)bprm->buf); - - if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0) - return -ENOEXEC; - - /* First of all, some simple consistency checks */ - if ((elf_ex.e_type != ET_EXEC && elf_ex.e_type != ET_DYN) || - (!((elf_ex.e_machine == EM_386) || (elf_ex.e_machine == EM_486))) || - !bprm->file->f_op->mmap) { - return -ENOEXEC; - } - - /* Need to be able to load the file after exec */ - if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE) - return -ENOENT; - - /* Unlike in the script case, we don't have to do any hairy - * parsing to find our interpreter... it's hardcoded! - */ - interp = EM86_INTERP; - i_name = EM86_I_NAME; - i_arg = NULL; /* We reserve the right to add an arg later */ - - /* - * Splice in (1) the interpreter's name for argv[0] - * (2) (optional) argument to interpreter - * (3) filename of emulated file (replace argv[0]) - * - * This is done in reverse order, because of how the - * user environment and arguments are stored. - */ - remove_arg_zero(bprm); - retval = copy_string_kernel(bprm->filename, bprm); - if (retval < 0) return retval; - bprm->argc++; - if (i_arg) { - retval = copy_string_kernel(i_arg, bprm); - if (retval < 0) return retval; - bprm->argc++; - } - retval = copy_string_kernel(i_name, bprm); - if (retval < 0) return retval; - bprm->argc++; - - /* - * OK, now restart the process with the interpreter's inode. - * Note that we use open_exec() as the name is now in kernel - * space, and we don't need to copy it. - */ - file = open_exec(interp); - if (IS_ERR(file)) - return PTR_ERR(file); - - bprm->interpreter = file; - return 0; -} - -static struct linux_binfmt em86_format = { - .module = THIS_MODULE, - .load_binary = load_em86, -}; - -static int __init init_em86_binfmt(void) -{ - register_binfmt(&em86_format); - return 0; -} - -static void __exit exit_em86_binfmt(void) -{ - unregister_binfmt(&em86_format); -} - -core_initcall(init_em86_binfmt); -module_exit(exit_em86_binfmt); -MODULE_LICENSE("GPL"); diff --git a/fs/block_dev.c b/fs/block_dev.c index 0c424a0cadaa..9ef4f1fc2cb0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode) free_percpu(bdev->bd_stats); kfree(bdev->bd_meta_info); + if (!bdev_is_partition(bdev)) + kfree(bdev->bd_disk); kmem_cache_free(bdev_cachep, BDEV_I(inode)); } diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7a8a2fc19533..78b202d198b8 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1488,15 +1488,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist **roots, - bool ignore_offset) + bool ignore_offset, bool skip_commit_root_sem) { int ret; - if (!trans) + if (!trans && !skip_commit_root_sem) down_read(&fs_info->commit_root_sem); ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr, time_seq, roots, ignore_offset); - if (!trans) + if (!trans && !skip_commit_root_sem) up_read(&fs_info->commit_root_sem); return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 17abde7f794c..ff5f07f9940b 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -47,7 +47,8 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, const u64 *extent_item_pos, bool ignore_offset); int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots, bool ignore_offset); + u64 time_seq, struct ulist **roots, bool ignore_offset, + bool skip_commit_root_sem); char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, u32 name_len, unsigned long name_off, struct extent_buffer *eb_in, u64 parent, diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 9a023ae0f98b..30d82cdf128c 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -352,7 +352,7 @@ static void end_compressed_bio_write(struct bio *bio) btrfs_record_physical_zoned(inode, cb->start, bio); btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL, cb->start, cb->start + cb->len - 1, - bio->bi_status == BLK_STS_OK); + !cb->errors); end_compressed_writeback(inode, cb); /* note, our inode could be gone now */ diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 06bc842ecdb3..ca848b183474 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -974,7 +974,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); if (qrecord_inserted) - btrfs_qgroup_trace_extent_post(fs_info, record); + btrfs_qgroup_trace_extent_post(trans, record); return 0; } @@ -1069,7 +1069,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, if (qrecord_inserted) - return btrfs_qgroup_trace_extent_post(fs_info, record); + return btrfs_qgroup_trace_extent_post(trans, record); return 0; } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b117dd3b8172..a59ab7b9aea0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -209,7 +209,7 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, static void csum_tree_block(struct extent_buffer *buf, u8 *result) { struct btrfs_fs_info *fs_info = buf->fs_info; - const int num_pages = fs_info->nodesize >> PAGE_SHIFT; + const int num_pages = num_extent_pages(buf); const int first_page_part = min_t(u32, PAGE_SIZE, fs_info->nodesize); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); char *kaddr; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d296483d148f..268ce58d4569 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6019,6 +6019,9 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range) mutex_lock(&fs_info->fs_devices->device_list_mutex); devices = &fs_info->fs_devices->devices; list_for_each_entry(device, devices, dev_list) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) + continue; + ret = btrfs_trim_free_extents(device, &group_trimmed); if (ret) { dev_failed++; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8f60314c36c5..0117d867ecf8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2992,7 +2992,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) goto out; } - if (ordered_extent->disk) + if (ordered_extent->bdev) btrfs_rewrite_logical_zoned(ordered_extent); btrfs_free_io_failure_record(inode, start, end); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 6eb41b7c0c84..5c0f8481e25e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -190,8 +190,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset entry->truncated_len = (u64)-1; entry->qgroup_rsv = ret; entry->physical = (u64)-1; - entry->disk = NULL; - entry->partno = (u8)-1; ASSERT(type == BTRFS_ORDERED_REGULAR || type == BTRFS_ORDERED_NOCOW || diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 566472004edd..b2d88aba8420 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h @@ -145,8 +145,7 @@ struct btrfs_ordered_extent { * command in a workqueue context */ u64 physical; - struct gendisk *disk; - u8 partno; + struct block_device *bdev; }; /* diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 07ec06d4e972..0fa121171ca1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1704,17 +1704,39 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, return 0; } -int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, +int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord) { struct ulist *old_root; u64 bytenr = qrecord->bytenr; int ret; - ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false); + /* + * We are always called in a context where we are already holding a + * transaction handle. Often we are called when adding a data delayed + * reference from btrfs_truncate_inode_items() (truncating or unlinking), + * in which case we will be holding a write lock on extent buffer from a + * subvolume tree. In this case we can't allow btrfs_find_all_roots() to + * acquire fs_info->commit_root_sem, because that is a higher level lock + * that must be acquired before locking any extent buffers. + * + * So we want btrfs_find_all_roots() to not acquire the commit_root_sem + * but we can't pass it a non-NULL transaction handle, because otherwise + * it would not use commit roots and would lock extent buffers, causing + * a deadlock if it ends up trying to read lock the same extent buffer + * that was previously write locked at btrfs_truncate_inode_items(). + * + * So pass a NULL transaction handle to btrfs_find_all_roots() and + * explicitly tell it to not acquire the commit_root_sem - if we are + * holding a transaction handle we don't need its protection. + */ + ASSERT(trans != NULL); + + ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root, + false, true); if (ret < 0) { - fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; - btrfs_warn(fs_info, + trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT; + btrfs_warn(trans->fs_info, "error accounting new delayed refs extent (err code: %d), quota inconsistent", ret); return 0; @@ -1758,7 +1780,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr, kfree(record); return 0; } - return btrfs_qgroup_trace_extent_post(fs_info, record); + return btrfs_qgroup_trace_extent_post(trans, record); } int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans, @@ -2629,7 +2651,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) /* Search commit root to find old_roots */ ret = btrfs_find_all_roots(NULL, fs_info, record->bytenr, 0, - &record->old_roots, false); + &record->old_roots, false, false); if (ret < 0) goto cleanup; } @@ -2645,7 +2667,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) * current root. It's safe inside commit_transaction(). */ ret = btrfs_find_all_roots(trans, fs_info, - record->bytenr, BTRFS_SEQ_LAST, &new_roots, false); + record->bytenr, BTRFS_SEQ_LAST, &new_roots, false, false); if (ret < 0) goto cleanup; if (qgroup_to_skip) { @@ -3179,7 +3201,7 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, num_bytes = found.offset; ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, - &roots, false); + &roots, false, false); if (ret < 0) goto out; /* For rescan, just pass old_roots as NULL */ diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 7283e4f549af..880e9df0dac1 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -298,7 +298,7 @@ int btrfs_qgroup_trace_extent_nolock( * using current root, then we can move all expensive backref walk out of * transaction committing, but not now as qgroup accounting will be wrong again. */ -int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, +int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord); /* diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index f3137285a9e2..98b5aaba46f1 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -224,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, * quota. */ ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -237,7 +237,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -261,7 +261,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, new_roots = NULL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -273,7 +273,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return -EINVAL; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -325,7 +325,7 @@ static int test_multiple_refs(struct btrfs_root *root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -338,7 +338,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -360,7 +360,7 @@ static int test_multiple_refs(struct btrfs_root *root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -373,7 +373,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); @@ -401,7 +401,7 @@ static int test_multiple_refs(struct btrfs_root *root, } ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, - false); + false, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); @@ -414,7 +414,7 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, - false); + false, false); if (ret) { ulist_free(old_roots); ulist_free(new_roots); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index dc6eb088d73e..e6430ac9bbe8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -5526,16 +5526,29 @@ log_extents: spin_lock(&inode->lock); inode->logged_trans = trans->transid; /* - * Don't update last_log_commit if we logged that an inode exists - * after it was loaded to memory (full_sync bit set). - * This is to prevent data loss when we do a write to the inode, - * then the inode gets evicted after all delalloc was flushed, - * then we log it exists (due to a rename for example) and then - * fsync it. This last fsync would do nothing (not logging the - * extents previously written). + * Don't update last_log_commit if we logged that an inode exists. + * We do this for two reasons: + * + * 1) We might have had buffered writes to this inode that were + * flushed and had their ordered extents completed in this + * transaction, but we did not previously log the inode with + * LOG_INODE_ALL. Later the inode was evicted and after that + * it was loaded again and this LOG_INODE_EXISTS log operation + * happened. We must make sure that if an explicit fsync against + * the inode is performed later, it logs the new extents, an + * updated inode item, etc, and syncs the log. The same logic + * applies to direct IO writes instead of buffered writes. + * + * 2) When we log the inode with LOG_INODE_EXISTS, its inode item + * is logged with an i_size of 0 or whatever value was logged + * before. If later the i_size of the inode is increased by a + * truncate operation, the log is synced through an fsync of + * some other inode and then finally an explicit fsync against + * this inode is made, we must make sure this fsync logs the + * inode with the new i_size, the hole between old i_size and + * the new i_size, and syncs the log. */ - if (inode_only != LOG_INODE_EXISTS || - !test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags)) + if (inode_only != LOG_INODE_EXISTS) inode->last_log_commit = inode->last_sub_trans; spin_unlock(&inode->lock); } @@ -6490,8 +6503,8 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * if this inode hasn't been logged and directory we're renaming it * from hasn't been logged, we don't need to log it */ - if (inode->logged_trans < trans->transid && - (!old_dir || old_dir->logged_trans < trans->transid)) + if (!inode_logged(trans, inode) && + (!old_dir || !inode_logged(trans, old_dir))) return; /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1e4d43ffe38b..70f94b75f25a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1078,6 +1078,7 @@ static void __btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices, if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { list_del_init(&device->dev_alloc_list); clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state); + fs_devices->rw_devices--; } list_del_init(&device->dev_list); fs_devices->num_devices--; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 297c0b1c0634..907c2cc45c9c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1349,8 +1349,7 @@ void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset, return; ordered->physical = physical; - ordered->disk = bio->bi_bdev->bd_disk; - ordered->partno = bio->bi_bdev->bd_partno; + ordered->bdev = bio->bi_bdev; btrfs_put_ordered_extent(ordered); } @@ -1362,18 +1361,16 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) struct extent_map_tree *em_tree; struct extent_map *em; struct btrfs_ordered_sum *sum; - struct block_device *bdev; u64 orig_logical = ordered->disk_bytenr; u64 *logical = NULL; int nr, stripe_len; /* Zoned devices should not have partitions. So, we can assume it is 0 */ - ASSERT(ordered->partno == 0); - bdev = bdgrab(ordered->disk->part0); - if (WARN_ON(!bdev)) + ASSERT(!bdev_is_partition(ordered->bdev)); + if (WARN_ON(!ordered->bdev)) return; - if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev, + if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, ordered->bdev, ordered->physical, &logical, &nr, &stripe_len))) goto out; @@ -1402,7 +1399,6 @@ void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) out: kfree(logical); - bdput(bdev); } bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index a818213c972f..9db1b39df773 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4456,7 +4456,7 @@ bool check_session_state(struct ceph_mds_session *s) break; case CEPH_MDS_SESSION_CLOSING: /* Should never reach this when we're unmounting */ - WARN_ON_ONCE(true); + WARN_ON_ONCE(s->s_ttl); fallthrough; case CEPH_MDS_SESSION_NEW: case CEPH_MDS_SESSION_RESTARTING: diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f72e3b3dca69..65d1a65bfc37 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -873,8 +873,11 @@ PsxDelete: InformationLevel) - 4; offset = param_offset + params; - /* Setup pointer to Request Data (inode type) */ - pRqD = (struct unlink_psx_rq *)(((char *)&pSMB->hdr.Protocol) + offset); + /* Setup pointer to Request Data (inode type). + * Note that SMB offsets are from the beginning of SMB which is 4 bytes + * in, after RFC1001 field + */ + pRqD = (struct unlink_psx_rq *)((char *)(pSMB) + offset + 4); pRqD->type = cpu_to_le16(type); pSMB->ParameterOffset = cpu_to_le16(param_offset); pSMB->DataOffset = cpu_to_le16(offset); @@ -1081,7 +1084,8 @@ PsxCreat: param_offset = offsetof(struct smb_com_transaction2_spi_req, InformationLevel) - 4; offset = param_offset + params; - pdata = (OPEN_PSX_REQ *)(((char *)&pSMB->hdr.Protocol) + offset); + /* SMB offsets are from the beginning of SMB which is 4 bytes in, after RFC1001 field */ + pdata = (OPEN_PSX_REQ *)((char *)(pSMB) + offset + 4); pdata->Level = cpu_to_le16(SMB_QUERY_FILE_UNIX_BASIC); pdata->Permissions = cpu_to_le64(mode); pdata->PosixOpenFlags = cpu_to_le32(posix_flags); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 1b04d6ec14dd..3781eee9360a 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -220,7 +220,7 @@ cifs_reconnect(struct TCP_Server_Info *server) #ifdef CONFIG_CIFS_DFS_UPCALL struct super_block *sb = NULL; struct cifs_sb_info *cifs_sb = NULL; - struct dfs_cache_tgt_list tgt_list = {0}; + struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list); struct dfs_cache_tgt_iterator *tgt_it = NULL; #endif @@ -3130,7 +3130,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_ { int rc; char *npath = NULL; - struct dfs_cache_tgt_list tgt_list = {0}; + struct dfs_cache_tgt_list tgt_list = DFS_CACHE_TGT_LIST_INIT(tgt_list); struct dfs_cache_tgt_iterator *tgt_it = NULL; struct smb3_fs_context tmp_ctx = {NULL}; diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 7c1769714609..283745592844 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -19,6 +19,7 @@ #include "cifs_debug.h" #include "cifs_unicode.h" #include "smb2glob.h" +#include "dns_resolve.h" #include "dfs_cache.h" @@ -911,6 +912,7 @@ static int get_targets(struct cache_entry *ce, struct dfs_cache_tgt_list *tl) err_free_it: list_for_each_entry_safe(it, nit, head, it_list) { + list_del(&it->it_list); kfree(it->it_name); kfree(it); } @@ -1293,6 +1295,194 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, return 0; } +static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, const char *s2) +{ + char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0}; + const char *host; + size_t hostlen; + char *ip = NULL; + struct sockaddr sa; + bool match; + int rc; + + if (strcasecmp(s1, s2)) + return false; + + /* + * Resolve share's hostname and check if server address matches. Otherwise just ignore it + * as we could not have upcall to resolve hostname or failed to convert ip address. + */ + match = true; + extract_unc_hostname(s1, &host, &hostlen); + scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host); + + rc = dns_resolve_server_name_to_ip(unc, &ip, NULL); + if (rc < 0) { + cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n", + __func__, (int)hostlen, host); + return true; + } + + if (!cifs_convert_address(&sa, ip, strlen(ip))) { + cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n", + __func__, ip); + } else { + mutex_lock(&server->srv_mutex); + match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa); + mutex_unlock(&server->srv_mutex); + } + + kfree(ip); + return match; +} + +/* + * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new + * target shares in @refs. + */ +static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl, + const struct dfs_info3_param *refs, int numrefs) +{ + struct dfs_cache_tgt_iterator *it; + int i; + + for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) { + for (i = 0; i < numrefs; i++) { + if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it), + refs[i].node_name)) + return; + } + } + + cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__); + for (i = 0; i < tcon->ses->chan_count; i++) { + spin_lock(&GlobalMid_Lock); + if (tcon->ses->chans[i].server->tcpStatus != CifsExiting) + tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect; + spin_unlock(&GlobalMid_Lock); + } +} + +/* Refresh dfs referral of tcon and mark it for reconnect if needed */ +static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh) +{ + const char *path = tcon->dfs_path + 1; + struct cifs_ses *ses; + struct cache_entry *ce; + struct dfs_info3_param *refs = NULL; + int numrefs = 0; + bool needs_refresh = false; + struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); + int rc = 0; + unsigned int xid; + + ses = find_ipc_from_server_path(sessions, path); + if (IS_ERR(ses)) { + cifs_dbg(FYI, "%s: could not find ipc session\n", __func__); + return PTR_ERR(ses); + } + + down_read(&htable_rw_lock); + ce = lookup_cache_entry(path); + needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); + if (!IS_ERR(ce)) { + rc = get_targets(ce, &tl); + if (rc) + cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc); + } + up_read(&htable_rw_lock); + + if (!needs_refresh) { + rc = 0; + goto out; + } + + xid = get_xid(); + rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); + free_xid(xid); + + /* Create or update a cache entry with the new referral */ + if (!rc) { + dump_refs(refs, numrefs); + + down_write(&htable_rw_lock); + ce = lookup_cache_entry(path); + if (IS_ERR(ce)) + add_cache_entry_locked(refs, numrefs); + else if (force_refresh || cache_entry_expired(ce)) + update_cache_entry_locked(ce, refs, numrefs); + up_write(&htable_rw_lock); + + mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs); + } + +out: + dfs_cache_free_tgts(&tl); + free_dfs_info_array(refs, numrefs); + return rc; +} + +/** + * dfs_cache_remount_fs - remount a DFS share + * + * Reconfigure dfs mount by forcing a new DFS referral and if the currently cached targets do not + * match any of the new targets, mark it for reconnect. + * + * @cifs_sb: cifs superblock. + * + * Return zero if remounted, otherwise non-zero. + */ +int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) +{ + struct cifs_tcon *tcon; + struct mount_group *mg; + struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL}; + int rc; + + if (!cifs_sb || !cifs_sb->master_tlink) + return -EINVAL; + + tcon = cifs_sb_master_tcon(cifs_sb); + if (!tcon->dfs_path) { + cifs_dbg(FYI, "%s: not a dfs tcon\n", __func__); + return 0; + } + + if (uuid_is_null(&cifs_sb->dfs_mount_id)) { + cifs_dbg(FYI, "%s: tcon has no dfs mount group id\n", __func__); + return -EINVAL; + } + + mutex_lock(&mount_group_list_lock); + mg = find_mount_group_locked(&cifs_sb->dfs_mount_id); + if (IS_ERR(mg)) { + mutex_unlock(&mount_group_list_lock); + cifs_dbg(FYI, "%s: tcon has ipc session to refresh referral\n", __func__); + return PTR_ERR(mg); + } + kref_get(&mg->refcount); + mutex_unlock(&mount_group_list_lock); + + spin_lock(&mg->lock); + memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0])); + spin_unlock(&mg->lock); + + /* + * After reconnecting to a different server, unique ids won't match anymore, so we disable + * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). + */ + cifs_autodisable_serverino(cifs_sb); + /* + * Force the use of prefix path to support failover on DFS paths that resolve to targets + * that have different prefix paths. + */ + cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; + rc = refresh_tcon(sessions, tcon, true); + + kref_put(&mg->refcount, mount_group_release); + return rc; +} + /* * Refresh all active dfs mounts regardless of whether they are in cache or not. * (cache can be cleared) @@ -1303,7 +1493,6 @@ static void refresh_mounts(struct cifs_ses **sessions) struct cifs_ses *ses; struct cifs_tcon *tcon, *ntcon; struct list_head tcons; - unsigned int xid; INIT_LIST_HEAD(&tcons); @@ -1321,44 +1510,8 @@ static void refresh_mounts(struct cifs_ses **sessions) spin_unlock(&cifs_tcp_ses_lock); list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) { - const char *path = tcon->dfs_path + 1; - struct cache_entry *ce; - struct dfs_info3_param *refs = NULL; - int numrefs = 0; - bool needs_refresh = false; - int rc = 0; - list_del_init(&tcon->ulist); - - ses = find_ipc_from_server_path(sessions, path); - if (IS_ERR(ses)) - goto next_tcon; - - down_read(&htable_rw_lock); - ce = lookup_cache_entry(path); - needs_refresh = IS_ERR(ce) || cache_entry_expired(ce); - up_read(&htable_rw_lock); - - if (!needs_refresh) - goto next_tcon; - - xid = get_xid(); - rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - free_xid(xid); - - /* Create or update a cache entry with the new referral */ - if (!rc) { - down_write(&htable_rw_lock); - ce = lookup_cache_entry(path); - if (IS_ERR(ce)) - add_cache_entry_locked(refs, numrefs); - else if (cache_entry_expired(ce)) - update_cache_entry_locked(ce, refs, numrefs); - up_write(&htable_rw_lock); - } - -next_tcon: - free_dfs_info_array(refs, numrefs); + refresh_tcon(sessions, tcon, false); cifs_put_tcon(tcon); } } diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index b29d3ae64829..52070d1df189 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -13,6 +13,8 @@ #include <linux/uuid.h> #include "cifsglob.h" +#define DFS_CACHE_TGT_LIST_INIT(var) { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), } + struct dfs_cache_tgt_list { int tl_numtgts; struct list_head tl_list; @@ -44,6 +46,7 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id); void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses); char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap); +int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb); static inline struct dfs_cache_tgt_iterator * dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cd108607a070..0a72840a88f1 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -4619,7 +4619,7 @@ read_complete: static int cifs_readpage(struct file *file, struct page *page) { - loff_t offset = (loff_t)page->index << PAGE_SHIFT; + loff_t offset = page_file_offset(page); int rc = -EACCES; unsigned int xid; diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 553adfbcc22a..eed59bc1d913 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -13,6 +13,9 @@ #include <linux/magic.h> #include <linux/security.h> #include <net/net_namespace.h> +#ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs_cache.h" +#endif */ #include <linux/ctype.h> @@ -779,6 +782,10 @@ static int smb3_reconfigure(struct fs_context *fc) smb3_cleanup_fs_context_contents(cifs_sb->ctx); rc = smb3_fs_context_dup(cifs_sb->ctx, ctx); smb3_update_mnt_flags(cifs_sb); +#ifdef CONFIG_CIFS_DFS_UPCALL + if (!rc) + rc = dfs_cache_remount_fs(cifs_sb); +#endif return rc; } @@ -918,6 +925,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, ctx->cred_uid = uid; ctx->cruid_specified = true; break; + case Opt_backupuid: + uid = make_kuid(current_user_ns(), result.uint_32); + if (!uid_valid(uid)) + goto cifs_parse_mount_err; + ctx->backupuid = uid; + ctx->backupuid_specified = true; + break; case Opt_backupgid: gid = make_kgid(current_user_ns(), result.uint_32); if (!gid_valid(gid)) diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index ba3c58e1f725..2dfd0d8297eb 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -3618,6 +3618,7 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, { struct cifs_io_parms io_parms = {0}; int nbytes; + int rc = 0; struct kvec iov[2]; io_parms.netfid = cfile->fid.netfid; @@ -3625,13 +3626,25 @@ static int smb3_simple_fallocate_write_range(unsigned int xid, io_parms.tcon = tcon; io_parms.persistent_fid = cfile->fid.persistent_fid; io_parms.volatile_fid = cfile->fid.volatile_fid; - io_parms.offset = off; - io_parms.length = len; - /* iov[0] is reserved for smb header */ - iov[1].iov_base = buf; - iov[1].iov_len = io_parms.length; - return SMB2_write(xid, &io_parms, &nbytes, iov, 1); + while (len) { + io_parms.offset = off; + io_parms.length = len; + if (io_parms.length > SMB2_MAX_BUFFER_SIZE) + io_parms.length = SMB2_MAX_BUFFER_SIZE; + /* iov[0] is reserved for smb header */ + iov[1].iov_base = buf; + iov[1].iov_len = io_parms.length; + rc = SMB2_write(xid, &io_parms, &nbytes, iov, 1); + if (rc) + break; + if (nbytes > len) + return -EINVAL; + buf += nbytes; + off += nbytes; + len -= nbytes; + } + return rc; } static int smb3_simple_fallocate_range(unsigned int xid, @@ -3655,11 +3668,6 @@ static int smb3_simple_fallocate_range(unsigned int xid, (char **)&out_data, &out_data_len); if (rc) goto out; - /* - * It is already all allocated - */ - if (out_data_len == 0) - goto out; buf = kzalloc(1024 * 1024, GFP_KERNEL); if (buf == NULL) { @@ -3782,6 +3790,24 @@ static long smb3_simple_falloc(struct file *file, struct cifs_tcon *tcon, goto out; } + if (keep_size == true) { + /* + * We can not preallocate pages beyond the end of the file + * in SMB2 + */ + if (off >= i_size_read(inode)) { + rc = 0; + goto out; + } + /* + * For fallocates that are partially beyond the end of file, + * clamp len so we only fallocate up to the end of file. + */ + if (off + len > i_size_read(inode)) { + len = i_size_read(inode) - off; + } + } + if ((keep_size == true) || (i_size_read(inode) >= off + len)) { /* * At this point, we are trying to fallocate an internal diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 14292dba3a12..2c2f179b6977 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -106,12 +106,11 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) return err; } -static bool ext2_check_page(struct page *page, int quiet) +static bool ext2_check_page(struct page *page, int quiet, char *kaddr) { struct inode *dir = page->mapping->host; struct super_block *sb = dir->i_sb; unsigned chunk_size = ext2_chunk_size(dir); - char *kaddr = page_address(page); u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count); unsigned offs, rec_len; unsigned limit = PAGE_SIZE; @@ -205,7 +204,8 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n, if (!IS_ERR(page)) { *page_addr = kmap_local_page(page); if (unlikely(!PageChecked(page))) { - if (PageError(page) || !ext2_check_page(page, quiet)) + if (PageError(page) || !ext2_check_page(page, quiet, + *page_addr)) goto fail; } } @@ -584,10 +584,10 @@ out_unlock: * ext2_delete_entry deletes a directory entry by merging it with the * previous entry. Page is up-to-date. */ -int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) +int ext2_delete_entry (struct ext2_dir_entry_2 *dir, struct page *page, + char *kaddr) { struct inode *inode = page->mapping->host; - char *kaddr = page_address(page); unsigned from = ((char*)dir - kaddr) & ~(ext2_chunk_size(inode)-1); unsigned to = ((char *)dir - kaddr) + ext2_rec_len_from_disk(dir->rec_len); @@ -607,7 +607,7 @@ int ext2_delete_entry (struct ext2_dir_entry_2 * dir, struct page * page ) de = ext2_next_entry(de); } if (pde) - from = (char*)pde - (char*)page_address(page); + from = (char *)pde - kaddr; pos = page_offset(page) + from; lock_page(page); err = ext2_prepare_chunk(page, pos, to - from); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index b0a694820cb7..e512630cb63e 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -740,7 +740,8 @@ extern int ext2_inode_by_name(struct inode *dir, extern int ext2_make_empty(struct inode *, struct inode *); extern struct ext2_dir_entry_2 *ext2_find_entry(struct inode *, const struct qstr *, struct page **, void **res_page_addr); -extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); +extern int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page, + char *kaddr); extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 *ext2_dotdot(struct inode *dir, struct page **p, void **pa); extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, void *, diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 1f69b81655b6..5f6b7560eb3f 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -293,7 +293,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) goto out; } - err = ext2_delete_entry (de, page); + err = ext2_delete_entry (de, page, page_addr); ext2_put_page(page, page_addr); if (err) goto out; @@ -397,7 +397,7 @@ static int ext2_rename (struct user_namespace * mnt_userns, old_inode->i_ctime = current_time(old_inode); mark_inode_dirty(old_inode); - ext2_delete_entry(old_de, old_page); + ext2_delete_entry(old_de, old_page, old_page_addr); if (dir_de) { if (old_dir != new_dir) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 06d04a74ab6c..4c3370548982 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -521,6 +521,9 @@ static bool inode_prepare_wbs_switch(struct inode *inode, */ smp_mb(); + if (IS_DAX(inode)) + return false; + /* while holding I_WB_SWITCH, no one else can update the association */ spin_lock(&inode->i_lock); if (!(inode->i_sb->s_flags & SB_ACTIVE) || diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 926eeb9bf4eb..cdfb1ae78a3f 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -77,7 +77,7 @@ enum hugetlb_param { static const struct fs_parameter_spec hugetlb_fs_parameters[] = { fsparam_u32 ("gid", Opt_gid), fsparam_string("min_size", Opt_min_size), - fsparam_u32 ("mode", Opt_mode), + fsparam_u32oct("mode", Opt_mode), fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("pagesize", Opt_pagesize), fsparam_string("size", Opt_size), diff --git a/fs/internal.h b/fs/internal.h index 3ce8edbaa3ca..82e8eb32ff3d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -61,7 +61,6 @@ extern void __init chrdev_init(void); */ extern const struct fs_context_operations legacy_fs_context_ops; extern int parse_monolithic_mount_data(struct fs_context *, void *); -extern void fc_drop_locked(struct fs_context *); extern void vfs_clean_context(struct fs_context *fc); extern int finish_clean_context(struct fs_context *fc); diff --git a/fs/io-wq.c b/fs/io-wq.c index 843d4a7bcd6e..cf086b01c6c6 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -731,7 +731,12 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) int work_flags; unsigned long flags; - if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) { + /* + * If io-wq is exiting for this task, or if the request has explicitly + * been marked as one that should not get executed, cancel it here. + */ + if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || + (work->flags & IO_WQ_WORK_CANCEL)) { io_run_cancel(work, wqe); return; } diff --git a/fs/io_uring.c b/fs/io_uring.c index 0cac361bf6b8..bf548af0426c 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1279,8 +1279,17 @@ static void io_prep_async_link(struct io_kiocb *req) { struct io_kiocb *cur; - io_for_each_link(cur, req) - io_prep_async_work(cur); + if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->completion_lock); + io_for_each_link(cur, req) + io_prep_async_work(cur); + spin_unlock_irq(&ctx->completion_lock); + } else { + io_for_each_link(cur, req) + io_prep_async_work(cur); + } } static void io_queue_async_work(struct io_kiocb *req) @@ -1294,6 +1303,17 @@ static void io_queue_async_work(struct io_kiocb *req) /* init ->work of the whole link before punting */ io_prep_async_link(req); + + /* + * Not expected to happen, but if we do have a bug where this _can_ + * happen, catch it here and ensure the request is marked as + * canceled. That will make io-wq go through the usual work cancel + * procedure rather than attempt to run this request (or create a new + * worker for it). + */ + if (WARN_ON_ONCE(!same_thread_group(req->task, current))) + req->work.flags |= IO_WQ_WORK_CANCEL; + trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, &req->work, req->flags); io_wq_enqueue(tctx->io_wq, &req->work); @@ -1939,9 +1959,13 @@ static void tctx_task_work(struct callback_head *cb) node = next; } if (wq_list_empty(&tctx->task_list)) { + spin_lock_irq(&tctx->task_lock); clear_bit(0, &tctx->task_state); - if (wq_list_empty(&tctx->task_list)) + if (wq_list_empty(&tctx->task_list)) { + spin_unlock_irq(&tctx->task_lock); break; + } + spin_unlock_irq(&tctx->task_lock); /* another tctx_task_work() is enqueued, yield */ if (test_and_set_bit(0, &tctx->task_state)) break; @@ -2036,6 +2060,12 @@ static void io_req_task_queue(struct io_kiocb *req) io_req_task_work_add(req); } +static void io_req_task_queue_reissue(struct io_kiocb *req) +{ + req->io_task_work.func = io_queue_async_work; + io_req_task_work_add(req); +} + static inline void io_queue_next(struct io_kiocb *req) { struct io_kiocb *nxt = io_req_find_next(req); @@ -2205,7 +2235,7 @@ static inline bool io_run_task_work(void) * Find and free completed poll iocbs */ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, - struct list_head *done) + struct list_head *done, bool resubmit) { struct req_batch rb; struct io_kiocb *req; @@ -2220,11 +2250,11 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, req = list_first_entry(done, struct io_kiocb, inflight_entry); list_del(&req->inflight_entry); - if (READ_ONCE(req->result) == -EAGAIN && + if (READ_ONCE(req->result) == -EAGAIN && resubmit && !(req->flags & REQ_F_DONT_REISSUE)) { req->iopoll_completed = 0; req_ref_get(req); - io_queue_async_work(req); + io_req_task_queue_reissue(req); continue; } @@ -2244,7 +2274,7 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, } static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, - long min) + long min, bool resubmit) { struct io_kiocb *req, *tmp; LIST_HEAD(done); @@ -2287,7 +2317,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events, } if (!list_empty(&done)) - io_iopoll_complete(ctx, nr_events, &done); + io_iopoll_complete(ctx, nr_events, &done, resubmit); return ret; } @@ -2305,7 +2335,7 @@ static void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) while (!list_empty(&ctx->iopoll_list)) { unsigned int nr_events = 0; - io_do_iopoll(ctx, &nr_events, 0); + io_do_iopoll(ctx, &nr_events, 0, false); /* let it sleep and repeat later if can't complete a request */ if (nr_events == 0) @@ -2367,7 +2397,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min) list_empty(&ctx->iopoll_list)) break; } - ret = io_do_iopoll(ctx, &nr_events, min); + ret = io_do_iopoll(ctx, &nr_events, min, true); } while (!ret && nr_events < min && !need_resched()); out: mutex_unlock(&ctx->uring_lock); @@ -2417,6 +2447,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req) */ if (percpu_ref_is_dying(&ctx->refs)) return false; + /* + * Play it safe and assume not safe to re-import and reissue if we're + * not in the original thread group (or in task context). + */ + if (!same_thread_group(req->task, current) || !in_task()) + return false; return true; } #else @@ -2747,7 +2783,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, req->flags &= ~REQ_F_REISSUE; if (io_resubmit_prep(req)) { req_ref_get(req); - io_queue_async_work(req); + io_req_task_queue_reissue(req); } else { int cflags = 0; @@ -4802,6 +4838,7 @@ IO_NETOP_FN(recv); struct io_poll_table { struct poll_table_struct pt; struct io_kiocb *req; + int nr_entries; int error; }; @@ -4902,7 +4939,6 @@ static bool io_poll_complete(struct io_kiocb *req, __poll_t mask) if (req->poll.events & EPOLLONESHOT) flags = 0; if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) { - io_poll_remove_waitqs(req); req->poll.done = true; flags = 0; } @@ -4925,6 +4961,7 @@ static void io_poll_task_func(struct io_kiocb *req) done = io_poll_complete(req, req->result); if (done) { + io_poll_remove_double(req); hash_del(&req->hash_node); } else { req->result = 0; @@ -4995,11 +5032,11 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, struct io_kiocb *req = pt->req; /* - * If poll->head is already set, it's because the file being polled - * uses multiple waitqueues for poll handling (eg one for read, one - * for write). Setup a separate io_poll_iocb if this happens. + * The file being polled uses multiple waitqueues for poll handling + * (e.g. one for read, one for write). Setup a separate io_poll_iocb + * if this happens. */ - if (unlikely(poll->head)) { + if (unlikely(pt->nr_entries)) { struct io_poll_iocb *poll_one = poll; /* already have a 2nd entry, fail a third attempt */ @@ -5027,7 +5064,7 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, *poll_ptr = poll; } - pt->error = 0; + pt->nr_entries++; poll->head = head; if (poll->events & EPOLLEXCLUSIVE) @@ -5104,11 +5141,16 @@ static __poll_t __io_arm_poll_handler(struct io_kiocb *req, ipt->pt._key = mask; ipt->req = req; - ipt->error = -EINVAL; + ipt->error = 0; + ipt->nr_entries = 0; mask = vfs_poll(req->file, &ipt->pt) & poll->events; + if (unlikely(!ipt->nr_entries) && !ipt->error) + ipt->error = -EINVAL; spin_lock_irq(&ctx->completion_lock); + if (ipt->error || (mask && (poll->events & EPOLLONESHOT))) + io_poll_remove_double(req); if (likely(poll->head)) { spin_lock(&poll->head->lock); if (unlikely(list_empty(&poll->wait.entry))) { @@ -5179,7 +5221,6 @@ static int io_arm_poll_handler(struct io_kiocb *req) ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask, io_async_wake); if (ret || ipt.error) { - io_poll_remove_double(req); spin_unlock_irq(&ctx->completion_lock); if (ret) return IO_APOLL_READY; @@ -6792,7 +6833,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) mutex_lock(&ctx->uring_lock); if (!list_empty(&ctx->iopoll_list)) - io_do_iopoll(ctx, &nr_events, 0); + io_do_iopoll(ctx, &nr_events, 0, true); /* * Don't submit if refs are dying, good for io_uring_register(), @@ -7899,15 +7940,19 @@ static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, struct io_wq_data data; unsigned int concurrency; + mutex_lock(&ctx->uring_lock); hash = ctx->hash_map; if (!hash) { hash = kzalloc(sizeof(*hash), GFP_KERNEL); - if (!hash) + if (!hash) { + mutex_unlock(&ctx->uring_lock); return ERR_PTR(-ENOMEM); + } refcount_set(&hash->refs, 1); init_waitqueue_head(&hash->wait); ctx->hash_map = hash; } + mutex_unlock(&ctx->uring_lock); data.hash = hash; data.task = task; @@ -7981,9 +8026,11 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx, f = fdget(p->wq_fd); if (!f.file) return -ENXIO; - fdput(f); - if (f.file->f_op != &io_uring_fops) + if (f.file->f_op != &io_uring_fops) { + fdput(f); return -EINVAL; + } + fdput(f); } if (ctx->flags & IORING_SETUP_SQPOLL) { struct task_struct *tsk; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 775657943057..54d7843c0211 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1529,6 +1529,45 @@ static void ocfs2_truncate_cluster_pages(struct inode *inode, u64 byte_start, } } +/* + * zero out partial blocks of one cluster. + * + * start: file offset where zero starts, will be made upper block aligned. + * len: it will be trimmed to the end of current cluster if "start + len" + * is bigger than it. + */ +static int ocfs2_zeroout_partial_cluster(struct inode *inode, + u64 start, u64 len) +{ + int ret; + u64 start_block, end_block, nr_blocks; + u64 p_block, offset; + u32 cluster, p_cluster, nr_clusters; + struct super_block *sb = inode->i_sb; + u64 end = ocfs2_align_bytes_to_clusters(sb, start); + + if (start + len < end) + end = start + len; + + start_block = ocfs2_blocks_for_bytes(sb, start); + end_block = ocfs2_blocks_for_bytes(sb, end); + nr_blocks = end_block - start_block; + if (!nr_blocks) + return 0; + + cluster = ocfs2_bytes_to_clusters(sb, start); + ret = ocfs2_get_clusters(inode, cluster, &p_cluster, + &nr_clusters, NULL); + if (ret) + return ret; + if (!p_cluster) + return 0; + + offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); + p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; + return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); +} + static int ocfs2_zero_partial_clusters(struct inode *inode, u64 start, u64 len) { @@ -1538,6 +1577,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); unsigned int csize = osb->s_clustersize; handle_t *handle; + loff_t isize = i_size_read(inode); /* * The "start" and "end" values are NOT necessarily part of @@ -1558,6 +1598,26 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, if ((start & (csize - 1)) == 0 && (end & (csize - 1)) == 0) goto out; + /* No page cache for EOF blocks, issue zero out to disk. */ + if (end > isize) { + /* + * zeroout eof blocks in last cluster starting from + * "isize" even "start" > "isize" because it is + * complicated to zeroout just at "start" as "start" + * may be not aligned with block size, buffer write + * would be required to do that, but out of eof buffer + * write is not supported. + */ + ret = ocfs2_zeroout_partial_cluster(inode, isize, + end - isize); + if (ret) { + mlog_errno(ret); + goto out; + } + if (start >= isize) + goto out; + end = isize; + } handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -1856,45 +1916,6 @@ out: } /* - * zero out partial blocks of one cluster. - * - * start: file offset where zero starts, will be made upper block aligned. - * len: it will be trimmed to the end of current cluster if "start + len" - * is bigger than it. - */ -static int ocfs2_zeroout_partial_cluster(struct inode *inode, - u64 start, u64 len) -{ - int ret; - u64 start_block, end_block, nr_blocks; - u64 p_block, offset; - u32 cluster, p_cluster, nr_clusters; - struct super_block *sb = inode->i_sb; - u64 end = ocfs2_align_bytes_to_clusters(sb, start); - - if (start + len < end) - end = start + len; - - start_block = ocfs2_blocks_for_bytes(sb, start); - end_block = ocfs2_blocks_for_bytes(sb, end); - nr_blocks = end_block - start_block; - if (!nr_blocks) - return 0; - - cluster = ocfs2_bytes_to_clusters(sb, start); - ret = ocfs2_get_clusters(inode, cluster, &p_cluster, - &nr_clusters, NULL); - if (ret) - return ret; - if (!p_cluster) - return 0; - - offset = start_block - ocfs2_clusters_to_blocks(sb, cluster); - p_block = ocfs2_clusters_to_blocks(sb, p_cluster) + offset; - return sb_issue_zeroout(sb, p_block, nr_blocks, GFP_NOFS); -} - -/* * Parts of this function taken from xfs_change_file_space() */ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, @@ -1935,7 +1956,6 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, goto out_inode_unlock; } - orig_isize = i_size_read(inode); switch (sr->l_whence) { case 0: /*SEEK_SET*/ break; @@ -1943,7 +1963,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, sr->l_start += f_pos; break; case 2: /*SEEK_END*/ - sr->l_start += orig_isize; + sr->l_start += i_size_read(inode); break; default: ret = -EINVAL; @@ -1998,6 +2018,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, ret = -EINVAL; } + orig_isize = i_size_read(inode); /* zeroout eof blocks in the cluster. */ if (!ret && change_size && orig_isize < size) { ret = ocfs2_zeroout_partial_cluster(inode, orig_isize, diff --git a/fs/pipe.c b/fs/pipe.c index bfd946a9ad01..8e6ef62aeb1c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -32,6 +32,21 @@ #include "internal.h" /* + * New pipe buffers will be restricted to this size while the user is exceeding + * their pipe buffer quota. The general pipe use case needs at least two + * buffers: one for data yet to be read, and one for new data. If this is less + * than two, then a write to a non-empty pipe may block even if the pipe is not + * full. This can occur with GNU make jobserver or similar uses of pipes as + * semaphores: multiple processes may be waiting to write tokens back to the + * pipe before reading tokens: https://lore.kernel.org/lkml/1628086770.5rn8p04n6j.none@localhost/. + * + * Users can reduce their pipe buffers with F_SETPIPE_SZ below this at their + * own risk, namely: pipe writes to non-full pipes may block until the pipe is + * emptied. + */ +#define PIPE_MIN_DEF_BUFFERS 2 + +/* * The max size that a non-root user is allowed to grow the pipe. Can * be set by root in /proc/sys/fs/pipe-max-size */ @@ -429,20 +444,20 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) #endif /* - * Only wake up if the pipe started out empty, since - * otherwise there should be no readers waiting. + * Epoll nonsensically wants a wakeup whether the pipe + * was already empty or not. * * If it wasn't empty we try to merge new data into * the last buffer. * * That naturally merges small writes, but it also - * page-aligs the rest of the writes for large writes + * page-aligns the rest of the writes for large writes * spanning multiple pages. */ head = pipe->head; - was_empty = pipe_empty(head, pipe->tail); + was_empty = true; chars = total_len & (PAGE_SIZE-1); - if (chars && !was_empty) { + if (chars && !pipe_empty(head, pipe->tail)) { unsigned int mask = pipe->ring_size - 1; struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask]; int offset = buf->offset + buf->len; @@ -781,8 +796,8 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { - user_bufs = account_pipe_buffers(user, pipe_bufs, 1); - pipe_bufs = 1; + user_bufs = account_pipe_buffers(user, pipe_bufs, PIPE_MIN_DEF_BUFFERS); + pipe_bufs = PIPE_MIN_DEF_BUFFERS; } if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 476a7ff49482..ef42729216d1 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -387,6 +387,24 @@ void pathrelse(struct treepath *search_path) search_path->path_length = ILLEGAL_PATH_ELEMENT_OFFSET; } +static int has_valid_deh_location(struct buffer_head *bh, struct item_head *ih) +{ + struct reiserfs_de_head *deh; + int i; + + deh = B_I_DEH(bh, ih); + for (i = 0; i < ih_entry_count(ih); i++) { + if (deh_location(&deh[i]) > ih_item_len(ih)) { + reiserfs_warning(NULL, "reiserfs-5094", + "directory entry location seems wrong %h", + &deh[i]); + return 0; + } + } + + return 1; +} + static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) { struct block_head *blkh; @@ -454,11 +472,14 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh) "(second one): %h", ih); return 0; } - if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) { - reiserfs_warning(NULL, "reiserfs-5093", - "item entry count seems wrong %h", - ih); - return 0; + if (is_direntry_le_ih(ih)) { + if (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE)) { + reiserfs_warning(NULL, "reiserfs-5093", + "item entry count seems wrong %h", + ih); + return 0; + } + return has_valid_deh_location(bh, ih); } prev_location = ih_location(ih); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 3ffafc73acf0..58481f8d63d5 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2082,6 +2082,14 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) unlock_new_inode(root_inode); } + if (!S_ISDIR(root_inode->i_mode) || !inode_get_bytes(root_inode) || + !root_inode->i_size) { + SWARN(silent, s, "", "corrupt root inode, run fsck"); + iput(root_inode); + errval = -EUCLEAN; + goto error; + } + s->s_root = d_make_root(root_inode); if (!s->s_root) goto error; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index f6e0f0c0d0e5..5c2d806e6ae5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1236,23 +1236,21 @@ static __always_inline void wake_userfault(struct userfaultfd_ctx *ctx, } static __always_inline int validate_range(struct mm_struct *mm, - __u64 *start, __u64 len) + __u64 start, __u64 len) { __u64 task_size = mm->task_size; - *start = untagged_addr(*start); - - if (*start & ~PAGE_MASK) + if (start & ~PAGE_MASK) return -EINVAL; if (len & ~PAGE_MASK) return -EINVAL; if (!len) return -EINVAL; - if (*start < mmap_min_addr) + if (start < mmap_min_addr) return -EINVAL; - if (*start >= task_size) + if (start >= task_size) return -EINVAL; - if (len > task_size - *start) + if (len > task_size - start) return -EINVAL; return 0; } @@ -1316,7 +1314,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vm_flags |= VM_UFFD_MINOR; } - ret = validate_range(mm, &uffdio_register.range.start, + ret = validate_range(mm, uffdio_register.range.start, uffdio_register.range.len); if (ret) goto out; @@ -1522,7 +1520,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_unregister, buf, sizeof(uffdio_unregister))) goto out; - ret = validate_range(mm, &uffdio_unregister.start, + ret = validate_range(mm, uffdio_unregister.start, uffdio_unregister.len); if (ret) goto out; @@ -1671,7 +1669,7 @@ static int userfaultfd_wake(struct userfaultfd_ctx *ctx, if (copy_from_user(&uffdio_wake, buf, sizeof(uffdio_wake))) goto out; - ret = validate_range(ctx->mm, &uffdio_wake.start, uffdio_wake.len); + ret = validate_range(ctx->mm, uffdio_wake.start, uffdio_wake.len); if (ret) goto out; @@ -1711,7 +1709,7 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx, sizeof(uffdio_copy)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_copy.dst, uffdio_copy.len); + ret = validate_range(ctx->mm, uffdio_copy.dst, uffdio_copy.len); if (ret) goto out; /* @@ -1768,7 +1766,7 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx, sizeof(uffdio_zeropage)-sizeof(__s64))) goto out; - ret = validate_range(ctx->mm, &uffdio_zeropage.range.start, + ret = validate_range(ctx->mm, uffdio_zeropage.range.start, uffdio_zeropage.range.len); if (ret) goto out; @@ -1818,7 +1816,7 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, sizeof(struct uffdio_writeprotect))) return -EFAULT; - ret = validate_range(ctx->mm, &uffdio_wp.range.start, + ret = validate_range(ctx->mm, uffdio_wp.range.start, uffdio_wp.range.len); if (ret) return ret; @@ -1866,7 +1864,7 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg) sizeof(uffdio_continue) - (sizeof(__s64)))) goto out; - ret = validate_range(ctx->mm, &uffdio_continue.range.start, + ret = validate_range(ctx->mm, uffdio_continue.range.start, uffdio_continue.range.len); if (ret) goto out; diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index d548ea4b6aab..2c5bcbc19264 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -411,7 +411,16 @@ struct xfs_log_dinode { /* start of the extended dinode, writable fields */ uint32_t di_crc; /* CRC of the inode */ uint64_t di_changecount; /* number of attribute changes */ - xfs_lsn_t di_lsn; /* flush sequence */ + + /* + * The LSN we write to this field during formatting is not a reflection + * of the current on-disk LSN. It should never be used for recovery + * sequencing, nor should it be recovered into the on-disk inode at all. + * See xlog_recover_inode_commit_pass2() and xfs_log_dinode_to_disk() + * for details. + */ + xfs_lsn_t di_lsn; + uint64_t di_flags2; /* more random flags */ uint32_t di_cowextsize; /* basic cow extent size for file */ uint8_t di_pad2[12]; /* more padding for future expansion */ diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c index d44e8b4a3391..4775485b4062 100644 --- a/fs/xfs/xfs_buf_item_recover.c +++ b/fs/xfs/xfs_buf_item_recover.c @@ -698,7 +698,8 @@ xlog_recover_do_inode_buffer( static xfs_lsn_t xlog_recover_get_buf_lsn( struct xfs_mount *mp, - struct xfs_buf *bp) + struct xfs_buf *bp, + struct xfs_buf_log_format *buf_f) { uint32_t magic32; uint16_t magic16; @@ -706,11 +707,20 @@ xlog_recover_get_buf_lsn( void *blk = bp->b_addr; uuid_t *uuid; xfs_lsn_t lsn = -1; + uint16_t blft; /* v4 filesystems always recover immediately */ if (!xfs_sb_version_hascrc(&mp->m_sb)) goto recover_immediately; + /* + * realtime bitmap and summary file blocks do not have magic numbers or + * UUIDs, so we must recover them immediately. + */ + blft = xfs_blft_from_flags(buf_f); + if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF) + goto recover_immediately; + magic32 = be32_to_cpu(*(__be32 *)blk); switch (magic32) { case XFS_ABTB_CRC_MAGIC: @@ -796,6 +806,7 @@ xlog_recover_get_buf_lsn( switch (magicda) { case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: + case XFS_ATTR3_LEAF_MAGIC: case XFS_DA3_NODE_MAGIC: lsn = be64_to_cpu(((struct xfs_da3_blkinfo *)blk)->lsn); uuid = &((struct xfs_da3_blkinfo *)blk)->uuid; @@ -919,7 +930,7 @@ xlog_recover_buf_commit_pass2( * the verifier will be reset to match whatever recover turns that * buffer into. */ - lsn = xlog_recover_get_buf_lsn(mp, bp); + lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f); if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { trace_xfs_log_recover_buf_skip(log, buf_f); xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index 7b79518b6c20..e0072a6cd2d3 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -145,7 +145,8 @@ xfs_log_dinode_to_disk_ts( STATIC void xfs_log_dinode_to_disk( struct xfs_log_dinode *from, - struct xfs_dinode *to) + struct xfs_dinode *to, + xfs_lsn_t lsn) { to->di_magic = cpu_to_be16(from->di_magic); to->di_mode = cpu_to_be16(from->di_mode); @@ -182,7 +183,7 @@ xfs_log_dinode_to_disk( to->di_flags2 = cpu_to_be64(from->di_flags2); to->di_cowextsize = cpu_to_be32(from->di_cowextsize); to->di_ino = cpu_to_be64(from->di_ino); - to->di_lsn = cpu_to_be64(from->di_lsn); + to->di_lsn = cpu_to_be64(lsn); memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2)); uuid_copy(&to->di_uuid, &from->di_uuid); to->di_flushiter = 0; @@ -261,16 +262,25 @@ xlog_recover_inode_commit_pass2( } /* - * If the inode has an LSN in it, recover the inode only if it's less - * than the lsn of the transaction we are replaying. Note: we still - * need to replay an owner change even though the inode is more recent - * than the transaction as there is no guarantee that all the btree - * blocks are more recent than this transaction, too. + * If the inode has an LSN in it, recover the inode only if the on-disk + * inode's LSN is older than the lsn of the transaction we are + * replaying. We can have multiple checkpoints with the same start LSN, + * so the current LSN being equal to the on-disk LSN doesn't necessarily + * mean that the on-disk inode is more recent than the change being + * replayed. + * + * We must check the current_lsn against the on-disk inode + * here because the we can't trust the log dinode to contain a valid LSN + * (see comment below before replaying the log dinode for details). + * + * Note: we still need to replay an owner change even though the inode + * is more recent than the transaction as there is no guarantee that all + * the btree blocks are more recent than this transaction, too. */ if (dip->di_version >= 3) { xfs_lsn_t lsn = be64_to_cpu(dip->di_lsn); - if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) { + if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) > 0) { trace_xfs_log_recover_inode_skip(log, in_f); error = 0; goto out_owner_change; @@ -368,8 +378,17 @@ xlog_recover_inode_commit_pass2( goto out_release; } - /* recover the log dinode inode into the on disk inode */ - xfs_log_dinode_to_disk(ldip, dip); + /* + * Recover the log dinode inode into the on disk inode. + * + * The LSN in the log dinode is garbage - it can be zero or reflect + * stale in-memory runtime state that isn't coherent with the changes + * logged in this transaction or the changes written to the on-disk + * inode. Hence we write the current lSN into the inode because that + * matches what xfs_iflush() would write inode the inode when flushing + * the changes in this transaction. + */ + xfs_log_dinode_to_disk(ldip, dip, current_lsn); fields = in_f->ilf_fields; if (fields & XFS_ILOG_DEV) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 36fa2650b081..60ac5fd63f1e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -78,13 +78,12 @@ xlog_verify_iclog( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn); + struct xlog_in_core *iclog); #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) #define xlog_verify_iclog(a,b,c) -#define xlog_verify_tail_lsn(a,b,c) +#define xlog_verify_tail_lsn(a,b) #endif STATIC int @@ -487,51 +486,80 @@ out_error: return error; } -static bool -__xlog_state_release_iclog( - struct xlog *log, - struct xlog_in_core *iclog) -{ - lockdep_assert_held(&log->l_icloglock); - - if (iclog->ic_state == XLOG_STATE_WANT_SYNC) { - /* update tail before writing to iclog */ - xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp); - - iclog->ic_state = XLOG_STATE_SYNCING; - iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); - xlog_verify_tail_lsn(log, iclog, tail_lsn); - /* cycle incremented when incrementing curr_block */ - trace_xlog_iclog_syncing(iclog, _RET_IP_); - return true; - } - - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); - return false; -} - /* * Flush iclog to disk if this is the last reference to the given iclog and the * it is in the WANT_SYNC state. + * + * If the caller passes in a non-zero @old_tail_lsn and the current log tail + * does not match, there may be metadata on disk that must be persisted before + * this iclog is written. To satisfy that requirement, set the + * XLOG_ICL_NEED_FLUSH flag as a condition for writing this iclog with the new + * log tail value. + * + * If XLOG_ICL_NEED_FUA is already set on the iclog, we need to ensure that the + * log tail is updated correctly. NEED_FUA indicates that the iclog will be + * written to stable storage, and implies that a commit record is contained + * within the iclog. We need to ensure that the log tail does not move beyond + * the tail that the first commit record in the iclog ordered against, otherwise + * correct recovery of that checkpoint becomes dependent on future operations + * performed on this iclog. + * + * Hence if NEED_FUA is set and the current iclog tail lsn is empty, write the + * current tail into iclog. Once the iclog tail is set, future operations must + * not modify it, otherwise they potentially violate ordering constraints for + * the checkpoint commit that wrote the initial tail lsn value. The tail lsn in + * the iclog will get zeroed on activation of the iclog after sync, so we + * always capture the tail lsn on the iclog on the first NEED_FUA release + * regardless of the number of active reference counts on this iclog. */ + int xlog_state_release_iclog( struct xlog *log, - struct xlog_in_core *iclog) + struct xlog_in_core *iclog, + xfs_lsn_t old_tail_lsn) { + xfs_lsn_t tail_lsn; lockdep_assert_held(&log->l_icloglock); trace_xlog_iclog_release(iclog, _RET_IP_); if (iclog->ic_state == XLOG_STATE_IOERROR) return -EIO; - if (atomic_dec_and_test(&iclog->ic_refcnt) && - __xlog_state_release_iclog(log, iclog)) { - spin_unlock(&log->l_icloglock); - xlog_sync(log, iclog); - spin_lock(&log->l_icloglock); + /* + * Grabbing the current log tail needs to be atomic w.r.t. the writing + * of the tail LSN into the iclog so we guarantee that the log tail does + * not move between deciding if a cache flush is required and writing + * the LSN into the iclog below. + */ + if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) { + tail_lsn = xlog_assign_tail_lsn(log->l_mp); + + if (old_tail_lsn && tail_lsn != old_tail_lsn) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH; + + if ((iclog->ic_flags & XLOG_ICL_NEED_FUA) && + !iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); } + if (!atomic_dec_and_test(&iclog->ic_refcnt)) + return 0; + + if (iclog->ic_state != XLOG_STATE_WANT_SYNC) { + ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + return 0; + } + + iclog->ic_state = XLOG_STATE_SYNCING; + if (!iclog->ic_header.h_tail_lsn) + iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn); + xlog_verify_tail_lsn(log, iclog); + trace_xlog_iclog_syncing(iclog, _RET_IP_); + + spin_unlock(&log->l_icloglock); + xlog_sync(log, iclog); + spin_lock(&log->l_icloglock); return 0; } @@ -774,6 +802,21 @@ xfs_log_mount_cancel( } /* + * Flush out the iclog to disk ensuring that device caches are flushed and + * the iclog hits stable storage before any completion waiters are woken. + */ +static inline int +xlog_force_iclog( + struct xlog_in_core *iclog) +{ + atomic_inc(&iclog->ic_refcnt); + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (iclog->ic_state == XLOG_STATE_ACTIVE) + xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); + return xlog_state_release_iclog(iclog->ic_log, iclog, 0); +} + +/* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions * have been ordered and callbacks run before we are woken here, hence @@ -827,13 +870,6 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(ulf); - /* - * For external log devices, we need to flush the data device cache - * first to ensure all metadata writeback is on stable storage before we - * stamp the tail LSN into the unmount record. - */ - if (log->l_targ != log->l_mp->m_ddev_targp) - blkdev_issue_flush(log->l_targ->bt_bdev); return xlog_write(log, &vec, ticket, NULL, NULL, XLOG_UNMOUNT_TRANS); } @@ -865,18 +901,7 @@ out_err: spin_lock(&log->l_icloglock); iclog = log->l_iclog; - atomic_inc(&iclog->ic_refcnt); - if (iclog->ic_state == XLOG_STATE_ACTIVE) - xlog_state_switch_iclogs(log, iclog, 0); - else - ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC || - iclog->ic_state == XLOG_STATE_IOERROR); - /* - * Ensure the journal is fully flushed and on stable storage once the - * iclog containing the unmount record is written. - */ - iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); - error = xlog_state_release_iclog(log, iclog); + error = xlog_force_iclog(iclog); xlog_wait_on_iclog(iclog); if (tic) { @@ -1796,10 +1821,20 @@ xlog_write_iclog( * metadata writeback and causing priority inversions. */ iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_IDLE; - if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) + if (iclog->ic_flags & XLOG_ICL_NEED_FLUSH) { iclog->ic_bio.bi_opf |= REQ_PREFLUSH; + /* + * For external log devices, we also need to flush the data + * device cache first to ensure all metadata writeback covered + * by the LSN in this iclog is on stable storage. This is slow, + * but it *must* complete before we issue the external log IO. + */ + if (log->l_targ != log->l_mp->m_ddev_targp) + blkdev_issue_flush(log->l_mp->m_ddev_targp->bt_bdev); + } if (iclog->ic_flags & XLOG_ICL_NEED_FUA) iclog->ic_bio.bi_opf |= REQ_FUA; + iclog->ic_flags &= ~(XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA); if (xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count)) { @@ -2310,7 +2345,7 @@ xlog_write_copy_finish( return 0; release_iclog: - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); return error; } @@ -2529,7 +2564,7 @@ next_lv: ASSERT(optype & XLOG_COMMIT_TRANS); *commit_iclog = iclog; } else { - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); } spin_unlock(&log->l_icloglock); @@ -2567,6 +2602,7 @@ xlog_state_activate_iclog( memset(iclog->ic_header.h_cycle_data, 0, sizeof(iclog->ic_header.h_cycle_data)); iclog->ic_header.h_lsn = 0; + iclog->ic_header.h_tail_lsn = 0; } /* @@ -2967,7 +3003,7 @@ restart: * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, 0); spin_unlock(&log->l_icloglock); if (error) return error; @@ -3132,6 +3168,35 @@ xlog_state_switch_iclogs( } /* + * Force the iclog to disk and check if the iclog has been completed before + * xlog_force_iclog() returns. This can happen on synchronous (e.g. + * pmem) or fast async storage because we drop the icloglock to issue the IO. + * If completion has already occurred, tell the caller so that it can avoid an + * unnecessary wait on the iclog. + */ +static int +xlog_force_and_check_iclog( + struct xlog_in_core *iclog, + bool *completed) +{ + xfs_lsn_t lsn = be64_to_cpu(iclog->ic_header.h_lsn); + int error; + + *completed = false; + error = xlog_force_iclog(iclog); + if (error) + return error; + + /* + * If the iclog has already been completed and reused the header LSN + * will have been rewritten by completion + */ + if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + *completed = true; + return 0; +} + +/* * Write out all data in the in-core log as of this exact moment in time. * * Data may be written to the in-core log during this call. However, @@ -3165,7 +3230,6 @@ xfs_log_force( { struct xlog *log = mp->m_log; struct xlog_in_core *iclog; - xfs_lsn_t lsn; XFS_STATS_INC(mp, xs_log_force); trace_xfs_log_force(mp, 0, _RET_IP_); @@ -3193,39 +3257,33 @@ xfs_log_force( iclog = iclog->ic_prev; } else if (iclog->ic_state == XLOG_STATE_ACTIVE) { if (atomic_read(&iclog->ic_refcnt) == 0) { - /* - * We are the only one with access to this iclog. - * - * Flush it out now. There should be a roundoff of zero - * to show that someone has already taken care of the - * roundoff from the previous sync. - */ - atomic_inc(&iclog->ic_refcnt); - lsn = be64_to_cpu(iclog->ic_header.h_lsn); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + /* We have exclusive access to this iclog. */ + bool completed; + + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; - if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) + if (completed) goto out_unlock; } else { /* - * Someone else is writing to this iclog. - * - * Use its call to flush out the data. However, the - * other thread may not force out this LR, so we mark - * it WANT_SYNC. + * Someone else is still writing to this iclog, so we + * need to ensure that when they release the iclog it + * gets synced immediately as we may be waiting on it. */ xlog_state_switch_iclogs(log, iclog, 0); } - } else { - /* - * If the head iclog is not active nor dirty, we just attach - * ourselves to the head and go to sleep if necessary. - */ - ; } + /* + * The iclog we are about to wait on may contain the checkpoint pushed + * by the above xlog_cil_force() call, but it may not have been pushed + * to disk yet. Like the ACTIVE case above, we need to make sure caches + * are flushed when this iclog is written. + */ + if (iclog->ic_state == XLOG_STATE_WANT_SYNC) + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + if (flags & XFS_LOG_SYNC) return xlog_wait_on_iclog(iclog); out_unlock: @@ -3245,6 +3303,7 @@ xlog_force_lsn( bool already_slept) { struct xlog_in_core *iclog; + bool completed; spin_lock(&log->l_icloglock); iclog = log->l_iclog; @@ -3258,7 +3317,8 @@ xlog_force_lsn( goto out_unlock; } - if (iclog->ic_state == XLOG_STATE_ACTIVE) { + switch (iclog->ic_state) { + case XLOG_STATE_ACTIVE: /* * We sleep here if we haven't already slept (e.g. this is the * first time we've looked at the correct iclog buf) and the @@ -3281,12 +3341,31 @@ xlog_force_lsn( &log->l_icloglock); return -EAGAIN; } - atomic_inc(&iclog->ic_refcnt); - xlog_state_switch_iclogs(log, iclog, 0); - if (xlog_state_release_iclog(log, iclog)) + if (xlog_force_and_check_iclog(iclog, &completed)) goto out_error; if (log_flushed) *log_flushed = 1; + if (completed) + goto out_unlock; + break; + case XLOG_STATE_WANT_SYNC: + /* + * This iclog may contain the checkpoint pushed by the + * xlog_cil_force_seq() call, but there are other writers still + * accessing it so it hasn't been pushed to disk yet. Like the + * ACTIVE case above, we need to make sure caches are flushed + * when this iclog is written. + */ + iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; + break; + default: + /* + * The entire checkpoint was written by the CIL force and is on + * its way to disk already. It will be stable when it + * completes, so we don't need to manipulate caches here at all. + * We just need to wait for completion if necessary. + */ + break; } if (flags & XFS_LOG_SYNC) @@ -3559,10 +3638,10 @@ xlog_verify_grant_tail( STATIC void xlog_verify_tail_lsn( struct xlog *log, - struct xlog_in_core *iclog, - xfs_lsn_t tail_lsn) + struct xlog_in_core *iclog) { - int blocks; + xfs_lsn_t tail_lsn = be64_to_cpu(iclog->ic_header.h_tail_lsn); + int blocks; if (CYCLE_LSN(tail_lsn) == log->l_prev_cycle) { blocks = diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b128aaa9b870..4c44bc3786c0 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -654,8 +654,9 @@ xlog_cil_push_work( struct xfs_trans_header thdr; struct xfs_log_iovec lhdr; struct xfs_log_vec lvhdr = { NULL }; + xfs_lsn_t preflush_tail_lsn; xfs_lsn_t commit_lsn; - xfs_lsn_t push_seq; + xfs_csn_t push_seq; struct bio bio; DECLARE_COMPLETION_ONSTACK(bdev_flush); @@ -730,7 +731,15 @@ xlog_cil_push_work( * because we hold the flush lock exclusively. Hence we can now issue * a cache flush to ensure all the completed metadata in the journal we * are about to overwrite is on stable storage. + * + * Because we are issuing this cache flush before we've written the + * tail lsn to the iclog, we can have metadata IO completions move the + * tail forwards between the completion of this flush and the iclog + * being written. In this case, we need to re-issue the cache flush + * before the iclog write. To detect whether the log tail moves, sample + * the tail LSN *before* we issue the flush. */ + preflush_tail_lsn = atomic64_read(&log->l_tail_lsn); xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev, &bdev_flush); @@ -941,7 +950,7 @@ restart: * storage. */ commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA; - xlog_state_release_iclog(log, commit_iclog); + xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn); spin_unlock(&log->l_icloglock); return; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 4c41bbfa33b0..f3e79a45d60a 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -59,6 +59,16 @@ enum xlog_iclog_state { { XLOG_STATE_DIRTY, "XLOG_STATE_DIRTY" }, \ { XLOG_STATE_IOERROR, "XLOG_STATE_IOERROR" } +/* + * In core log flags + */ +#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ +#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ + +#define XLOG_ICL_STRINGS \ + { XLOG_ICL_NEED_FLUSH, "XLOG_ICL_NEED_FLUSH" }, \ + { XLOG_ICL_NEED_FUA, "XLOG_ICL_NEED_FUA" } + /* * Log ticket flags @@ -143,9 +153,6 @@ enum xlog_iclog_state { #define XLOG_COVER_OPS 5 -#define XLOG_ICL_NEED_FLUSH (1 << 0) /* iclog needs REQ_PREFLUSH */ -#define XLOG_ICL_NEED_FUA (1 << 1) /* iclog needs REQ_FUA */ - /* Ticket reservation region accounting */ #define XLOG_TIC_LEN_MAX 15 @@ -497,7 +504,8 @@ int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket, void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); -int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); +int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, + xfs_lsn_t log_tail_lsn); /* * When we crack an atomic LSN, we sample it first so that the value will not diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index f9d8d605f9b1..19260291ff8b 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3944,6 +3944,7 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __field(uint32_t, state) __field(int32_t, refcount) __field(uint32_t, offset) + __field(uint32_t, flags) __field(unsigned long long, lsn) __field(unsigned long, caller_ip) ), @@ -3952,15 +3953,17 @@ DECLARE_EVENT_CLASS(xlog_iclog_class, __entry->state = iclog->ic_state; __entry->refcount = atomic_read(&iclog->ic_refcnt); __entry->offset = iclog->ic_offset; + __entry->flags = iclog->ic_flags; __entry->lsn = be64_to_cpu(iclog->ic_header.h_lsn); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx caller %pS", + TP_printk("dev %d:%d state %s refcnt %d offset %u lsn 0x%llx flags %s caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __print_symbolic(__entry->state, XLOG_STATE_STRINGS), __entry->refcount, __entry->offset, __entry->lsn, + __print_flags(__entry->flags, "|", XLOG_ICL_STRINGS), (char *)__entry->caller_ip) ); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 1ae993fee4a5..13d93371790e 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -707,11 +707,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); * @hrv: Hardware Revision of the device, pass -1 to not check _HRV * * The caller is responsible for invoking acpi_dev_put() on the returned device. - * - * FIXME: Due to above requirement there is a window that may invalidate @adev - * and next iteration will use a dangling pointer, e.g. in the case of a - * hotplug event. That said, the caller should ensure that this will never - * happen. */ #define for_each_acpi_dev_match(adev, hid, uid, hrv) \ for (adev = acpi_dev_get_first_match_dev(hid, uid, hrv); \ @@ -725,7 +720,8 @@ static inline struct acpi_device *acpi_dev_get(struct acpi_device *adev) static inline void acpi_dev_put(struct acpi_device *adev) { - put_device(&adev->dev); + if (adev) + put_device(&adev->dev); } struct acpi_device *acpi_bus_get_acpi_device(acpi_handle handle); diff --git a/include/drm/drm_ioctl.h b/include/drm/drm_ioctl.h index 10100a4bbe2a..afb27cb6a7bd 100644 --- a/include/drm/drm_ioctl.h +++ b/include/drm/drm_ioctl.h @@ -68,6 +68,7 @@ typedef int drm_ioctl_compat_t(struct file *filp, unsigned int cmd, unsigned long arg); #define DRM_IOCTL_NR(n) _IOC_NR(n) +#define DRM_IOCTL_TYPE(n) _IOC_TYPE(n) #define DRM_MAJOR 226 /** diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3177181c4326..d3afea47ade6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -57,7 +57,7 @@ struct blk_keyslot_manager; * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ -#define BLKCG_MAX_POLS 5 +#define BLKCG_MAX_POLS 6 typedef void (rq_end_io_fn)(struct request *, blk_status_t); diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 8b77d08d4b47..a74cd1c3bd87 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -27,19 +27,6 @@ struct task_struct; extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE]; #define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type]) -#define BPF_CGROUP_STORAGE_NEST_MAX 8 - -struct bpf_cgroup_storage_info { - struct task_struct *task; - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]; -}; - -/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks - * to use bpf cgroup storage simultaneously. - */ -DECLARE_PER_CPU(struct bpf_cgroup_storage_info, - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); - #define for_each_cgroup_storage_type(stype) \ for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++) @@ -172,44 +159,6 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type( return BPF_CGROUP_STORAGE_SHARED; } -static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage - *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) -{ - enum bpf_cgroup_storage_type stype; - int i, err = 0; - - preempt_disable(); - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL)) - continue; - - this_cpu_write(bpf_cgroup_storage_info[i].task, current); - for_each_cgroup_storage_type(stype) - this_cpu_write(bpf_cgroup_storage_info[i].storage[stype], - storage[stype]); - goto out; - } - err = -EBUSY; - WARN_ON_ONCE(1); - -out: - preempt_enable(); - return err; -} - -static inline void bpf_cgroup_storage_unset(void) -{ - int i; - - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) - continue; - - this_cpu_write(bpf_cgroup_storage_info[i].task, NULL); - return; - } -} - struct bpf_cgroup_storage * cgroup_storage_lookup(struct bpf_cgroup_storage_map *map, void *key, bool locked); @@ -487,9 +436,6 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, return -EINVAL; } -static inline int bpf_cgroup_storage_set( - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; } -static inline void bpf_cgroup_storage_unset(void) {} static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map) { return 0; } static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc( diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0edff8f5177e..c8cc09013210 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1142,38 +1142,40 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *include_prog, struct bpf_prog_array **new_array); +struct bpf_run_ctx {}; + +struct bpf_cg_run_ctx { + struct bpf_run_ctx run_ctx; + struct bpf_prog_array_item *prog_item; +}; + /* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */ #define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0) /* BPF program asks to set CN on the packet. */ #define BPF_RET_SET_CN (1 << 0) -/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY, - * if bpf_cgroup_storage_set() failed, the rest of programs - * will not execute. This should be a really rare scenario - * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of - * preemptions all between bpf_cgroup_storage_set() and - * bpf_cgroup_storage_unset() on the same cpu. - */ #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \ ({ \ struct bpf_prog_array_item *_item; \ struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ + struct bpf_run_ctx *old_run_ctx; \ + struct bpf_cg_run_ctx run_ctx; \ u32 _ret = 1; \ u32 func_ret; \ migrate_disable(); \ rcu_read_lock(); \ _array = rcu_dereference(array); \ _item = &_array->items[0]; \ + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); \ while ((_prog = READ_ONCE(_item->prog))) { \ - if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ - break; \ + run_ctx.prog_item = _item; \ func_ret = func(_prog, ctx); \ _ret &= (func_ret & 1); \ - *(ret_flags) |= (func_ret >> 1); \ - bpf_cgroup_storage_unset(); \ + *(ret_flags) |= (func_ret >> 1); \ _item++; \ } \ + bpf_reset_run_ctx(old_run_ctx); \ rcu_read_unlock(); \ migrate_enable(); \ _ret; \ @@ -1184,6 +1186,8 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog_array_item *_item; \ struct bpf_prog *_prog; \ struct bpf_prog_array *_array; \ + struct bpf_run_ctx *old_run_ctx; \ + struct bpf_cg_run_ctx run_ctx; \ u32 _ret = 1; \ migrate_disable(); \ rcu_read_lock(); \ @@ -1191,17 +1195,13 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, if (unlikely(check_non_null && !_array))\ goto _out; \ _item = &_array->items[0]; \ - while ((_prog = READ_ONCE(_item->prog))) { \ - if (!set_cg_storage) { \ - _ret &= func(_prog, ctx); \ - } else { \ - if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage))) \ - break; \ - _ret &= func(_prog, ctx); \ - bpf_cgroup_storage_unset(); \ - } \ + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);\ + while ((_prog = READ_ONCE(_item->prog))) { \ + run_ctx.prog_item = _item; \ + _ret &= func(_prog, ctx); \ _item++; \ } \ + bpf_reset_run_ctx(old_run_ctx); \ _out: \ rcu_read_unlock(); \ migrate_enable(); \ @@ -1284,6 +1284,20 @@ static inline void bpf_enable_instrumentation(void) migrate_enable(); } +static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx) +{ + struct bpf_run_ctx *old_ctx; + + old_ctx = current->bpf_ctx; + current->bpf_ctx = new_ctx; + return old_ctx; +} + +static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx) +{ + current->bpf_ctx = old_ctx; +} + extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; extern const struct file_operations bpf_iter_fops; @@ -1428,6 +1442,9 @@ typedef void (*bpf_iter_show_fdinfo_t) (const struct bpf_iter_aux_info *aux, struct seq_file *seq); typedef int (*bpf_iter_fill_link_info_t)(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info); +typedef const struct bpf_func_proto * +(*bpf_iter_get_func_proto_t)(enum bpf_func_id func_id, + const struct bpf_prog *prog); enum bpf_iter_feature { BPF_ITER_RESCHED = BIT(0), @@ -1440,6 +1457,7 @@ struct bpf_iter_reg { bpf_iter_detach_target_t detach_target; bpf_iter_show_fdinfo_t show_fdinfo; bpf_iter_fill_link_info_t fill_link_info; + bpf_iter_get_func_proto_t get_func_proto; u32 ctx_arg_info_size; u32 feature; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; @@ -1462,6 +1480,8 @@ struct bpf_iter__bpf_map_elem { int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); bool bpf_iter_prog_supported(struct bpf_prog *prog); +const struct bpf_func_proto * +bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog); int bpf_iter_new_fd(struct bpf_link *link); bool bpf_link_is_iter(struct bpf_link *link); @@ -2036,6 +2056,8 @@ extern const struct bpf_func_proto bpf_task_storage_get_proto; extern const struct bpf_func_proto bpf_task_storage_delete_proto; extern const struct bpf_func_proto bpf_for_each_map_elem_proto; extern const struct bpf_func_proto bpf_btf_find_by_name_kind_proto; +extern const struct bpf_func_proto bpf_sk_setsockopt_proto; +extern const struct bpf_func_proto bpf_sk_getsockopt_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a9db1eae6796..ae3ac3a2018c 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -134,4 +134,5 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter) #ifdef CONFIG_NET BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns) +BPF_LINK_TYPE(BPF_LINK_TYPE_XDP, xdp) #endif diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b847e1ccd10f..5424124dbe36 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -354,8 +354,8 @@ struct bpf_insn_aux_data { }; u64 map_key_state; /* constant (32 bit) key tracking for maps */ int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ - int sanitize_stack_off; /* stack slot to be cleared */ u32 seen; /* this insn was processed by the verifier at env->pass_cnt */ + bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */ bool zext_dst; /* this insn zero extends dst reg */ u8 alu_state; /* used in combination with alu_limit */ @@ -429,6 +429,7 @@ struct bpf_verifier_env { u32 used_map_cnt; /* number of used maps */ u32 used_btf_cnt; /* number of used BTF objects */ u32 id_gen; /* used to generate unique reg IDs */ + bool explore_alu_limits; bool allow_ptr_leaks; bool allow_uninit_stack; bool allow_ptr_to_map_access; diff --git a/include/linux/filter.h b/include/linux/filter.h index ba36989f711a..1797e8506929 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -73,6 +73,11 @@ struct ctl_table_header; /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 +/* unused opcode to mark speculation barrier for mitigating + * Speculative Store Bypass + */ +#define BPF_NOSPEC 0xc0 + /* As per nm, we expose JITed images as text (code) section for * kallsyms. That way, tools like perf can find it to match * addresses. @@ -390,6 +395,16 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) .off = 0, \ .imm = 0 }) +/* Speculation barrier */ + +#define BPF_ST_NOSPEC() \ + ((struct bpf_insn) { \ + .code = BPF_ST | BPF_NOSPEC, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = 0 }) + /* Internal classic blocks for direct assignment */ #define __BPF_STMT(CODE, K) \ @@ -761,6 +776,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, DECLARE_BPF_DISPATCHER(xdp) +DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); + +u32 xdp_master_redirect(struct xdp_buff *xdp); + static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) { @@ -768,7 +787,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * under local_bh_disable(), which provides the needed RCU protection * for accessing map entries. */ - return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + + if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { + if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) + act = xdp_master_redirect(xdp); + } + + return act; } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index e2bc16300c82..6b54982fc5f3 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -141,6 +141,7 @@ extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param); +extern void fc_drop_locked(struct fs_context *fc); /* * sget() wrappers to be called from the ->get_tree() op. diff --git a/include/linux/fsl/mc.h b/include/linux/fsl/mc.h index 63b56aba925a..30ece3ae6df7 100644 --- a/include/linux/fsl/mc.h +++ b/include/linux/fsl/mc.h @@ -423,7 +423,8 @@ int __must_check fsl_mc_allocate_irqs(struct fsl_mc_device *mc_dev); void fsl_mc_free_irqs(struct fsl_mc_device *mc_dev); -struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev); +struct fsl_mc_device *fsl_mc_get_endpoint(struct fsl_mc_device *mc_dev, + u16 if_id); extern struct bus_type fsl_mc_bus_type; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 8c6e8e996c87..d9a606a9fc64 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -318,14 +318,16 @@ static inline void memcpy_to_page(struct page *page, size_t offset, VM_BUG_ON(offset + len > PAGE_SIZE); memcpy(to + offset, from, len); + flush_dcache_page(page); kunmap_local(to); } static inline void memzero_page(struct page *page, size_t offset, size_t len) { - char *addr = kmap_atomic(page); + char *addr = kmap_local_page(page); memset(addr + offset, 0, len); - kunmap_atomic(addr); + flush_dcache_page(page); + kunmap_local(addr); } #endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 21daed10322e..509e18c7e740 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -190,39 +190,4 @@ static inline clock_t br_get_ageing_time(const struct net_device *br_dev) } #endif -#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_NET_SWITCHDEV) - -int switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack); -void switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb); - -#else - -static inline int -switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack) -{ - return -EINVAL; -} - -static inline void -switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb) -{ -} -#endif - #endif diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 64ce8cd1cfaf..93c262ecbdc9 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -41,9 +41,6 @@ struct ip_sf_socklist { __be32 sl_addr[]; }; -#define IP_SFLSIZE(count) (sizeof(struct ip_sf_socklist) + \ - (count) * sizeof(__be32)) - #define IP_SFBLOCK 10 /* allocate this many at once */ /* ip_mc_socklist is real list now. Speed is not argument; diff --git a/include/linux/intel-ish-client-if.h b/include/linux/intel-ish-client-if.h index 25e2b4e80502..aee8ff4739b1 100644 --- a/include/linux/intel-ish-client-if.h +++ b/include/linux/intel-ish-client-if.h @@ -81,6 +81,8 @@ int ishtp_register_event_cb(struct ishtp_cl_device *device, /* Get the device * from ishtp device instance */ struct device *ishtp_device(struct ishtp_cl_device *cl_device); +/* wait for IPC resume */ +bool ishtp_wait_resume(struct ishtp_device *dev); /* Trace interface for clients */ ishtp_print_log ishtp_trace_callback(struct ishtp_cl_device *cl_device); /* Get device pointer of PCI device for DMA acces */ diff --git a/include/linux/memblock.h b/include/linux/memblock.h index cbf46f56d105..4a53c3ca86bd 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -209,7 +209,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range(i, p_start, p_end) \ __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_mem_range_rev - reverse iterate through memblock areas from @@ -220,7 +220,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, */ #define for_each_mem_range_rev(i, p_start, p_end) \ __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ - MEMBLOCK_NONE, p_start, p_end, NULL) + MEMBLOCK_HOTPLUG, p_start, p_end, NULL) /** * for_each_reserved_mem_range - iterate over all reserved memblock areas diff --git a/include/linux/mhi.h b/include/linux/mhi.h index beb918328eef..c493a80cb453 100644 --- a/include/linux/mhi.h +++ b/include/linux/mhi.h @@ -721,8 +721,13 @@ void mhi_device_put(struct mhi_device *mhi_dev); * host and device execution environments match and * channels are in a DISABLED state. * @mhi_dev: Device associated with the channels + * @flags: MHI channel flags */ -int mhi_prepare_for_transfer(struct mhi_device *mhi_dev); +int mhi_prepare_for_transfer(struct mhi_device *mhi_dev, + unsigned int flags); + +/* Automatically allocate and queue inbound buffers */ +#define MHI_CH_INBOUND_ALLOC_BUFS BIT(0) /** * mhi_unprepare_from_transfer - Reset UL and DL channels for data transfer. diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1efe37466969..af4dd6e9f97f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1138,6 +1138,8 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev); bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev); bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); +bool mlx5_lag_is_master(struct mlx5_core_dev *dev); +bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, struct net_device *slave); @@ -1145,6 +1147,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, u64 *values, int num_counters, size_t *offsets); +struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type, diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index bc7db2e059eb..4ab5c1fc1270 100644 --- a/include/linux/mlx5/eswitch.h +++ b/include/linux/mlx5/eswitch.h @@ -29,11 +29,20 @@ enum { REP_LOADED, }; +enum mlx5_switchdev_event { + MLX5_SWITCHDEV_EVENT_PAIR, + MLX5_SWITCHDEV_EVENT_UNPAIR, +}; + struct mlx5_eswitch_rep; struct mlx5_eswitch_rep_ops { int (*load)(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep); void (*unload)(struct mlx5_eswitch_rep *rep); void *(*get_proto_dev)(struct mlx5_eswitch_rep *rep); + int (*event)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + enum mlx5_switchdev_event event, + void *data); }; struct mlx5_eswitch_rep_data { @@ -63,6 +72,7 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw, void *mlx5_eswitch_uplink_get_proto_dev(struct mlx5_eswitch *esw, u8 rep_type); struct mlx5_flow_handle * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, + struct mlx5_eswitch *from_esw, struct mlx5_eswitch_rep *rep, u32 sqn); #ifdef CONFIG_MLX5_ESWITCH @@ -128,6 +138,7 @@ u32 mlx5_eswitch_get_vport_metadata_for_set(struct mlx5_eswitch *esw, u8 mlx5_eswitch_mode(struct mlx5_core_dev *dev); u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev); +struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw); #else /* CONFIG_MLX5_ESWITCH */ @@ -171,6 +182,11 @@ static inline u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev) return 0; } +static inline struct mlx5_core_dev *mlx5_eswitch_get_core_dev(struct mlx5_eswitch *esw) +{ + return NULL; +} + #endif /* CONFIG_MLX5_ESWITCH */ static inline bool is_mdev_switchdev_mode(struct mlx5_core_dev *dev) diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 77746f7e35b8..0106c67e8ccb 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -38,6 +38,8 @@ #define MLX5_FS_DEFAULT_FLOW_TAG 0x0 +#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) + enum { MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO = 1 << 16, MLX5_FLOW_CONTEXT_ACTION_ENCRYPT = 1 << 17, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 6bbae0c3bc0b..fce3cbae0b99 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1652,7 +1652,13 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 max_geneve_tlv_option_data_len[0x5]; u8 reserved_at_570[0x10]; - u8 reserved_at_580[0x33]; + u8 reserved_at_580[0xb]; + u8 log_max_dci_stream_channels[0x5]; + u8 reserved_at_590[0x3]; + u8 log_max_dci_errored_streams[0x5]; + u8 reserved_at_598[0x8]; + + u8 reserved_at_5a0[0x13]; u8 log_max_dek[0x5]; u8 reserved_at_5b8[0x4]; u8 mini_cqe_resp_stride_index[0x1]; @@ -3021,10 +3027,12 @@ struct mlx5_ifc_qpc_bits { u8 reserved_at_3c0[0x8]; u8 next_send_psn[0x18]; - u8 reserved_at_3e0[0x8]; + u8 reserved_at_3e0[0x3]; + u8 log_num_dci_stream_channels[0x5]; u8 cqn_snd[0x18]; - u8 reserved_at_400[0x8]; + u8 reserved_at_400[0x3]; + u8 log_num_dci_errored_streams[0x5]; u8 deth_sqpn[0x18]; u8 reserved_at_420[0x20]; @@ -3912,7 +3920,7 @@ struct mlx5_ifc_cqc_bits { u8 status[0x4]; u8 reserved_at_4[0x2]; u8 dbr_umem_valid[0x1]; - u8 apu_thread_cq[0x1]; + u8 apu_cq[0x1]; u8 cqe_sz[0x3]; u8 cc[0x1]; u8 reserved_at_c[0x1]; @@ -3938,8 +3946,7 @@ struct mlx5_ifc_cqc_bits { u8 cq_period[0xc]; u8 cq_max_count[0x10]; - u8 reserved_at_a0[0x18]; - u8 c_eqn[0x8]; + u8 c_eqn_or_apu_element[0x20]; u8 reserved_at_c0[0x3]; u8 log_page_size[0x5]; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 52bbd2b7cb46..7f8ee09c711f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -103,11 +103,19 @@ struct page { unsigned long pp_magic; struct page_pool *pp; unsigned long _pp_mapping_pad; - /** - * @dma_addr: might require a 64-bit value on - * 32-bit architectures. - */ - unsigned long dma_addr[2]; + unsigned long dma_addr; + union { + /** + * dma_addr_upper: might require a 64-bit + * value on 32-bit architectures. + */ + unsigned long dma_addr_upper; + /** + * For frag page support, not supported in + * 32-bit architectures with 64-bit DMA. + */ + atomic_long_t pp_frag_count; + }; }; struct { /* slab, slob and slub */ union { diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d63a94ecbf3b..bd8d5b8e2de3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -295,18 +295,6 @@ enum netdev_state_t { }; -/* - * This structure holds boot-time configured netdevice settings. They - * are then used in the device probing. - */ -struct netdev_boot_setup { - char name[IFNAMSIZ]; - struct ifmap map; -}; -#define NETDEV_BOOT_SETUP_MAX 8 - -int __init netdev_boot_setup(char *str); - struct gro_list { struct list_head list; int count; @@ -1330,6 +1318,9 @@ struct netdev_net_notifier { * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. + * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, + * struct xdp_buff *xdp); + * Get the xmit slave of master device based on the xdp_buff. * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); * This function is used to wake up the softirq, ksoftirqd or kthread * responsible for sending and/or receiving packets on a specific @@ -1557,6 +1548,8 @@ struct net_device_ops { int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); + struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev, + struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); @@ -2939,7 +2932,6 @@ static inline struct net_device *first_net_device_rcu(struct net *net) } int netdev_boot_setup_check(struct net_device *dev); -unsigned long netdev_boot_base(const char *prefix, int unit); struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type, const char *hwaddr); struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type); @@ -3929,6 +3921,8 @@ static inline int netif_set_real_num_rx_queues(struct net_device *dev, return 0; } #endif +int netif_set_real_num_queues(struct net_device *dev, + unsigned int txq, unsigned int rxq); static inline struct netdev_rx_queue * __netif_get_rx_queue(struct net_device *dev, unsigned int rxq) @@ -4087,6 +4081,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); @@ -4154,11 +4149,13 @@ void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { + if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT - this_cpu_dec(*dev->pcpu_refcnt); + this_cpu_dec(*dev->pcpu_refcnt); #else - refcount_dec(&dev->dev_refcnt); + refcount_dec(&dev->dev_refcnt); #endif + } } /** @@ -4169,11 +4166,13 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { + if (dev) { #ifdef CONFIG_PCPU_DEV_REFCNT - this_cpu_inc(*dev->pcpu_refcnt); + this_cpu_inc(*dev->pcpu_refcnt); #else - refcount_inc(&dev->dev_refcnt); + refcount_inc(&dev->dev_refcnt); #endif + } } /* Carrier loss detection, dial on demand. The functions netif_carrier_on diff --git a/include/linux/sched.h b/include/linux/sched.h index ec8d07d88641..c64119aa2e60 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -42,6 +42,7 @@ struct backing_dev_info; struct bio_list; struct blk_plug; struct bpf_local_storage; +struct bpf_run_ctx; struct capture_control; struct cfs_rq; struct fs_struct; @@ -1379,6 +1380,8 @@ struct task_struct { #ifdef CONFIG_BPF_SYSCALL /* Used by BPF task local storage */ struct bpf_local_storage __rcu *bpf_storage; + /* Used for BPF run context */ + struct bpf_run_ctx *bpf_ctx; #endif #ifdef CONFIG_GCC_PLUGIN_STACKLEAK diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2bcdc8cd38be..6bdb0db3e825 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1183,6 +1183,7 @@ static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); +struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, @@ -4711,11 +4712,9 @@ static inline u64 skb_get_kcov_handle(struct sk_buff *skb) } #ifdef CONFIG_PAGE_POOL -static inline void skb_mark_for_recycle(struct sk_buff *skb, struct page *page, - struct page_pool *pp) +static inline void skb_mark_for_recycle(struct sk_buff *skb) { skb->pp_recycle = 1; - page_pool_store_mem_info(page, pp); } #endif diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index 96f319099744..14ab0c0bc924 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -285,11 +285,45 @@ static inline struct sk_psock *sk_psock(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } +static inline void sk_psock_set_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + set_bit(bit, &psock->state); +} + +static inline void sk_psock_clear_state(struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + clear_bit(bit, &psock->state); +} + +static inline bool sk_psock_test_state(const struct sk_psock *psock, + enum sk_psock_state_bits bit) +{ + return test_bit(bit, &psock->state); +} + +static inline void sock_drop(struct sock *sk, struct sk_buff *skb) +{ + sk_drops_add(sk, skb); + kfree_skb(skb); +} + +static inline void drop_sk_msg(struct sk_psock *psock, struct sk_msg *msg) +{ + if (msg->skb) + sock_drop(psock->sk, msg->skb); + kfree(msg); +} + static inline void sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { spin_lock_bh(&psock->ingress_lock); - list_add_tail(&msg->list, &psock->ingress_msg); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + list_add_tail(&msg->list, &psock->ingress_msg); + else + drop_sk_msg(psock, msg); spin_unlock_bh(&psock->ingress_lock); } @@ -406,24 +440,6 @@ static inline void sk_psock_restore_proto(struct sock *sk, psock->psock_update_sk_prot(sk, psock, true); } -static inline void sk_psock_set_state(struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - set_bit(bit, &psock->state); -} - -static inline void sk_psock_clear_state(struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - clear_bit(bit, &psock->state); -} - -static inline bool sk_psock_test_state(const struct sk_psock *psock, - enum sk_psock_state_bits bit) -{ - return test_bit(bit, &psock->state); -} - static inline struct sk_psock *sk_psock_get(struct sock *sk) { struct sk_psock *psock; diff --git a/include/net/Space.h b/include/net/Space.h index 9cce0d80d37a..08ca9cef0213 100644 --- a/include/net/Space.h +++ b/include/net/Space.h @@ -8,23 +8,13 @@ struct net_device *ultra_probe(int unit); struct net_device *wd_probe(int unit); struct net_device *ne_probe(int unit); struct net_device *fmv18x_probe(int unit); -struct net_device *i82596_probe(int unit); struct net_device *ni65_probe(int unit); struct net_device *sonic_probe(int unit); struct net_device *smc_init(int unit); -struct net_device *atarilance_probe(int unit); -struct net_device *sun3lance_probe(int unit); -struct net_device *sun3_82586_probe(int unit); -struct net_device *apne_probe(int unit); struct net_device *cs89x0_probe(int unit); -struct net_device *mvme147lance_probe(int unit); struct net_device *tc515_probe(int unit); struct net_device *lance_probe(int unit); struct net_device *cops_probe(int unit); -struct net_device *ltpc_probe(void); /* Fibre Channel adapters */ int iph5526_probe(struct net_device *dev); - -/* SBNI adapters */ -int sbni_probe(int unit); diff --git a/include/net/act_api.h b/include/net/act_api.h index 086b291e9530..f19f7f4a463c 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -58,6 +58,14 @@ struct tc_action { #define TCA_ACT_HW_STATS_ANY (TCA_ACT_HW_STATS_IMMEDIATE | \ TCA_ACT_HW_STATS_DELAYED) +/* Reserve 16 bits for user-space. See TCA_ACT_FLAGS_NO_PERCPU_STATS. */ +#define TCA_ACT_FLAGS_USER_BITS 16 +#define TCA_ACT_FLAGS_USER_MASK 0xffff +#define TCA_ACT_FLAGS_POLICE (1U << TCA_ACT_FLAGS_USER_BITS) +#define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1)) +#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) +#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) + /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ @@ -99,8 +107,8 @@ struct tc_action_ops { void (*cleanup)(struct tc_action *); int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **act, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **act, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, @@ -179,18 +187,16 @@ int tcf_action_destroy(struct tc_action *actions[], int bind); int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, int bind, + struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, - bool rtnl_held, struct netlink_ext_ack *extack); -struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, + u32 flags, struct netlink_ext_ack *extack); +struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, struct tc_action_ops *a_o, int *init_res, - bool rtnl_held, - struct netlink_ext_ack *extack); + u32 flags, struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse); int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 435a2c3d5a6f..4757d7f53f13 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -70,6 +70,9 @@ struct unix_sock { struct socket_wq peer_wq; wait_queue_entry_t peer_wake; struct scm_stat scm_stat; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + struct sk_buff *oob_skb; +#endif }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/include/net/ax88796.h b/include/net/ax88796.h index aa52b2e8ff7b..2ed23a368602 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -38,4 +38,7 @@ struct ax_plat_data { int (*check_irq)(struct platform_device *pdev); }; +/* exported from ax88796.c for xsurf100.c */ +extern void ax_NS8390_reinit(struct net_device *dev); + #endif /* __NET_AX88796_PLAT_H */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a53e94459ecd..db4312e44d47 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1230,6 +1230,7 @@ struct hci_dev *hci_alloc_dev(void); void hci_free_dev(struct hci_dev *hdev); int hci_register_dev(struct hci_dev *hdev); void hci_unregister_dev(struct hci_dev *hdev); +void hci_cleanup_dev(struct hci_dev *hdev); int hci_suspend_dev(struct hci_dev *hdev); int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c8696a230b7d..38785d48baff 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -303,6 +303,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond, int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); +void bond_3ad_update_lacp_active(struct bonding *bond); void bond_3ad_update_lacp_rate(struct bonding *bond); void bond_3ad_update_ad_actor_settings(struct bonding *bond); int bond_3ad_stats_fill(struct sk_buff *skb, struct bond_3ad_stats *stats); diff --git a/include/net/bond_options.h b/include/net/bond_options.h index 9d382f2f0bc5..e64833a674eb 100644 --- a/include/net/bond_options.h +++ b/include/net/bond_options.h @@ -64,6 +64,7 @@ enum { BOND_OPT_AD_USER_PORT_KEY, BOND_OPT_NUM_PEER_NOTIF_ALIAS, BOND_OPT_PEER_NOTIF_DELAY, + BOND_OPT_LACP_ACTIVE, BOND_OPT_LAST }; diff --git a/include/net/bonding.h b/include/net/bonding.h index 625d9c72dee3..9f3fdc180c6c 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -129,6 +129,7 @@ struct bond_params { int updelay; int downdelay; int peer_notif_delay; + int lacp_active; int lacp_fast; unsigned int min_links; int ad_select; @@ -258,6 +259,7 @@ struct bonding { /* protecting ipsec_list */ spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ + struct bpf_prog *xdp_prog; }; #define bond_slave_get_rcu(dev) \ diff --git a/include/net/compat.h b/include/net/compat.h index 84805bdc4435..595fee069b82 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -71,13 +71,26 @@ struct compat_group_source_req { } __packed; struct compat_group_filter { - __u32 gf_interface; - struct __kernel_sockaddr_storage gf_group - __aligned(4); - __u32 gf_fmode; - __u32 gf_numsrc; - struct __kernel_sockaddr_storage gf_slist[1] - __aligned(4); + union { + struct { + __u32 gf_interface_aux; + struct __kernel_sockaddr_storage gf_group_aux + __aligned(4); + __u32 gf_fmode_aux; + __u32 gf_numsrc_aux; + struct __kernel_sockaddr_storage gf_slist[1] + __aligned(4); + } __packed; + struct { + __u32 gf_interface; + struct __kernel_sockaddr_storage gf_group + __aligned(4); + __u32 gf_fmode; + __u32 gf_numsrc; + struct __kernel_sockaddr_storage gf_slist_flex[] + __aligned(4); + } __packed; + }; } __packed; #endif /* NET_COMPAT_H */ diff --git a/include/net/devlink.h b/include/net/devlink.h index 08f4c6191e72..0236c77f2fd0 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1396,8 +1396,8 @@ struct devlink_ops { * * Note: @extack can be NULL when port notifier queries the port function. */ - int (*port_function_hw_addr_get)(struct devlink *devlink, struct devlink_port *port, - u8 *hw_addr, int *hw_addr_len, + int (*port_function_hw_addr_get)(struct devlink_port *port, u8 *hw_addr, + int *hw_addr_len, struct netlink_ext_ack *extack); /** * @port_function_hw_addr_set: Port function's hardware address set function. @@ -1406,7 +1406,7 @@ struct devlink_ops { * by the devlink port. Driver should return -EOPNOTSUPP if it doesn't support port * function handling for a particular port. */ - int (*port_function_hw_addr_set)(struct devlink *devlink, struct devlink_port *port, + int (*port_function_hw_addr_set)(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); /** @@ -1462,8 +1462,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_fn_state_get)(struct devlink *devlink, - struct devlink_port *port, + int (*port_fn_state_get)(struct devlink_port *port, enum devlink_port_fn_state *state, enum devlink_port_fn_opstate *opstate, struct netlink_ext_ack *extack); @@ -1478,8 +1477,7 @@ struct devlink_ops { * * Return: 0 on success, negative value otherwise. */ - int (*port_fn_state_set)(struct devlink *devlink, - struct devlink_port *port, + int (*port_fn_state_set)(struct devlink_port *port, enum devlink_port_fn_state state, struct netlink_ext_ack *extack); @@ -1546,13 +1544,15 @@ struct net *devlink_net(const struct devlink *devlink); * Drivers that operate on real HW must use devlink_alloc() instead. */ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net); + size_t priv_size, struct net *net, + struct device *dev); static inline struct devlink *devlink_alloc(const struct devlink_ops *ops, - size_t priv_size) + size_t priv_size, + struct device *dev) { - return devlink_alloc_ns(ops, priv_size, &init_net); + return devlink_alloc_ns(ops, priv_size, &init_net, dev); } -int devlink_register(struct devlink *devlink, struct device *dev); +int devlink_register(struct devlink *devlink); void devlink_unregister(struct devlink *devlink); void devlink_reload_enable(struct devlink *devlink); void devlink_reload_disable(struct devlink *devlink); diff --git a/include/net/dsa.h b/include/net/dsa.h index 2af6ee2f2bfb..0c2cba45fa79 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -79,13 +79,11 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, }; -struct packet_type; struct dsa_switch; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); - struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); unsigned int needed_headroom; @@ -239,8 +237,7 @@ struct dsa_port { /* Copies for faster access in master receive hot path */ struct dsa_switch_tree *dst; - struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); enum { DSA_PORT_TYPE_UNUSED = 0, @@ -257,6 +254,8 @@ struct dsa_port { struct device_node *dn; unsigned int ageing_time; bool vlan_filtering; + /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ + bool learning; u8 stp_state; struct net_device *bridge_dev; int bridge_num; @@ -717,8 +716,6 @@ struct dsa_switch_ops { int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); - int (*port_set_mrouter)(struct dsa_switch *ds, int port, bool mrouter, - struct netlink_ext_ack *extack); /* * VLAN support diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 69c9eabf8325..f3c2841566a0 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -293,7 +293,7 @@ static inline bool flow_action_has_entries(const struct flow_action *action) } /** - * flow_action_has_one_action() - check if exactly one action is present + * flow_offload_has_one_action() - check if exactly one action is present * @action: tc filter flow offload action * * Returns true if exactly one action is present. diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 71bb4cc4d05d..42235c178b06 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -82,9 +82,6 @@ struct ip6_sf_socklist { struct in6_addr sl_addr[]; }; -#define IP6_SFLSIZE(count) (sizeof(struct ip6_sf_socklist) + \ - (count) * sizeof(struct in6_addr)) - #define IP6_SFBLOCK 10 /* allocate this many at once */ struct ipv6_mc_socklist { diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ca6a3ea9057e..f72ec113ae56 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -160,6 +160,12 @@ struct inet_hashinfo { ____cacheline_aligned_in_smp; }; +#define inet_lhash2_for_each_icsk_continue(__icsk) \ + hlist_for_each_entry_continue(__icsk, icsk_listen_portaddr_node) + +#define inet_lhash2_for_each_icsk(__icsk, list) \ + hlist_for_each_entry(__icsk, list, icsk_listen_portaddr_node) + #define inet_lhash2_for_each_icsk_rcu(__icsk, list) \ hlist_for_each_entry_rcu(__icsk, list, icsk_listen_portaddr_node) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 820eae3ea95f..5efd0b71dc67 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -265,7 +265,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, static inline unsigned int ip6_skb_dst_mtu(struct sk_buff *skb) { - int mtu; + unsigned int mtu; struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index c0f0a13ed818..49aa79c7b278 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -15,9 +15,11 @@ #include <linux/if_ether.h> /* Lengths of frame formats */ -#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ -#define LLC_PDU_LEN_S 4 -#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +#define LLC_PDU_LEN_I 4 /* header and 2 control bytes */ +#define LLC_PDU_LEN_S 4 +#define LLC_PDU_LEN_U 3 /* header and 1 control byte */ +/* header and 1 control byte and XID info */ +#define LLC_PDU_LEN_U_XID (LLC_PDU_LEN_U + sizeof(struct llc_xid_info)) /* Known SAP addresses */ #define LLC_GLOBAL_SAP 0xFF #define LLC_NULL_SAP 0x00 /* not network-layer visible */ @@ -50,9 +52,10 @@ #define LLC_PDU_TYPE_U_MASK 0x03 /* 8-bit control field */ #define LLC_PDU_TYPE_MASK 0x03 -#define LLC_PDU_TYPE_I 0 /* first bit */ -#define LLC_PDU_TYPE_S 1 /* first two bits */ -#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_I 0 /* first bit */ +#define LLC_PDU_TYPE_S 1 /* first two bits */ +#define LLC_PDU_TYPE_U 3 /* first two bits */ +#define LLC_PDU_TYPE_U_XID 4 /* private type for detecting XID commands */ #define LLC_PDU_TYPE_IS_I(pdu) \ ((!(pdu->ctrl_1 & LLC_PDU_TYPE_I_MASK)) ? 1 : 0) @@ -230,9 +233,18 @@ static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) static inline void llc_pdu_header_init(struct sk_buff *skb, u8 type, u8 ssap, u8 dsap, u8 cr) { - const int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; + int hlen = 4; /* default value for I and S types */ struct llc_pdu_un *pdu; + switch (type) { + case LLC_PDU_TYPE_U: + hlen = 3; + break; + case LLC_PDU_TYPE_U_XID: + hlen = 6; + break; + } + skb_push(skb, hlen); skb_reset_network_header(skb); pdu = llc_pdu_un_hdr(skb); @@ -374,7 +386,10 @@ static inline void llc_pdu_init_as_xid_cmd(struct sk_buff *skb, xid_info->fmt_id = LLC_XID_FMT_ID; /* 0x81 */ xid_info->type = svcs_supported; xid_info->rw = rx_window << 1; /* size of receive window */ - skb_put(skb, sizeof(struct llc_xid_info)); + + /* no need to push/put since llc_pdu_header_init() has already + * pushed 3 + 3 bytes + */ } /** diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index e946366e8ba5..1f4e1816fd36 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -75,6 +75,7 @@ struct netns_xfrm { #endif spinlock_t xfrm_state_lock; seqcount_spinlock_t xfrm_state_hash_generation; + seqcount_spinlock_t xfrm_policy_hash_generation; spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 4770a81f4aa7..a964daedc17b 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -276,8 +276,8 @@ int nci_register_device(struct nci_dev *ndev); void nci_unregister_device(struct nci_dev *ndev); int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout); + const void *opt), + const void *opt, __u32 timeout); int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload); int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 3dd62dd73027..a4082406a003 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -45,7 +45,10 @@ * Please note DMA-sync-for-CPU is still * device driver responsibility */ -#define PP_FLAG_ALL (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) +#define PP_FLAG_PAGE_FRAG BIT(2) /* for page frag feature */ +#define PP_FLAG_ALL (PP_FLAG_DMA_MAP |\ + PP_FLAG_DMA_SYNC_DEV |\ + PP_FLAG_PAGE_FRAG) /* * Fast allocation side cache array/stack @@ -88,6 +91,9 @@ struct page_pool { unsigned long defer_warn; u32 pages_state_hold_cnt; + unsigned int frag_offset; + struct page *frag_page; + long frag_users; /* * Data structure for allocation side @@ -137,6 +143,18 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) return page_pool_alloc_pages(pool, gfp); } +struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, + unsigned int size, gfp_t gfp); + +static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_frag(pool, offset, size, gfp); +} + /* get the stored dma direction. A driver might decide to treat this locally and * avoid the extra cache line from page_pool to determine the direction */ @@ -198,19 +216,48 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, page_pool_put_full_page(pool, page, true); } +#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT \ + (sizeof(dma_addr_t) > sizeof(unsigned long)) + static inline dma_addr_t page_pool_get_dma_addr(struct page *page) { - dma_addr_t ret = page->dma_addr[0]; - if (sizeof(dma_addr_t) > sizeof(unsigned long)) - ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16; + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16; + return ret; } static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { - page->dma_addr[0] = addr; - if (sizeof(dma_addr_t) > sizeof(unsigned long)) - page->dma_addr[1] = upper_32_bits(addr); + page->dma_addr = addr; + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT) + page->dma_addr_upper = upper_32_bits(addr); +} + +static inline void page_pool_set_frag_count(struct page *page, long nr) +{ + atomic_long_set(&page->pp_frag_count, nr); +} + +static inline long page_pool_atomic_sub_frag_count_return(struct page *page, + long nr) +{ + long ret; + + /* As suggested by Alexander, atomic_long_read() may cover up the + * reference count errors, so avoid calling atomic_long_read() in + * the cases of freeing or draining the page_frags, where we would + * not expect it to match or that are slowpath anyway. + */ + if (__builtin_constant_p(nr) && + atomic_long_read(&page->pp_frag_count) == nr) + return 0; + + ret = atomic_long_sub_return(nr, &page->pp_frag_count); + WARN_ON(ret < 0); + return ret; } static inline bool is_page_pool_compiled_in(void) @@ -253,11 +300,4 @@ static inline void page_pool_ring_unlock(struct page_pool *pool) spin_unlock_bh(&pool->ring.producer_lock); } -/* Store mem_info on struct page and use it while recycling skb frags */ -static inline -void page_pool_store_mem_info(struct page *page, struct page_pool *pp) -{ - page->pp = pp; -} - #endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index dc28fcb6f0a2..8fb47fc61097 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -319,7 +319,7 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, - struct tcf_exts *exts, bool ovr, bool rtnl_held, + struct tcf_exts *exts, u32 flags, struct netlink_ext_ack *extack); void tcf_exts_destroy(struct tcf_exts *exts); void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src); @@ -329,6 +329,9 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts); /** * struct tcf_pkt_info - packet information + * + * @ptr: start of the pkt data + * @nexthdr: offset of the next header */ struct tcf_pkt_info { unsigned char * ptr; @@ -347,6 +350,7 @@ struct tcf_ematch_ops; * @ops: the operations lookup table of the corresponding ematch module * @datalen: length of the ematch specific configuration data * @data: ematch specific data + * @net: the network namespace */ struct tcf_ematch { struct tcf_ematch_ops * ops; diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 384e800665f2..9f48733bfd21 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -153,7 +153,8 @@ struct rtnl_af_ops { u32 ext_filter_mask); int (*validate_link_af)(const struct net_device *dev, - const struct nlattr *attr); + const struct nlattr *attr, + struct netlink_ext_ack *extack); int (*set_link_af)(struct net_device *dev, const struct nlattr *attr, struct netlink_ext_ack *extack); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9ed33e6840bd..c0069ac00e62 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -357,7 +357,7 @@ struct tcf_proto_ops { int (*change)(struct net *net, struct sk_buff *, struct tcf_proto*, unsigned long, u32 handle, struct nlattr **, - void **, bool, bool, + void **, u32, struct netlink_ext_ack *); int (*delete)(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 32fc4a309df5..651bba654d77 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -984,6 +984,7 @@ struct sctp_transport { } cacc; struct { + __u32 last_rtx_chunks; __u16 pmtu; __u16 probe_size; __u16 probe_high; @@ -1024,8 +1025,8 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu); void sctp_transport_immediate_rtx(struct sctp_transport *); void sctp_transport_dst_release(struct sctp_transport *t); void sctp_transport_dst_confirm(struct sctp_transport *t); -void sctp_transport_pl_send(struct sctp_transport *t); -void sctp_transport_pl_recv(struct sctp_transport *t); +bool sctp_transport_pl_send(struct sctp_transport *t); +bool sctp_transport_pl_recv(struct sctp_transport *t); /* This is the structure we use to queue packets as they come into diff --git a/include/net/sock.h b/include/net/sock.h index ff1be7e7e90b..6e761451c927 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -68,6 +68,7 @@ #include <net/tcp_states.h> #include <linux/net_tstamp.h> #include <net/l3mdev.h> +#include <uapi/linux/socket.h> /* * This structure really needs to be cleaned up. @@ -1438,8 +1439,6 @@ static inline int __sk_prot_rehash(struct sock *sk) #define RCV_SHUTDOWN 1 #define SEND_SHUTDOWN 2 -#define SOCK_SNDBUF_LOCK 1 -#define SOCK_RCVBUF_LOCK 2 #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 66468ff8cc0a..60d806b6a5ae 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -180,6 +180,14 @@ struct switchdev_obj_in_state_mrp { typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj); +struct switchdev_brport { + struct net_device *dev; + const void *ctx; + struct notifier_block *atomic_nb; + struct notifier_block *blocking_nb; + bool tx_fwd_offload; +}; + enum switchdev_notifier_type { SWITCHDEV_FDB_ADD_TO_BRIDGE = 1, SWITCHDEV_FDB_DEL_TO_BRIDGE, @@ -197,6 +205,9 @@ enum switchdev_notifier_type { SWITCHDEV_VXLAN_FDB_ADD_TO_DEVICE, SWITCHDEV_VXLAN_FDB_DEL_TO_DEVICE, SWITCHDEV_VXLAN_FDB_OFFLOADED, + + SWITCHDEV_BRPORT_OFFLOADED, + SWITCHDEV_BRPORT_UNOFFLOADED, }; struct switchdev_notifier_info { @@ -226,6 +237,11 @@ struct switchdev_notifier_port_attr_info { bool handled; }; +struct switchdev_notifier_brport_info { + struct switchdev_notifier_info info; /* must be first */ + const struct switchdev_brport brport; +}; + static inline struct net_device * switchdev_notifier_info_to_dev(const struct switchdev_notifier_info *info) { @@ -246,6 +262,17 @@ switchdev_fdb_is_dynamically_learned(const struct switchdev_notifier_fdb_info *f #ifdef CONFIG_NET_SWITCHDEV +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + void switchdev_deferred_process(void); int switchdev_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, @@ -316,6 +343,25 @@ int switchdev_handle_port_attr_set(struct net_device *dev, struct netlink_ext_ack *extack)); #else +static inline int +switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void +switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ +} + static inline void switchdev_deferred_process(void) { } diff --git a/include/net/tcp.h b/include/net/tcp.h index 784d5c3ef1c5..3166dc15d7d6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1958,7 +1958,6 @@ struct tcp_iter_state { struct seq_net_private p; enum tcp_seq_states state; struct sock *syn_wait_sk; - struct tcp_seq_afinfo *bpf_seq_afinfo; int bucket, offset, sbucket, num; loff_t last_pos; }; diff --git a/include/sound/soc.h b/include/sound/soc.h index 675849d07284..8e6dd8a257c5 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -712,6 +712,12 @@ struct snd_soc_dai_link { /* Do not create a PCM for this DAI link (Backend link) */ unsigned int ignore:1; + /* This flag will reorder stop sequence. By enabling this flag + * DMA controller stop sequence will be invoked first followed by + * CPU DAI driver stop sequence + */ + unsigned int stop_dma_first:1; + #ifdef CONFIG_SND_SOC_TOPOLOGY struct snd_soc_dobj dobj; /* For topology */ #endif diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index d588c244ec2f..1f0a2b4864e4 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -124,6 +124,8 @@ #define SO_NETNS_COOKIE 71 +#define SO_BUF_LOCK 72 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/include/uapi/linux/can/j1939.h b/include/uapi/linux/can/j1939.h index df6e821075c1..38936460f668 100644 --- a/include/uapi/linux/can/j1939.h +++ b/include/uapi/linux/can/j1939.h @@ -78,11 +78,20 @@ enum { enum { J1939_NLA_PAD, J1939_NLA_BYTES_ACKED, + J1939_NLA_TOTAL_SIZE, + J1939_NLA_PGN, + J1939_NLA_SRC_NAME, + J1939_NLA_DEST_NAME, + J1939_NLA_SRC_ADDR, + J1939_NLA_DEST_ADDR, }; enum { J1939_EE_INFO_NONE, J1939_EE_INFO_TX_ABORT, + J1939_EE_INFO_RX_RTS, + J1939_EE_INFO_RX_DPO, + J1939_EE_INFO_RX_ABORT, }; struct j1939_filter { diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h index e33997b4d750..edc346a77c91 100644 --- a/include/uapi/linux/idxd.h +++ b/include/uapi/linux/idxd.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* SPDX-License-Identifier: LGPL-2.1 WITH Linux-syscall-note */ /* Copyright(c) 2019 Intel Corporation. All rights rsvd. */ #ifndef _USR_IDXD_H_ #define _USR_IDXD_H_ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 49b22afab78f..5310003523ce 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -855,6 +855,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index d1b327036ae4..14168225cecd 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -188,11 +188,22 @@ struct ip_mreq_source { }; struct ip_msfilter { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist[1]; + union { + struct { + __be32 imsf_multiaddr_aux; + __be32 imsf_interface_aux; + __u32 imsf_fmode_aux; + __u32 imsf_numsrc_aux; + __be32 imsf_slist[1]; + }; + struct { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; + __be32 imsf_slist_flex[]; + }; + }; }; #define IP_MSFILTER_SIZE(numsrc) \ @@ -211,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 025c40fef93d..6836ccb9c45d 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -22,6 +22,7 @@ enum { __TCA_ACT_MAX }; +/* See other TCA_ACT_FLAGS_ * flags in include/net/act_api.h. */ #define TCA_ACT_FLAGS_NO_PERCPU_STATS 1 /* Don't use percpu allocator for * actions stats. */ diff --git a/include/uapi/linux/socket.h b/include/uapi/linux/socket.h index c3409c8ec0dd..eb0a9a5b6e71 100644 --- a/include/uapi/linux/socket.h +++ b/include/uapi/linux/socket.h @@ -26,4 +26,9 @@ struct __kernel_sockaddr_storage { }; }; +#define SOCK_SNDBUF_LOCK 1 +#define SOCK_RCVBUF_LOCK 2 + +#define SOCK_BUF_LOCK_MASK (SOCK_SNDBUF_LOCK | SOCK_RCVBUF_LOCK) + #endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h index 26b638a7ad97..a7085e092d34 100644 --- a/include/uapi/rdma/irdma-abi.h +++ b/include/uapi/rdma/irdma-abi.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB) */ +/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR Linux-OpenIB */ /* * Copyright (c) 2006 - 2021 Intel Corporation. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. diff --git a/init/main.c b/init/main.c index f5b8246e8aa1..11cbbec309fa 100644 --- a/init/main.c +++ b/init/main.c @@ -1221,7 +1221,7 @@ trace_initcall_start_cb(void *data, initcall_t fn) { ktime_t *calltime = (ktime_t *)data; - printk(KERN_DEBUG "calling %pS @ %i\n", fn, task_pid_nr(current)); + printk(KERN_DEBUG "calling %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled()); *calltime = ktime_get(); } @@ -1235,8 +1235,8 @@ trace_initcall_finish_cb(void *data, initcall_t fn, int ret) rettime = ktime_get(); delta = ktime_sub(rettime, *calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; - printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n", - fn, ret, duration); + printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n", + fn, ret, duration, irqs_disabled()); } static ktime_t initcall_calltime; diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 2d4fbdbb194e..2e9d47bb40ff 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -360,6 +360,28 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog) return supported; } +const struct bpf_func_proto * +bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + const struct bpf_iter_target_info *tinfo; + const struct bpf_func_proto *fn = NULL; + + mutex_lock(&targets_mutex); + list_for_each_entry(tinfo, &targets, list) { + if (tinfo->btf_id == prog->aux->attach_btf_id) { + const struct bpf_iter_reg *reg_info; + + reg_info = tinfo->reg_info; + if (reg_info->get_func_proto) + fn = reg_info->get_func_proto(func_id, prog); + break; + } + } + mutex_unlock(&targets_mutex); + + return fn; +} + static void bpf_iter_link_release(struct bpf_link *link) { struct bpf_iter_link *iter_link = diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7780131f710e..c395024610ed 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4825,6 +4825,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; if (ctx_arg_info->offset == off) { + if (!ctx_arg_info->btf_id) { + bpf_log(log,"invalid btf_id for context argument offset %u\n", off); + return false; + } + info->reg_type = ctx_arg_info->reg_type; info->btf = btf_vmlinux; info->btf_id = ctx_arg_info->btf_id; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9b1577498373..fe807b203a6f 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -32,6 +32,8 @@ #include <linux/perf_event.h> #include <linux/extable.h> #include <linux/log2.h> + +#include <asm/barrier.h> #include <asm/unaligned.h> /* Registers */ @@ -1377,6 +1379,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) /* Non-UAPI available opcodes. */ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, + [BPF_ST | BPF_NOSPEC] = &&ST_NOSPEC, [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B, [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H, [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W, @@ -1559,7 +1562,7 @@ select_insn: if (unlikely(index >= array->map.max_entries)) goto out; - if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT)) goto out; tail_call_cnt++; @@ -1621,7 +1624,21 @@ out: COND_JMP(s, JSGE, >=) COND_JMP(s, JSLE, <=) #undef COND_JMP - /* STX and ST and LDX*/ + /* ST, STX and LDX*/ + ST_NOSPEC: + /* Speculation barrier for mitigating Speculative Store Bypass. + * In case of arm64, we rely on the firmware mitigation as + * controlled via the ssbd kernel parameter. Whenever the + * mitigation is enabled, it works for all of the kernel code + * with no need to provide any additional instructions here. + * In case of x86, we use 'lfence' insn for mitigation. We + * reuse preexisting logic from Spectre v1 mitigation that + * happens to produce the required code on x86 for v4 as well. + */ +#ifdef CONFIG_X86 + barrier_nospec(); +#endif + CONT; #define LDST(SIZEOP, SIZE) \ STX_MEM_##SIZEOP: \ *(SIZE *)(unsigned long) (DST + insn->off) = SRC; \ diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 542e94fa30b4..f02d04540c0c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog); } -static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp, - int exclude_ifindex) +static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp) { - if (!obj || obj->dev->ifindex == exclude_ifindex || + if (!obj || !obj->dev->netdev_ops->ndo_xdp_xmit) return false; @@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj, return 0; } +static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex) +{ + while (num_excluded--) { + if (ifindex == excluded[num_excluded]) + return true; + } + return false; +} + +/* Get ifindex of each upper device. 'indexes' must be able to hold at + * least MAX_NEST_DEV elements. + * Returns the number of ifindexes added. + */ +static int get_upper_ifindexes(struct net_device *dev, int *indexes) +{ + struct net_device *upper; + struct list_head *iter; + int n = 0; + + netdev_for_each_upper_dev_rcu(dev, upper, iter) { + indexes[n++] = upper->ifindex; + } + return n; +} + int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0; struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct xdp_frame *xdpf; + int num_excluded = 0; unsigned int i; int err; + if (exclude_ingress) { + num_excluded = get_upper_ifindexes(dev_rx, excluded_devices); + excluded_devices[num_excluded++] = dev_rx->ifindex; + } + xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) return -EOVERFLOW; @@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!is_valid_dst(dst, xdp, exclude_ifindex)) + if (!is_valid_dst(dst, xdp)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, head = dev_map_index_hash(dtab, i); hlist_for_each_entry_rcu(dst, head, index_hlist, lockdep_is_held(&dtab->index_lock)) { - if (!is_valid_dst(dst, xdp, exclude_ifindex)) + if (!is_valid_dst(dst, xdp)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, + dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int exclude_ifindex = exclude_ingress ? dev->ifindex : 0; struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct hlist_node *next; + int num_excluded = 0; unsigned int i; int err; + if (exclude_ingress) { + num_excluded = get_upper_ifindexes(dev, excluded_devices); + excluded_devices[num_excluded++] = dev->ifindex; + } + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!dst || dst->dev->ifindex == exclude_ifindex) + if (!dst) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, return err; last_dst = dst; + } } else { /* BPF_MAP_TYPE_DEVMAP_HASH */ for (i = 0; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dst, next, head, index_hlist) { - if (!dst || dst->dev->ifindex == exclude_ifindex) + if (!dst) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, + dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ diff --git a/kernel/bpf/disasm.c b/kernel/bpf/disasm.c index bbfc6bb79240..ca3cd9aaa6ce 100644 --- a/kernel/bpf/disasm.c +++ b/kernel/bpf/disasm.c @@ -206,15 +206,17 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs, verbose(cbs->private_data, "BUG_%02x\n", insn->code); } } else if (class == BPF_ST) { - if (BPF_MODE(insn->code) != BPF_MEM) { + if (BPF_MODE(insn->code) == BPF_MEM) { + verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", + insn->code, + bpf_ldst_string[BPF_SIZE(insn->code) >> 3], + insn->dst_reg, + insn->off, insn->imm); + } else if (BPF_MODE(insn->code) == 0xc0 /* BPF_NOSPEC, no UAPI */) { + verbose(cbs->private_data, "(%02x) nospec\n", insn->code); + } else { verbose(cbs->private_data, "BUG_st_%02x\n", insn->code); - return; } - verbose(cbs->private_data, "(%02x) *(%s *)(r%d %+d) = %d\n", - insn->code, - bpf_ldst_string[BPF_SIZE(insn->code) >> 3], - insn->dst_reg, - insn->off, insn->imm); } else if (class == BPF_LDX) { if (BPF_MODE(insn->code) != BPF_MEM) { verbose(cbs->private_data, "BUG_ldx_%02x\n", insn->code); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 9fe846ec6bd1..15746f779fe1 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -393,8 +393,6 @@ const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto = { }; #ifdef CONFIG_CGROUP_BPF -DECLARE_PER_CPU(struct bpf_cgroup_storage_info, - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) { @@ -403,17 +401,13 @@ BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags) * verifier checks that its value is correct. */ enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); - struct bpf_cgroup_storage *storage = NULL; + struct bpf_cgroup_storage *storage; + struct bpf_cg_run_ctx *ctx; void *ptr; - int i; - for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) { - if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current)) - continue; - - storage = this_cpu_read(bpf_cgroup_storage_info[i].storage[stype]); - break; - } + /* get current cgroup storage from BPF run context */ + ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx); + storage = ctx->prog_item->cgroup_storage[stype]; if (stype == BPF_CGROUP_STORAGE_SHARED) ptr = &READ_ONCE(storage->buf)->data[0]; diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c index 95d70a08325d..035e9e3a7132 100644 --- a/kernel/bpf/local_storage.c +++ b/kernel/bpf/local_storage.c @@ -1,6 +1,7 @@ //SPDX-License-Identifier: GPL-2.0 #include <linux/bpf-cgroup.h> #include <linux/bpf.h> +#include <linux/bpf_local_storage.h> #include <linux/btf.h> #include <linux/bug.h> #include <linux/filter.h> @@ -11,9 +12,6 @@ #ifdef CONFIG_CGROUP_BPF -DEFINE_PER_CPU(struct bpf_cgroup_storage_info, - bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]); - #include "../cgroup/cgroup-internal.h" #define LOCAL_STORAGE_CREATE_FLAG_MASK \ @@ -286,9 +284,17 @@ enoent: static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) { + __u32 max_value_size = BPF_LOCAL_STORAGE_MAX_VALUE_SIZE; int numa_node = bpf_map_attr_numa_node(attr); struct bpf_cgroup_storage_map *map; + /* percpu is bound by PCPU_MIN_UNIT_SIZE, non-percu + * is the same as other local storages. + */ + if (attr->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) + max_value_size = min_t(__u32, max_value_size, + PCPU_MIN_UNIT_SIZE); + if (attr->key_size != sizeof(struct bpf_cgroup_storage_key) && attr->key_size != sizeof(__u64)) return ERR_PTR(-EINVAL); @@ -296,7 +302,7 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr) if (attr->value_size == 0) return ERR_PTR(-EINVAL); - if (attr->value_size > PAGE_SIZE) + if (attr->value_size > max_value_size) return ERR_PTR(-E2BIG); if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK || @@ -409,7 +415,7 @@ static int cgroup_storage_check_btf(const struct bpf_map *map, static void cgroup_storage_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m) { - enum bpf_cgroup_storage_type stype = cgroup_storage_type(map); + enum bpf_cgroup_storage_type stype; struct bpf_cgroup_storage *storage; int cpu; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 475c28e21128..5ea2238a6656 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2667,6 +2667,19 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, cur = env->cur_state->frame[env->cur_state->curframe]; if (value_regno >= 0) reg = &cur->regs[value_regno]; + if (!env->bypass_spec_v4) { + bool sanitize = reg && is_spillable_regtype(reg->type); + + for (i = 0; i < size; i++) { + if (state->stack[spi].slot_type[i] == STACK_INVALID) { + sanitize = true; + break; + } + } + + if (sanitize) + env->insn_aux_data[insn_idx].sanitize_stack_spill = true; + } if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) { @@ -2689,47 +2702,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, verbose(env, "invalid size of register spill\n"); return -EACCES; } - if (state != cur && reg->type == PTR_TO_STACK) { verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); return -EINVAL; } - - if (!env->bypass_spec_v4) { - bool sanitize = false; - - if (state->stack[spi].slot_type[0] == STACK_SPILL && - register_is_const(&state->stack[spi].spilled_ptr)) - sanitize = true; - for (i = 0; i < BPF_REG_SIZE; i++) - if (state->stack[spi].slot_type[i] == STACK_MISC) { - sanitize = true; - break; - } - if (sanitize) { - int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; - int soff = (-spi - 1) * BPF_REG_SIZE; - - /* detected reuse of integer stack slot with a pointer - * which means either llvm is reusing stack slot or - * an attacker is trying to exploit CVE-2018-3639 - * (speculative store bypass) - * Have to sanitize that slot with preemptive - * store of zero. - */ - if (*poff && *poff != soff) { - /* disallow programs where single insn stores - * into two different stack slots, since verifier - * cannot sanitize them - */ - verbose(env, - "insn %d cannot access two stack slots fp%d and fp%d", - insn_idx, *poff, soff); - return -EINVAL; - } - *poff = soff; - } - } save_register_state(state, spi, reg); } else { u8 type = STACK_MISC; @@ -6804,6 +6780,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + + /* Limit pruning on unknown scalars to enable deep search for + * potential masking differences from other program paths. + */ + if (!off_is_imm) + env->explore_alu_limits = true; } err = update_alu_sanitation_state(aux, alu_state, alu_limit); @@ -10207,8 +10189,8 @@ next: } /* Returns true if (rold safe implies rcur safe) */ -static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, - struct bpf_id_pair *idmap) +static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, + struct bpf_reg_state *rcur, struct bpf_id_pair *idmap) { bool equal; @@ -10234,6 +10216,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; switch (rold->type) { case SCALAR_VALUE: + if (env->explore_alu_limits) + return false; if (rcur->type == SCALAR_VALUE) { if (!rold->precise && !rcur->precise) return true; @@ -10324,9 +10308,8 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; } -static bool stacksafe(struct bpf_func_state *old, - struct bpf_func_state *cur, - struct bpf_id_pair *idmap) +static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, + struct bpf_func_state *cur, struct bpf_id_pair *idmap) { int i, spi; @@ -10371,9 +10354,8 @@ static bool stacksafe(struct bpf_func_state *old, continue; if (old->stack[spi].slot_type[0] != STACK_SPILL) continue; - if (!regsafe(&old->stack[spi].spilled_ptr, - &cur->stack[spi].spilled_ptr, - idmap)) + if (!regsafe(env, &old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, idmap)) /* when explored and current stack slot are both storing * spilled registers, check that stored pointers types * are the same as well. @@ -10430,10 +10412,11 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); for (i = 0; i < MAX_BPF_REG; i++) - if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch)) + if (!regsafe(env, &old->regs[i], &cur->regs[i], + env->idmap_scratch)) return false; - if (!stacksafe(old, cur, env->idmap_scratch)) + if (!stacksafe(env, old, cur, env->idmap_scratch)) return false; if (!refsafe(old, cur)) @@ -12198,35 +12181,33 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++, insn++) { bpf_convert_ctx_access_t convert_ctx_access; + bool ctx_access; if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) || insn->code == (BPF_LDX | BPF_MEM | BPF_W) || - insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) { type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || - insn->code == (BPF_STX | BPF_MEM | BPF_H) || - insn->code == (BPF_STX | BPF_MEM | BPF_W) || - insn->code == (BPF_STX | BPF_MEM | BPF_DW)) + ctx_access = true; + } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || + insn->code == (BPF_STX | BPF_MEM | BPF_H) || + insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW) || + insn->code == (BPF_ST | BPF_MEM | BPF_B) || + insn->code == (BPF_ST | BPF_MEM | BPF_H) || + insn->code == (BPF_ST | BPF_MEM | BPF_W) || + insn->code == (BPF_ST | BPF_MEM | BPF_DW)) { type = BPF_WRITE; - else + ctx_access = BPF_CLASS(insn->code) == BPF_STX; + } else { continue; + } if (type == BPF_WRITE && - env->insn_aux_data[i + delta].sanitize_stack_off) { + env->insn_aux_data[i + delta].sanitize_stack_spill) { struct bpf_insn patch[] = { - /* Sanitize suspicious stack slot with zero. - * There are no memory dependencies for this store, - * since it's only using frame pointer and immediate - * constant of zero - */ - BPF_ST_MEM(BPF_DW, BPF_REG_FP, - env->insn_aux_data[i + delta].sanitize_stack_off, - 0), - /* the original STX instruction will immediately - * overwrite the same stack slot with appropriate value - */ *insn, + BPF_ST_NOSPEC(), }; cnt = ARRAY_SIZE(patch); @@ -12240,6 +12221,9 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) continue; } + if (!ctx_access) + continue; + switch (env->insn_aux_data[i + delta].ptr_type) { case PTR_TO_CTX: if (!ops->convert_ctx_access) @@ -13093,37 +13077,6 @@ static void free_states(struct bpf_verifier_env *env) } } -/* The verifier is using insn_aux_data[] to store temporary data during - * verification and to store information for passes that run after the - * verification like dead code sanitization. do_check_common() for subprogram N - * may analyze many other subprograms. sanitize_insn_aux_data() clears all - * temporary data after do_check_common() finds that subprogram N cannot be - * verified independently. pass_cnt counts the number of times - * do_check_common() was run and insn->aux->seen tells the pass number - * insn_aux_data was touched. These variables are compared to clear temporary - * data from failed pass. For testing and experiments do_check_common() can be - * run multiple times even when prior attempt to verify is unsuccessful. - * - * Note that special handling is needed on !env->bypass_spec_v1 if this is - * ever called outside of error path with subsequent program rejection. - */ -static void sanitize_insn_aux_data(struct bpf_verifier_env *env) -{ - struct bpf_insn *insn = env->prog->insnsi; - struct bpf_insn_aux_data *aux; - int i, class; - - for (i = 0; i < env->prog->len; i++) { - class = BPF_CLASS(insn[i].code); - if (class != BPF_LDX && class != BPF_STX) - continue; - aux = &env->insn_aux_data[i]; - if (aux->seen != env->pass_cnt) - continue; - memset(aux, 0, offsetof(typeof(*aux), orig_idx)); - } -} - static int do_check_common(struct bpf_verifier_env *env, int subprog) { bool pop_log = !(env->log.level & BPF_LOG_LEVEL2); @@ -13200,9 +13153,6 @@ out: if (!ret && pop_log) bpf_vlog_reset(&env->log, 0); free_states(env); - if (ret) - /* clean aux data in case subprog was rejected */ - sanitize_insn_aux_data(env); return ret; } diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 8d6bf56ed77a..de2c432dee20 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -1221,9 +1221,7 @@ int cgroup1_get_tree(struct fs_context *fc) ret = cgroup_do_get_tree(fc); if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) { - struct super_block *sb = fc->root->d_sb; - dput(fc->root); - deactivate_locked_super(sb); + fc_drop_locked(fc); ret = 1; } diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index 910ae69cae77..af4a6ef48ce0 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -5,6 +5,13 @@ */ #include <linux/dma-map-ops.h> +static struct page *dma_common_vaddr_to_page(void *cpu_addr) +{ + if (is_vmalloc_addr(cpu_addr)) + return vmalloc_to_page(cpu_addr); + return virt_to_page(cpu_addr); +} + /* * Create scatter-list for the already allocated DMA buffer. */ @@ -12,7 +19,7 @@ int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - struct page *page = virt_to_page(cpu_addr); + struct page *page = dma_common_vaddr_to_page(cpu_addr); int ret; ret = sg_alloc_table(sgt, 1, GFP_KERNEL); @@ -32,6 +39,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; + struct page *page = dma_common_vaddr_to_page(cpu_addr); int ret = -ENXIO; vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); @@ -43,7 +51,7 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; return remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, + page_to_pfn(page) + vma->vm_pgoff, user_count << PAGE_SHIFT, vma->vm_page_prot); #else return -ENXIO; diff --git a/kernel/fork.c b/kernel/fork.c index bc94b2cc5995..e8b41e212110 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2083,6 +2083,7 @@ static __latent_entropy struct task_struct *copy_process( #endif #ifdef CONFIG_BPF_SYSCALL RCU_INIT_POINTER(p->bpf_storage, NULL); + p->bpf_ctx = NULL; #endif /* Perform scheduler related setup. Assign this task to a CPU. */ diff --git a/kernel/smpboot.c b/kernel/smpboot.c index e4163042c4d6..cf6acab78538 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -47,7 +47,7 @@ void __init idle_thread_set_boot_cpu(void) * * Creates the thread if it does not exist. */ -static inline void idle_init(unsigned int cpu) +static __always_inline void idle_init(unsigned int cpu) { struct task_struct *tsk = per_cpu(idle_threads, cpu); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 29a5e54e6e10..517be7fd175e 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -991,6 +991,11 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) if (!p) goto out; + /* Protect timer list r/w in arm_timer() */ + sighand = lock_task_sighand(p, &flags); + if (unlikely(sighand == NULL)) + goto out; + /* * Fetch the current sample and update the timer's expiry time. */ @@ -1001,11 +1006,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer) bump_cpu_timer(timer, now); - /* Protect timer list r/w in arm_timer() */ - sighand = lock_task_sighand(p, &flags); - if (unlikely(sighand == NULL)) - goto out; - /* * Now re-arm for the new expiry time. */ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 3fadb58fc9d7..9eb11c2209e5 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -207,6 +207,7 @@ struct timer_base { unsigned int cpu; bool next_expiry_recalc; bool is_idle; + bool timers_pending; DECLARE_BITMAP(pending_map, WHEEL_SIZE); struct hlist_head vectors[WHEEL_SIZE]; } ____cacheline_aligned; @@ -595,6 +596,7 @@ static void enqueue_timer(struct timer_base *base, struct timer_list *timer, * can reevaluate the wheel: */ base->next_expiry = bucket_expiry; + base->timers_pending = true; base->next_expiry_recalc = false; trigger_dyntick_cpu(base, timer); } @@ -1582,6 +1584,7 @@ static unsigned long __next_timer_interrupt(struct timer_base *base) } base->next_expiry_recalc = false; + base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA); return next; } @@ -1633,7 +1636,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); u64 expires = KTIME_MAX; unsigned long nextevt; - bool is_max_delta; /* * Pretend that there is no timer pending if the cpu is offline. @@ -1646,7 +1648,6 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) if (base->next_expiry_recalc) base->next_expiry = __next_timer_interrupt(base); nextevt = base->next_expiry; - is_max_delta = (nextevt == base->clk + NEXT_TIMER_MAX_DELTA); /* * We have a fresh next event. Check whether we can forward the @@ -1664,7 +1665,7 @@ u64 get_next_timer_interrupt(unsigned long basej, u64 basem) expires = basem; base->is_idle = false; } else { - if (!is_max_delta) + if (base->timers_pending) expires = basem + (u64)(nextevt - basej) * TICK_NSEC; /* * If we expect to sleep more than a tick, mark the base idle. @@ -1947,6 +1948,7 @@ int timers_prepare_cpu(unsigned int cpu) base = per_cpu_ptr(&timer_bases[b], cpu); base->clk = jiffies; base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; + base->timers_pending = false; base->is_idle = false; } return 0; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 08906007306d..c5e0b6a64091 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -965,7 +965,7 @@ BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs) { struct kprobe *kp = kprobe_running(); - return kp ? (u64) kp->addr : 0; + return kp ? (uintptr_t)kp->addr : 0; } static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = { @@ -1461,6 +1461,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) const struct bpf_func_proto * tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { + const struct bpf_func_proto *fn; + switch (func_id) { #ifdef CONFIG_NET case BPF_FUNC_skb_output: @@ -1501,7 +1503,10 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) case BPF_FUNC_d_path: return &bpf_d_path_proto; default: - return raw_tp_prog_func_proto(func_id, prog); + fn = raw_tp_prog_func_proto(func_id, prog); + if (!fn && prog->expected_attach_type == BPF_TRACE_ITER) + fn = bpf_iter_get_func_proto(func_id, prog); + return fn; } } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e6fb3e6e1ffc..7b180f61e6d3 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5985,7 +5985,8 @@ ftrace_graph_release(struct inode *inode, struct file *file) * infrastructure to do the synchronization, thus we must do it * ourselves. */ - synchronize_rcu_tasks_rude(); + if (old_hash != EMPTY_HASH) + synchronize_rcu_tasks_rude(); free_ftrace_hash(old_hash); } @@ -7544,7 +7545,7 @@ int ftrace_is_dead(void) */ int register_ftrace_function(struct ftrace_ops *ops) { - int ret = -1; + int ret; ftrace_ops_init(ops); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index d1463eac11a3..e592d1df6f88 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3880,10 +3880,30 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) if (unlikely(!head)) return true; - return reader->read == rb_page_commit(reader) && - (commit == reader || - (commit == head && - head->read == rb_page_commit(commit))); + /* Reader should exhaust content in reader page */ + if (reader->read != rb_page_commit(reader)) + return false; + + /* + * If writers are committing on the reader page, knowing all + * committed content has been read, the ring buffer is empty. + */ + if (commit == reader) + return true; + + /* + * If writers are committing on a page other than reader page + * and head page, there should always be content to read. + */ + if (commit != head) + return false; + + /* + * Writers are committing on the head page, we just need + * to care about there're committed data, and the reader will + * swap reader page with head page when it is to read data. + */ + return rb_page_commit(commit) == 0; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index f8b80b5bab71..33899a71fdc1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5609,6 +5609,10 @@ static const char readme_msg[] = "\t [:name=histname1]\n" "\t [:<handler>.<action>]\n" "\t [if <filter>]\n\n" + "\t Note, special fields can be used as well:\n" + "\t common_timestamp - to record current timestamp\n" + "\t common_cpu - to record the CPU the event happened on\n" + "\n" "\t When a matching event is hit, an entry is added to a hash\n" "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. Keys and values\n" @@ -9131,8 +9135,10 @@ static int trace_array_create_dir(struct trace_array *tr) return -EINVAL; ret = event_trace_add_tracer(tr->dir, tr); - if (ret) + if (ret) { tracefs_remove(tr->dir); + return ret; + } init_tracer_tracefs(tr, tr->dir); __update_tracer_options(tr); diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 16a9dfc9fffc..949ef09dc537 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -65,7 +65,8 @@ C(INVALID_SORT_MODIFIER,"Invalid sort modifier"), \ C(EMPTY_SORT_FIELD, "Empty sort field"), \ C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ - C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), + C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ + C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), #undef C #define C(a, b) HIST_ERR_##a @@ -1111,7 +1112,7 @@ static const char *hist_field_name(struct hist_field *field, field->flags & HIST_FIELD_FL_ALIAS) field_name = hist_field_name(field->operands[0], ++level); else if (field->flags & HIST_FIELD_FL_CPU) - field_name = "cpu"; + field_name = "common_cpu"; else if (field->flags & HIST_FIELD_FL_EXPR || field->flags & HIST_FIELD_FL_VAR_REF) { if (field->system) { @@ -1991,14 +1992,24 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, hist_data->enable_timestamps = true; if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) hist_data->attrs->ts_in_usecs = true; - } else if (strcmp(field_name, "cpu") == 0) + } else if (strcmp(field_name, "common_cpu") == 0) *flags |= HIST_FIELD_FL_CPU; else { field = trace_find_event_field(file->event_call, field_name); if (!field || !field->size) { - hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, errpos(field_name)); - field = ERR_PTR(-EINVAL); - goto out; + /* + * For backward compatibility, if field_name + * was "cpu", then we treat this the same as + * common_cpu. + */ + if (strcmp(field_name, "cpu") == 0) { + *flags |= HIST_FIELD_FL_CPU; + } else { + hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, + errpos(field_name)); + field = ERR_PTR(-EINVAL); + goto out; + } } } out: @@ -2146,6 +2157,13 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, ret = PTR_ERR(operand1); goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + /* String type can not be the operand of unary operator. */ + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + destroy_hist_field(operand1, 0); + ret = -EINVAL; + goto free; + } expr->flags |= operand1->flags & (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); @@ -2247,6 +2265,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand1 = NULL; goto free; } + if (operand1->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str)); + ret = -EINVAL; + goto free; + } /* rest of string could be another expression e.g. b+c in a+b+c */ operand_flags = 0; @@ -2256,6 +2279,11 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, operand2 = NULL; goto free; } + if (operand2->flags & HIST_FIELD_FL_STRING) { + hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); + ret = -EINVAL; + goto free; + } ret = check_expr_operands(file->tr, operand1, operand2); if (ret) @@ -2277,6 +2305,10 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, expr->operands[0] = operand1; expr->operands[1] = operand2; + + /* The operand sizes should be the same, so just pick one */ + expr->size = operand1->size; + expr->operator = field_op; expr->name = expr_str(expr, 0); expr->type = kstrdup(operand1->type, GFP_KERNEL); @@ -5085,7 +5117,7 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) seq_printf(m, "%s=", hist_field->var.name); if (hist_field->flags & HIST_FIELD_FL_CPU) - seq_puts(m, "cpu"); + seq_puts(m, "common_cpu"); else if (field_name) { if (hist_field->flags & HIST_FIELD_FL_VAR_REF || hist_field->flags & HIST_FIELD_FL_ALIAS) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 2ac75eb6aa86..9315fc03e303 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -893,15 +893,13 @@ static struct synth_event *alloc_synth_event(const char *name, int n_fields, dyn_event_init(&event->devent, &synth_event_ops); for (i = 0, j = 0; i < n_fields; i++) { + fields[i]->field_pos = i; event->fields[i] = fields[i]; - if (fields[i]->is_dynamic) { - event->dynamic_fields[j] = fields[i]; - event->dynamic_fields[j]->field_pos = i; + if (fields[i]->is_dynamic) event->dynamic_fields[j++] = fields[i]; - event->n_dynamic_fields++; - } } + event->n_dynamic_fields = j; event->n_fields = n_fields; out: return event; diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index a6c0cdaf4b87..14f46aae1981 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -327,7 +327,7 @@ static void move_to_next_cpu(void) get_online_cpus(); cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask); - next_cpu = cpumask_next(smp_processor_id(), current_mask); + next_cpu = cpumask_next(raw_smp_processor_id(), current_mask); put_online_cpus(); if (next_cpu >= nr_cpu_ids) diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h index 6e146b959dcd..4007fe95cf42 100644 --- a/kernel/trace/trace_synth.h +++ b/kernel/trace/trace_synth.h @@ -14,10 +14,10 @@ struct synth_field { char *name; size_t size; unsigned int offset; + unsigned int field_pos; bool is_signed; bool is_string; bool is_dynamic; - bool field_pos; }; struct synth_event { diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 976bf8ce8039..fc32821f8240 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -299,8 +299,8 @@ static int tracepoint_add_func(struct tracepoint *tp, * a pointer to it. This array is referenced by __DO_TRACE from * include/linux/tracepoint.h using rcu_dereference_sched(). */ - rcu_assign_pointer(tp->funcs, tp_funcs); tracepoint_update_call(tp, tp_funcs, false); + rcu_assign_pointer(tp->funcs, tp_funcs); static_key_enable(&tp->key); release_probes(old); diff --git a/kernel/ucount.c b/kernel/ucount.c index 87799e2379bd..77be3bbe3cc4 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -160,6 +160,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; + long overflow; spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -184,8 +185,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) return new; } } + overflow = atomic_add_negative(1, &ucounts->count); spin_unlock_irq(&ucounts_lock); - ucounts = get_ucounts(ucounts); + if (overflow) { + put_ucounts(ucounts); + return NULL; + } return ucounts; } @@ -193,8 +198,7 @@ void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - if (atomic_dec_and_test(&ucounts->count)) { - spin_lock_irqsave(&ucounts_lock, flags); + if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); kfree(ucounts); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 50142fc08902..f148eacda55a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3676,15 +3676,21 @@ static void pwq_unbound_release_workfn(struct work_struct *work) unbound_release_work); struct workqueue_struct *wq = pwq->wq; struct worker_pool *pool = pwq->pool; - bool is_last; + bool is_last = false; - if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) - return; + /* + * when @pwq is not linked, it doesn't hold any reference to the + * @wq, and @wq is invalid to access. + */ + if (!list_empty(&pwq->pwqs_node)) { + if (WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND))) + return; - mutex_lock(&wq->mutex); - list_del_rcu(&pwq->pwqs_node); - is_last = list_empty(&wq->pwqs); - mutex_unlock(&wq->mutex); + mutex_lock(&wq->mutex); + list_del_rcu(&pwq->pwqs_node); + is_last = list_empty(&wq->pwqs); + mutex_unlock(&wq->mutex); + } mutex_lock(&wq_pool_mutex); put_unbound_pool(pool); diff --git a/lib/Kconfig b/lib/Kconfig index d241fe476fda..5c9c0687f76d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -683,9 +683,6 @@ config PARMAN config OBJAGG tristate "objagg" if COMPILE_TEST -config STRING_SELFTEST - tristate "Test string functions" - endmenu config GENERIC_IOREMAP diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 831212722924..5ddd575159fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2180,6 +2180,9 @@ config ASYNC_RAID6_TEST config TEST_HEXDUMP tristate "Test functions located in the hexdump module at runtime" +config STRING_SELFTEST + tristate "Test string functions at runtime" + config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" diff --git a/lib/test_bpf.c b/lib/test_bpf.c index d500320778c7..44d8197bbffb 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -461,6 +461,41 @@ static int bpf_fill_stxdw(struct bpf_test *self) return __bpf_fill_stxdw(self, BPF_DW); } +static int bpf_fill_long_jmp(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1); + insn[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1); + + /* + * Fill with a complex 64-bit operation that expands to a lot of + * instructions on 32-bit JITs. The large jump offset can then + * overflow the conditional branch field size, triggering a branch + * conversion mechanism in some JITs. + * + * Note: BPF_MAXINSNS of ALU64 MUL is enough to trigger such branch + * conversion on the 32-bit MIPS JIT. For other JITs, the instruction + * count and/or operation may need to be modified to trigger the + * branch conversion. + */ + for (i = 2; i < len - 1; i++) + insn[i] = BPF_ALU64_IMM(BPF_MUL, R0, (i << 16) + i); + + insn[len - 1] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + static struct bpf_test tests[] = { { "TAX", @@ -1917,6 +1952,163 @@ static struct bpf_test tests[] = { { { 0, -1 } } }, { + /* + * Register (non-)clobbering test, in the case where a 32-bit + * JIT implements complex ALU64 operations via function calls. + * If so, the function call must be invisible in the eBPF + * registers. The JIT must then save and restore relevant + * registers during the call. The following tests check that + * the eBPF registers retain their values after such a call. + */ + "INT: Register clobbering, R1 updated", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 123456789), + BPF_ALU32_IMM(BPF_MOV, R2, 2), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_DIV, R1, 123456789), + BPF_JMP_IMM(BPF_JNE, R0, 0, 10), + BPF_JMP_IMM(BPF_JNE, R1, 1, 9), + BPF_JMP_IMM(BPF_JNE, R2, 2, 8), + BPF_JMP_IMM(BPF_JNE, R3, 3, 7), + BPF_JMP_IMM(BPF_JNE, R4, 4, 6), + BPF_JMP_IMM(BPF_JNE, R5, 5, 5), + BPF_JMP_IMM(BPF_JNE, R6, 6, 4), + BPF_JMP_IMM(BPF_JNE, R7, 7, 3), + BPF_JMP_IMM(BPF_JNE, R8, 8, 2), + BPF_JMP_IMM(BPF_JNE, R9, 9, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "INT: Register clobbering, R2 updated", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R2, 2 * 123456789), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_DIV, R2, 123456789), + BPF_JMP_IMM(BPF_JNE, R0, 0, 10), + BPF_JMP_IMM(BPF_JNE, R1, 1, 9), + BPF_JMP_IMM(BPF_JNE, R2, 2, 8), + BPF_JMP_IMM(BPF_JNE, R3, 3, 7), + BPF_JMP_IMM(BPF_JNE, R4, 4, 6), + BPF_JMP_IMM(BPF_JNE, R5, 5, 5), + BPF_JMP_IMM(BPF_JNE, R6, 6, 4), + BPF_JMP_IMM(BPF_JNE, R7, 7, 3), + BPF_JMP_IMM(BPF_JNE, R8, 8, 2), + BPF_JMP_IMM(BPF_JNE, R9, 9, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + /* + * Test 32-bit JITs that implement complex ALU64 operations as + * function calls R0 = f(R1, R2), and must re-arrange operands. + */ +#define NUMER 0xfedcba9876543210ULL +#define DENOM 0x0123456789abcdefULL + "ALU64_DIV X: Operand register permutations", + .u.insns_int = { + /* R0 / R2 */ + BPF_LD_IMM64(R0, NUMER), + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R0, R2), + BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R0 */ + BPF_LD_IMM64(R1, NUMER), + BPF_LD_IMM64(R0, DENOM), + BPF_ALU64_REG(BPF_DIV, R1, R0), + BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R0 / R1 */ + BPF_LD_IMM64(R0, NUMER), + BPF_LD_IMM64(R1, DENOM), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R2 / R0 */ + BPF_LD_IMM64(R2, NUMER), + BPF_LD_IMM64(R0, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R0), + BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R2 / R1 */ + BPF_LD_IMM64(R2, NUMER), + BPF_LD_IMM64(R1, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R1), + BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R2 */ + BPF_LD_IMM64(R1, NUMER), + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R1, R2), + BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R1 */ + BPF_LD_IMM64(R1, NUMER), + BPF_ALU64_REG(BPF_DIV, R1, R1), + BPF_JMP_IMM(BPF_JEQ, R1, 1, 1), + BPF_EXIT_INSN(), + /* R2 / R2 */ + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R2), + BPF_JMP_IMM(BPF_JEQ, R2, 1, 1), + BPF_EXIT_INSN(), + /* R3 / R4 */ + BPF_LD_IMM64(R3, NUMER), + BPF_LD_IMM64(R4, DENOM), + BPF_ALU64_REG(BPF_DIV, R3, R4), + BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* Successful return */ + BPF_LD_IMM64(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, +#undef NUMER +#undef DENOM + }, +#ifdef CONFIG_32BIT + { + "INT: 32-bit context pointer word order and zero-extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP32_IMM(BPF_JEQ, R1, 0, 3), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_JMP32_IMM(BPF_JNE, R1, 0, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, +#endif + { "check: missing ret", .u.insns = { BPF_STMT(BPF_LD | BPF_IMM, 1), @@ -2361,6 +2553,48 @@ static struct bpf_test tests[] = { { { 0, 0x1 } }, }, { + "ALU_MOV_K: small negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "ALU_MOV_K: small negative zero extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU_MOV_K: large negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123456789), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123456789 } } + }, + { + "ALU_MOV_K: large negative zero extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123456789), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { "ALU64_MOV_K: dst = 2", .u.insns_int = { BPF_ALU64_IMM(BPF_MOV, R0, 2), @@ -2412,6 +2646,48 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_MOV_K: small negative", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "ALU64_MOV_K: small negative sign extension", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } } + }, + { + "ALU64_MOV_K: large negative", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123456789), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123456789 } } + }, + { + "ALU64_MOV_K: large negative sign extension", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123456789), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } } + }, /* BPF_ALU | BPF_ADD | BPF_X */ { "ALU_ADD_X: 1 + 2 = 3", @@ -2967,6 +3243,31 @@ static struct bpf_test tests[] = { { }, { { 0, 2147483647 } }, }, + { + "ALU64_MUL_X: 64x64 multiply, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0fedcba987654321LL), + BPF_LD_IMM64(R1, 0x123456789abcdef0LL), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xe5618cf0 } } + }, + { + "ALU64_MUL_X: 64x64 multiply, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0fedcba987654321LL), + BPF_LD_IMM64(R1, 0x123456789abcdef0LL), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x2236d88f } } + }, /* BPF_ALU | BPF_MUL | BPF_K */ { "ALU_MUL_K: 2 * 3 = 6", @@ -3077,6 +3378,29 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_MUL_K: 64x32 multiply, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xe242d208 } } + }, + { + "ALU64_MUL_K: 64x32 multiply, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xc28f5c28 } } + }, /* BPF_ALU | BPF_DIV | BPF_X */ { "ALU_DIV_X: 6 / 2 = 3", @@ -3431,6 +3755,44 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { + "ALU_AND_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_AND, R0, 15), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4 } } + }, + { + "ALU_AND_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4), + BPF_ALU32_IMM(BPF_AND, R0, 0xafbfcfdf), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xa1b2c3d4 } } + }, + { + "ALU_AND_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0000000080a0c0e0LL), + BPF_ALU32_IMM(BPF_AND, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { "ALU64_AND_K: 3 & 2 = 2", .u.insns_int = { BPF_LD_IMM64(R0, 3), @@ -3453,7 +3815,7 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { - "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000", + "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000000000000000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000000000000000LL), @@ -3469,7 +3831,7 @@ static struct bpf_test tests[] = { { { 0, 0x1 } }, }, { - "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff", + "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffff0000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), @@ -3500,6 +3862,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_AND_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000090b0d0fLL), + BPF_ALU64_IMM(BPF_AND, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_AND_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0123456780a0c0e0LL), + BPF_ALU64_IMM(BPF_AND, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_OR | BPF_X */ { "ALU_OR_X: 1 | 2 = 3", @@ -3573,6 +3967,44 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { + "ALU_OR_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_OR, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01020305 } } + }, + { + "ALU_OR_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_OR, R0, 0xa0b0c0d0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xa1b2c3d4 } } + }, + { + "ALU_OR_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000f9fbfdffLL), + BPF_ALU32_IMM(BPF_OR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { "ALU64_OR_K: 1 | 2 = 3", .u.insns_int = { BPF_LD_IMM64(R0, 1), @@ -3595,7 +4027,7 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { - "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000", + "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffffffff0000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), @@ -3642,6 +4074,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_OR_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x012345678fafcfefLL), + BPF_ALU64_IMM(BPF_OR, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_OR_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0xfffffffff9fbfdffLL), + BPF_ALU64_IMM(BPF_OR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_XOR | BPF_X */ { "ALU_XOR_X: 5 ^ 6 = 3", @@ -3715,6 +4179,44 @@ static struct bpf_test tests[] = { { { 0, 0xfffffffe } }, }, { + "ALU_XOR_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_XOR, R0, 15), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x0102030b } } + }, + { + "ALU_XOR_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4), + BPF_ALU32_IMM(BPF_XOR, R0, 0xafbfcfdf), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x5e4d3c2b } } + }, + { + "ALU_XOR_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000795b3d1fLL), + BPF_ALU32_IMM(BPF_XOR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { "ALU64_XOR_K: 5 ^ 6 = 3", .u.insns_int = { BPF_LD_IMM64(R0, 5), @@ -3726,7 +4228,7 @@ static struct bpf_test tests[] = { { { 0, 3 } }, }, { - "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe", + "ALU64_XOR_K: 1 ^ 0xffffffff = 0xfffffffe", .u.insns_int = { BPF_LD_IMM64(R0, 1), BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff), @@ -3784,6 +4286,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_XOR_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0123456786a4c2e0LL), + BPF_ALU64_IMM(BPF_XOR, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_XOR_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0xfedcba98795b3d1fLL), + BPF_ALU64_IMM(BPF_XOR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_LSH | BPF_X */ { "ALU_LSH_X: 1 << 1 = 2", @@ -3810,6 +4344,18 @@ static struct bpf_test tests[] = { { { 0, 0x80000000 } }, }, { + "ALU_LSH_X: 0x12345678 << 12 = 0x45678000", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x45678000 } } + }, + { "ALU64_LSH_X: 1 << 1 = 2", .u.insns_int = { BPF_LD_IMM64(R0, 1), @@ -3833,6 +4379,106 @@ static struct bpf_test tests[] = { { }, { { 0, 0x80000000 } }, }, + { + "ALU64_LSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xbcdef000 } } + }, + { + "ALU64_LSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x3456789a } } + }, + { + "ALU64_LSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x9abcdef0 } } + }, + { + "ALU64_LSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_X: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_X: Zero shift, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01234567 } } + }, /* BPF_ALU | BPF_LSH | BPF_K */ { "ALU_LSH_K: 1 << 1 = 2", @@ -3857,6 +4503,28 @@ static struct bpf_test tests[] = { { { 0, 0x80000000 } }, }, { + "ALU_LSH_K: 0x12345678 << 12 = 0x45678000", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_LSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x45678000 } } + }, + { + "ALU_LSH_K: 0x12345678 << 0 = 0x12345678", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_LSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x12345678 } } + }, + { "ALU64_LSH_K: 1 << 1 = 2", .u.insns_int = { BPF_LD_IMM64(R0, 1), @@ -3878,6 +4546,86 @@ static struct bpf_test tests[] = { { }, { { 0, 0x80000000 } }, }, + { + "ALU64_LSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xbcdef000 } } + }, + { + "ALU64_LSH_K: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 12), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x3456789a } } + }, + { + "ALU64_LSH_K: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_K: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 36), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x9abcdef0 } } + }, + { + "ALU64_LSH_K: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_K: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 32), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_K: Zero shift", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, /* BPF_ALU | BPF_RSH | BPF_X */ { "ALU_RSH_X: 2 >> 1 = 1", @@ -3904,6 +4652,18 @@ static struct bpf_test tests[] = { { { 0, 1 } }, }, { + "ALU_RSH_X: 0x12345678 >> 20 = 0x123", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_MOV, R1, 20), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x123 } } + }, + { "ALU64_RSH_X: 2 >> 1 = 1", .u.insns_int = { BPF_LD_IMM64(R0, 2), @@ -3927,6 +4687,106 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "ALU64_RSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_RSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x00081234 } } + }, + { + "ALU64_RSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x08123456 } } + }, + { + "ALU64_RSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_RSH_X: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_X: Zero shift, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_RSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, /* BPF_ALU | BPF_RSH | BPF_K */ { "ALU_RSH_K: 2 >> 1 = 1", @@ -3951,6 +4811,28 @@ static struct bpf_test tests[] = { { { 0, 1 } }, }, { + "ALU_RSH_K: 0x12345678 >> 20 = 0x123", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_RSH, R0, 20), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x123 } } + }, + { + "ALU_RSH_K: 0x12345678 >> 0 = 0x12345678", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_RSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x12345678 } } + }, + { "ALU64_RSH_K: 2 >> 1 = 1", .u.insns_int = { BPF_LD_IMM64(R0, 2), @@ -3972,9 +4854,101 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "ALU64_RSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_RSH_K: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 12), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x00081234 } } + }, + { + "ALU64_RSH_K: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x08123456 } } + }, + { + "ALU64_RSH_K: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 36), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_K: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_RSH_K: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_K: Zero shift", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, /* BPF_ALU | BPF_ARSH | BPF_X */ { - "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + "ALU32_ARSH_X: -1234 >> 7 = -10", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_MOV, R1, 7), + BPF_ALU32_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -10 } } + }, + { + "ALU64_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", .u.insns_int = { BPF_LD_IMM64(R0, 0xff00ff0000000000LL), BPF_ALU32_IMM(BPF_MOV, R1, 40), @@ -3985,9 +4959,131 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffff00ff } }, }, + { + "ALU64_ARSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_ARSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfff81234 } } + }, + { + "ALU64_ARSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xf8123456 } } + }, + { + "ALU64_ARSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_ARSH_X: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_X: Zero shift, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_ARSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, /* BPF_ALU | BPF_ARSH | BPF_K */ { - "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + "ALU32_ARSH_K: -1234 >> 7 = -10", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_ARSH, R0, 7), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -10 } } + }, + { + "ALU32_ARSH_K: -1234 >> 0 = -1234", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_ARSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1234 } } + }, + { + "ALU64_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", .u.insns_int = { BPF_LD_IMM64(R0, 0xff00ff0000000000LL), BPF_ALU64_IMM(BPF_ARSH, R0, 40), @@ -3997,6 +5093,86 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffff00ff } }, }, + { + "ALU64_ARSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_ARSH_K: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 12), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfff81234 } } + }, + { + "ALU64_ARSH_K: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xf8123456 } } + }, + { + "ALU64_ARSH_K: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xf123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 36), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_K: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_ARSH_K: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 32), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_K: Zero shoft", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, /* BPF_ALU | BPF_NEG */ { "ALU_NEG: -(3) = -3", @@ -4286,8 +5462,8 @@ static struct bpf_test tests[] = { .u.insns_int = { BPF_LD_IMM64(R0, 0), BPF_LD_IMM64(R1, 0xffffffffffffffffLL), - BPF_STX_MEM(BPF_W, R10, R1, -40), - BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), BPF_EXIT_INSN(), }, INTERNAL, @@ -4295,80 +5471,346 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, .stack_depth = 40, }, + { + "STX_MEM_DW: Store double word: first word in memory", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0x0123456789abcdefLL), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, +#ifdef __BIG_ENDIAN + { { 0, 0x01234567 } }, +#else + { { 0, 0x89abcdef } }, +#endif + .stack_depth = 40, + }, + { + "STX_MEM_DW: Store double word: second word in memory", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0x0123456789abcdefLL), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, +#ifdef __BIG_ENDIAN + { { 0, 0x89abcdef } }, +#else + { { 0, 0x01234567 } }, +#endif + .stack_depth = 40, + }, /* BPF_STX | BPF_ATOMIC | BPF_W/DW */ { - "STX_XADD_W: Test: 0x12 + 0x10 = 0x22", + "STX_XADD_W: X + 1 + 1 + 1 + ...", + { }, + INTERNAL, + { }, + { { 0, 4134 } }, + .fill_helper = bpf_fill_stxw, + }, + { + "STX_XADD_DW: X + 1 + 1 + 1 + ...", + { }, + INTERNAL, + { }, + { { 0, 4134 } }, + .fill_helper = bpf_fill_stxdw, + }, + /* + * Exhaustive tests of atomic operation variants. + * Individual tests are expanded from template macros for all + * combinations of ALU operation, word size and fetching. + */ +#define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU32_IMM(BPF_MOV, R5, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R5, -40), \ + BPF_LDX_MEM(width, R0, R10, -40), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, result } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST2(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test side effects, r10: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU64_REG(BPF_MOV, R1, R10), \ + BPF_ALU32_IMM(BPF_MOV, R0, update), \ + BPF_ST_MEM(BPF_W, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R0, -40), \ + BPF_ALU64_REG(BPF_MOV, R0, R10), \ + BPF_ALU64_REG(BPF_SUB, R0, R1), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, 0 } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST3(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test side effects, r0: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU64_REG(BPF_MOV, R0, R10), \ + BPF_ALU32_IMM(BPF_MOV, R1, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R1, -40), \ + BPF_ALU64_REG(BPF_SUB, R0, R10), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, 0 } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST4(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test fetch: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU32_IMM(BPF_MOV, R3, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R3, -40), \ + BPF_ALU64_REG(BPF_MOV, R0, R3), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, (op) & BPF_FETCH ? old : update } }, \ + .stack_depth = 40, \ +} + /* BPF_ATOMIC | BPF_W: BPF_ADD */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_W: BPF_ADD | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_DW: BPF_ADD */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_DW: BPF_ADD | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_W: BPF_AND */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_W: BPF_AND | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_DW: BPF_AND */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_DW: BPF_AND | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_W: BPF_OR */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_W: BPF_OR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_DW: BPF_OR */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_DW: BPF_OR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_W: BPF_XOR */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_W: BPF_XOR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_DW: BPF_XOR */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_DW: BPF_XOR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_W: BPF_XCHG */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + /* BPF_ATOMIC | BPF_DW: BPF_XCHG */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), +#undef BPF_ATOMIC_OP_TEST1 +#undef BPF_ATOMIC_OP_TEST2 +#undef BPF_ATOMIC_OP_TEST3 +#undef BPF_ATOMIC_OP_TEST4 + /* BPF_ATOMIC | BPF_W, BPF_CMPXCHG */ + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful return", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01234567 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful store", .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), BPF_LDX_MEM(BPF_W, R0, R10, -40), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0x22 } }, + { { 0, 0x89abcdef } }, .stack_depth = 40, }, { - "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure return", .u.insns_int = { - BPF_ALU64_REG(BPF_MOV, R1, R10), - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), - BPF_ALU64_REG(BPF_MOV, R0, R10), - BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0 } }, + { { 0, 0x01234567 } }, .stack_depth = 40, }, { - "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure store", .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0x12 } }, + { { 0, 0x01234567 } }, .stack_depth = 40, }, { - "STX_XADD_W: X + 1 + 1 + 1 + ...", + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test side effects", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_ALU32_REG(BPF_MOV, R0, R3), + BPF_EXIT_INSN(), + }, + INTERNAL, { }, + { { 0, 0x89abcdef } }, + .stack_depth = 40, + }, + /* BPF_ATOMIC | BPF_DW, BPF_CMPXCHG */ + { + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, INTERNAL, { }, - { { 0, 4134 } }, - .fill_helper = bpf_fill_stxw, + { { 0, 0 } }, + .stack_depth = 40, }, { - "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store", .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R0, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_REG(BPF_SUB, R0, R2), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0x22 } }, + { { 0, 0 } }, .stack_depth = 40, }, { - "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return", .u.insns_int = { - BPF_ALU64_REG(BPF_MOV, R1, R10), - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), - BPF_ALU64_REG(BPF_MOV, R0, R10), + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), BPF_ALU64_REG(BPF_SUB, R0, R1), BPF_EXIT_INSN(), }, @@ -4378,25 +5820,552 @@ static struct bpf_test tests[] = { .stack_depth = 40, }, { - "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22", + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store", .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), BPF_EXIT_INSN(), }, INTERNAL, { }, - { { 0, 0x12 } }, + { { 0, 0 } }, .stack_depth = 40, }, { - "STX_XADD_DW: X + 1 + 1 + 1 + ...", + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_LD_IMM64(R0, 0xfecdba9876543210ULL), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_REG(BPF_SUB, R0, R2), + BPF_EXIT_INSN(), + }, + INTERNAL, { }, + { { 0, 0 } }, + .stack_depth = 40, + }, + /* BPF_JMP32 | BPF_JEQ | BPF_K */ + { + "JMP32_JEQ_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JEQ, R0, 321, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, INTERNAL, { }, - { { 0, 4134 } }, - .fill_helper = bpf_fill_stxdw, + { { 0, 123 } } + }, + { + "JMP32_JEQ_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 12345678), + BPF_JMP32_IMM(BPF_JEQ, R0, 12345678 & 0xffff, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, 12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 12345678 } } + }, + { + "JMP32_JEQ_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JEQ | BPF_X */ + { + "JMP32_JEQ_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1234), + BPF_ALU32_IMM(BPF_MOV, R1, 4321), + BPF_JMP32_REG(BPF_JEQ, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1234), + BPF_JMP32_REG(BPF_JEQ, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1234 } } + }, + /* BPF_JMP32 | BPF_JNE | BPF_K */ + { + "JMP32_JNE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JNE, R0, 123, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 321, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JNE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 12345678), + BPF_JMP32_IMM(BPF_JNE, R0, 12345678, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 12345678 & 0xffff, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 12345678 } } + }, + { + "JMP32_JNE_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JNE, R0, -123, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JNE | BPF_X */ + { + "JMP32_JNE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1234), + BPF_ALU32_IMM(BPF_MOV, R1, 1234), + BPF_JMP32_REG(BPF_JNE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 4321), + BPF_JMP32_REG(BPF_JNE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1234 } } + }, + /* BPF_JMP32 | BPF_JSET | BPF_K */ + { + "JMP32_JSET_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 2, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 3, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "JMP32_JSET_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x40000000), + BPF_JMP32_IMM(BPF_JSET, R0, 0x3fffffff, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 0x60000000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x40000000 } } + }, + { + "JMP32_JSET_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSET, R0, -1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JSET | BPF_X */ + { + "JMP32_JSET_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 8), + BPF_ALU32_IMM(BPF_MOV, R1, 7), + BPF_JMP32_REG(BPF_JSET, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 8 | 2), + BPF_JMP32_REG(BPF_JNE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 8 } } + }, + /* BPF_JMP32 | BPF_JGT | BPF_K */ + { + "JMP32_JGT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JGT, R0, 123, 1), + BPF_JMP32_IMM(BPF_JGT, R0, 122, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JGT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JGT, R0, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JGT, R0, 0xfffffffd, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGT | BPF_X */ + { + "JMP32_JGT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JGT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JGT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGE | BPF_K */ + { + "JMP32_JGE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JGE, R0, 124, 1), + BPF_JMP32_IMM(BPF_JGE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JGE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JGE, R0, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JGE, R0, 0xfffffffe, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGE | BPF_X */ + { + "JMP32_JGE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JGE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe), + BPF_JMP32_REG(BPF_JGE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLT | BPF_K */ + { + "JMP32_JLT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JLT, R0, 123, 1), + BPF_JMP32_IMM(BPF_JLT, R0, 124, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JLT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JLT, R0, 0xfffffffd, 1), + BPF_JMP32_IMM(BPF_JLT, R0, 0xffffffff, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLT | BPF_X */ + { + "JMP32_JLT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JLT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JLT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLE | BPF_K */ + { + "JMP32_JLE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JLE, R0, 122, 1), + BPF_JMP32_IMM(BPF_JLE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JLE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffd, 1), + BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffe, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLE | BPF_X */ + { + "JMP32_JLE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JLE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe), + BPF_JMP32_REG(BPF_JLE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JSGT | BPF_K */ + { + "JMP32_JSGT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSGT, R0, -123, 1), + BPF_JMP32_IMM(BPF_JSGT, R0, -124, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSGT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSGT, R0, -12345678, 1), + BPF_JMP32_IMM(BPF_JSGT, R0, -12345679, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGT | BPF_X */ + { + "JMP32_JSGT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSGT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345679), + BPF_JMP32_REG(BPF_JSGT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGE | BPF_K */ + { + "JMP32_JSGE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSGE, R0, -122, 1), + BPF_JMP32_IMM(BPF_JSGE, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSGE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSGE, R0, -12345677, 1), + BPF_JMP32_IMM(BPF_JSGE, R0, -12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGE | BPF_X */ + { + "JMP32_JSGE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345677), + BPF_JMP32_REG(BPF_JSGE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSGE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLT | BPF_K */ + { + "JMP32_JSLT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSLT, R0, -123, 1), + BPF_JMP32_IMM(BPF_JSLT, R0, -122, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSLT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSLT, R0, -12345678, 1), + BPF_JMP32_IMM(BPF_JSLT, R0, -12345677, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLT | BPF_X */ + { + "JMP32_JSLT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSLT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345677), + BPF_JMP32_REG(BPF_JSLT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLE | BPF_K */ + { + "JMP32_JSLE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSLE, R0, -124, 1), + BPF_JMP32_IMM(BPF_JSLE, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSLE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSLE, R0, -12345679, 1), + BPF_JMP32_IMM(BPF_JSLE, R0, -12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLE | BPF_K */ + { + "JMP32_JSLE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345679), + BPF_JMP32_REG(BPF_JSLE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSLE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } }, /* BPF_JMP | BPF_EXIT */ { @@ -5223,6 +7192,14 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { /* Mainly checking JIT here. */ + "BPF_MAXINSNS: Very long conditional jump", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_long_jmp, + }, { "JMP_JA: Jump, gap, jump, ...", { }, @@ -6659,7 +8636,14 @@ static int run_one(const struct bpf_prog *fp, struct bpf_test *test) u64 duration; u32 ret; - if (test->test[i].data_size == 0 && + /* + * NOTE: Several sub-tests may be present, in which case + * a zero {data_size, result} tuple indicates the end of + * the sub-test array. The first test is always run, + * even if both data_size and result happen to be zero. + */ + if (i > 0 && + test->test[i].data_size == 0 && test->test[i].result == 0) break; @@ -7005,8 +8989,248 @@ static __init int test_bpf(void) return err_cnt ? -EINVAL : 0; } +struct tail_call_test { + const char *descr; + struct bpf_insn insns[MAX_INSNS]; + int result; + int stack_depth; +}; + +/* + * Magic marker used in test snippets for tail calls below. + * BPF_LD/MOV to R2 and R2 with this immediate value is replaced + * with the proper values by the test runner. + */ +#define TAIL_CALL_MARKER 0x7a11ca11 + +/* Special offset to indicate a NULL call target */ +#define TAIL_CALL_NULL 0x7fff + +/* Special offset to indicate an out-of-range index */ +#define TAIL_CALL_INVALID 0x7ffe + +#define TAIL_CALL(offset) \ + BPF_LD_IMM64(R2, TAIL_CALL_MARKER), \ + BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, R3, 0, \ + offset, TAIL_CALL_MARKER), \ + BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0) + +/* + * Tail call tests. Each test case may call any other test in the table, + * including itself, specified as a relative index offset from the calling + * test. The index TAIL_CALL_NULL can be used to specify a NULL target + * function to test the JIT error path. Similarly, the index TAIL_CALL_INVALID + * results in a target index that is out of range. + */ +static struct tail_call_test tail_call_tests[] = { + { + "Tail call leaf", + .insns = { + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, + { + "Tail call 2", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 2), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 3, + }, + { + "Tail call 3", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 3), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 6, + }, + { + "Tail call 4", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 4), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 10, + }, + { + "Tail call error path, max count reached", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_ALU64_REG(BPF_MOV, R0, R1), + TAIL_CALL(0), + BPF_EXIT_INSN(), + }, + .result = MAX_TAIL_CALL_CNT + 1, + }, + { + "Tail call error path, NULL target", + .insns = { + BPF_ALU64_IMM(BPF_MOV, R0, -1), + TAIL_CALL(TAIL_CALL_NULL), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, + { + "Tail call error path, index out of range", + .insns = { + BPF_ALU64_IMM(BPF_MOV, R0, -1), + TAIL_CALL(TAIL_CALL_INVALID), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, +}; + +static void __init destroy_tail_call_tests(struct bpf_array *progs) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) + if (progs->ptrs[i]) + bpf_prog_free(progs->ptrs[i]); + kfree(progs); +} + +static __init int prepare_tail_call_tests(struct bpf_array **pprogs) +{ + int ntests = ARRAY_SIZE(tail_call_tests); + struct bpf_array *progs; + int which, err; + + /* Allocate the table of programs to be used for tall calls */ + progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]), + GFP_KERNEL); + if (!progs) + goto out_nomem; + + /* Create all eBPF programs and populate the table */ + for (which = 0; which < ntests; which++) { + struct tail_call_test *test = &tail_call_tests[which]; + struct bpf_prog *fp; + int len, i; + + /* Compute the number of program instructions */ + for (len = 0; len < MAX_INSNS; len++) { + struct bpf_insn *insn = &test->insns[len]; + + if (len < MAX_INSNS - 1 && + insn->code == (BPF_LD | BPF_DW | BPF_IMM)) + len++; + if (insn->code == 0) + break; + } + + /* Allocate and initialize the program */ + fp = bpf_prog_alloc(bpf_prog_size(len), 0); + if (!fp) + goto out_nomem; + + fp->len = len; + fp->type = BPF_PROG_TYPE_SOCKET_FILTER; + fp->aux->stack_depth = test->stack_depth; + memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn)); + + /* Relocate runtime tail call offsets and addresses */ + for (i = 0; i < len; i++) { + struct bpf_insn *insn = &fp->insnsi[i]; + + if (insn->imm != TAIL_CALL_MARKER) + continue; + + switch (insn->code) { + case BPF_LD | BPF_DW | BPF_IMM: + insn[0].imm = (u32)(long)progs; + insn[1].imm = ((u64)(long)progs) >> 32; + break; + + case BPF_ALU | BPF_MOV | BPF_K: + if (insn->off == TAIL_CALL_NULL) + insn->imm = ntests; + else if (insn->off == TAIL_CALL_INVALID) + insn->imm = ntests + 1; + else + insn->imm = which + insn->off; + insn->off = 0; + } + } + + fp = bpf_prog_select_runtime(fp, &err); + if (err) + goto out_err; + + progs->ptrs[which] = fp; + } + + /* The last entry contains a NULL program pointer */ + progs->map.max_entries = ntests + 1; + *pprogs = progs; + return 0; + +out_nomem: + err = -ENOMEM; + +out_err: + if (progs) + destroy_tail_call_tests(progs); + return err; +} + +static __init int test_tail_calls(struct bpf_array *progs) +{ + int i, err_cnt = 0, pass_cnt = 0; + int jit_cnt = 0, run_cnt = 0; + + for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) { + struct tail_call_test *test = &tail_call_tests[i]; + struct bpf_prog *fp = progs->ptrs[i]; + u64 duration; + int ret; + + cond_resched(); + + pr_info("#%d %s ", i, test->descr); + if (!fp) { + err_cnt++; + continue; + } + pr_cont("jited:%u ", fp->jited); + + run_cnt++; + if (fp->jited) + jit_cnt++; + + ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration); + if (ret == test->result) { + pr_cont("%lld PASS", duration); + pass_cnt++; + } else { + pr_cont("ret %d != %d FAIL", ret, test->result); + err_cnt++; + } + } + + pr_info("%s: Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n", + __func__, pass_cnt, err_cnt, jit_cnt, run_cnt); + + return err_cnt ? -EINVAL : 0; +} + static int __init test_bpf_init(void) { + struct bpf_array *progs = NULL; int ret; ret = prepare_bpf_tests(); @@ -7018,6 +9242,14 @@ static int __init test_bpf_init(void) if (ret) return ret; + ret = prepare_tail_call_tests(&progs); + if (ret) + return ret; + ret = test_tail_calls(progs); + destroy_tail_call_tests(progs); + if (ret) + return ret; + return test_skb_segment(); } diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 271f2ca862c8..f5561ea7d90a 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -398,12 +398,12 @@ static void cgwb_release_workfn(struct work_struct *work) blkcg_unpin_online(blkcg); fprop_local_destroy_percpu(&wb->memcg_completions); - percpu_ref_exit(&wb->refcnt); spin_lock_irq(&cgwb_lock); list_del(&wb->offline_node); spin_unlock_irq(&cgwb_lock); + percpu_ref_exit(&wb->refcnt); wb_exit(wb); WARN_ON_ONCE(!list_empty(&wb->b_attached)); kfree_rcu(wb, rcu); diff --git a/mm/kfence/core.c b/mm/kfence/core.c index d7666ace9d2e..575c685aa642 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -734,6 +734,22 @@ void kfence_shutdown_cache(struct kmem_cache *s) void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { /* + * Perform size check before switching kfence_allocation_gate, so that + * we don't disable KFENCE without making an allocation. + */ + if (size > PAGE_SIZE) + return NULL; + + /* + * Skip allocations from non-default zones, including DMA. We cannot + * guarantee that pages in the KFENCE pool will have the requested + * properties (e.g. reside in DMAable memory). + */ + if ((flags & GFP_ZONEMASK) || + (s->flags & (SLAB_CACHE_DMA | SLAB_CACHE_DMA32))) + return NULL; + + /* * allocation_gate only needs to become non-zero, so it doesn't make * sense to continue writing to it and pay the associated contention * cost, in case we have a large number of concurrent allocations. @@ -757,9 +773,6 @@ void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) if (!READ_ONCE(kfence_enabled)) return NULL; - if (size > PAGE_SIZE) - return NULL; - return kfence_guarded_alloc(s, size, flags); } diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index 7f24b9bcb2ec..942cbc16ad26 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -852,7 +852,7 @@ static void kfence_test_exit(void) tracepoint_synchronize_unregister(); } -late_initcall(kfence_test_init); +late_initcall_sync(kfence_test_init); module_exit(kfence_test_exit); MODULE_LICENSE("GPL v2"); diff --git a/mm/memblock.c b/mm/memblock.c index 0041ff62c584..de7b553baa50 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -947,7 +947,8 @@ static bool should_skip_region(struct memblock_type *type, return true; /* skip hotpluggable memory regions if needed */ - if (movable_node_is_enabled() && memblock_is_hotpluggable(m)) + if (movable_node_is_enabled() && memblock_is_hotpluggable(m) && + !(flags & MEMBLOCK_HOTPLUG)) return true; /* if we want mirror memory skip non-mirror memory regions */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1bbf239b06f2..8ef06f9e0db1 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3574,7 +3574,8 @@ static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) unsigned long val; if (mem_cgroup_is_root(memcg)) { - cgroup_rstat_flush(memcg->css.cgroup); + /* mem_cgroup_threshold() calls here from irqsafe context */ + cgroup_rstat_flush_irqsafe(memcg->css.cgroup); val = memcg_page_state(memcg, NR_FILE_PAGES) + memcg_page_state(memcg, NR_ANON_MAPPED); if (swap) diff --git a/mm/memory.c b/mm/memory.c index 747a01d495f2..25fc46e87214 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4026,8 +4026,17 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return ret; } - if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) + if (vmf->prealloc_pte) { + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (likely(pmd_none(*vmf->pmd))) { + mm_inc_nr_ptes(vma->vm_mm); + pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); + vmf->prealloc_pte = NULL; + } + spin_unlock(vmf->ptl); + } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { return VM_FAULT_OOM; + } } /* See comment in handle_pte_fault() */ diff --git a/mm/migrate.c b/mm/migrate.c index 34a9ad3e0a4f..7e240437e7d9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2068,7 +2068,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, LIST_HEAD(migratepages); new_page_t *new; bool compound; - unsigned int nr_pages = thp_nr_pages(page); + int nr_pages = thp_nr_pages(page); /* * PTE mapped THP or HugeTLB page can't reach here so the page could diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c index f5852a058ce0..1854850b4b89 100644 --- a/mm/mmap_lock.c +++ b/mm/mmap_lock.c @@ -156,14 +156,14 @@ static inline void put_memcg_path_buf(void) #define TRACE_MMAP_LOCK_EVENT(type, mm, ...) \ do { \ const char *memcg_path; \ - preempt_disable(); \ + local_lock(&memcg_paths.lock); \ memcg_path = get_mm_memcg_path(mm); \ trace_mmap_lock_##type(mm, \ memcg_path != NULL ? memcg_path : "", \ ##__VA_ARGS__); \ if (likely(memcg_path != NULL)) \ put_memcg_path_buf(); \ - preempt_enable(); \ + local_unlock(&memcg_paths.lock); \ } while (0) #else /* !CONFIG_MEMCG */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 3e97e68aef7a..856b175c15a4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -840,21 +840,24 @@ void init_mem_debugging_and_hardening(void) } #endif - if (_init_on_alloc_enabled_early) { - if (page_poisoning_requested) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_alloc\n"); - else - static_branch_enable(&init_on_alloc); - } - if (_init_on_free_enabled_early) { - if (page_poisoning_requested) - pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_free\n"); - else - static_branch_enable(&init_on_free); + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) && + page_poisoning_requested) { + pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " + "will take precedence over init_on_alloc and init_on_free\n"); + _init_on_alloc_enabled_early = false; + _init_on_free_enabled_early = false; } + if (_init_on_alloc_enabled_early) + static_branch_enable(&init_on_alloc); + else + static_branch_disable(&init_on_alloc); + + if (_init_on_free_enabled_early) + static_branch_enable(&init_on_free); + else + static_branch_disable(&init_on_free); + #ifdef CONFIG_DEBUG_PAGEALLOC if (!debug_pagealloc_enabled()) return; diff --git a/mm/secretmem.c b/mm/secretmem.c index f77d25467a14..030f02ddc7c1 100644 --- a/mm/secretmem.c +++ b/mm/secretmem.c @@ -152,6 +152,7 @@ static void secretmem_freepage(struct page *page) } const struct address_space_operations secretmem_aops = { + .set_page_dirty = __set_page_dirty_no_writeback, .freepage = secretmem_freepage, .migratepage = secretmem_migratepage, .isolate_page = secretmem_isolate_page, diff --git a/mm/slab.h b/mm/slab.h index f997fd5e42c8..58c01a34e5b8 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -346,7 +346,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s_orig, continue; page = virt_to_head_page(p[i]); - objcgs = page_objcgs(page); + objcgs = page_objcgs_check(page); if (!objcgs) continue; diff --git a/mm/slub.c b/mm/slub.c index 090fa14628f9..af984e4990e8 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3236,6 +3236,16 @@ struct detached_freelist { struct kmem_cache *s; }; +static inline void free_nonslab_page(struct page *page) +{ + unsigned int order = compound_order(page); + + VM_BUG_ON_PAGE(!PageCompound(page), page); + kfree_hook(page_address(page)); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, -(PAGE_SIZE << order)); + __free_pages(page, order); +} + /* * This function progressively scans the array with free objects (with * a limited look ahead) and extract objects belonging to the same @@ -3272,9 +3282,7 @@ int build_detached_freelist(struct kmem_cache *s, size_t size, if (!s) { /* Handle kalloc'ed objects */ if (unlikely(!PageSlab(page))) { - BUG_ON(!PageCompound(page)); - kfree_hook(object); - __free_pages(page, compound_order(page)); + free_nonslab_page(page); p[size] = NULL; /* mark object processed */ return size; } @@ -4250,13 +4258,7 @@ void kfree(const void *x) page = virt_to_head_page(x); if (unlikely(!PageSlab(page))) { - unsigned int order = compound_order(page); - - BUG_ON(!PageCompound(page)); - kfree_hook(object); - mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, - -(PAGE_SIZE << order)); - __free_pages(page, order); + free_nonslab_page(page); return; } slab_free(page->slab_cache, page, object, NULL, 1, _RET_IP_); diff --git a/net/ax25/ax25_ip.c b/net/ax25/ax25_ip.c index e4f63dd43cb5..36249776c021 100644 --- a/net/ax25/ax25_ip.c +++ b/net/ax25/ax25_ip.c @@ -193,10 +193,8 @@ netdev_tx_t ax25_ip_xmit(struct sk_buff *skb) skb_pull(skb, AX25_KISS_HEADER_LEN); if (digipeat != NULL) { - if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) { - kfree_skb(skb); + if ((ourskb = ax25_rt_build_path(skb, src, dst, route->digipeat)) == NULL) goto put; - } skb = ourskb; } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index f53751ba81b3..22f2f66c6e0a 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -325,7 +325,6 @@ void ax25_kick(ax25_cb *ax25) void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type) { - struct sk_buff *skbn; unsigned char *ptr; int headroom; @@ -336,18 +335,12 @@ void ax25_transmit_buffer(ax25_cb *ax25, struct sk_buff *skb, int type) headroom = ax25_addr_size(ax25->digipeat); - if (skb_headroom(skb) < headroom) { - if ((skbn = skb_realloc_headroom(skb, headroom)) == NULL) { + if (unlikely(skb_headroom(skb) < headroom)) { + skb = skb_expand_head(skb, headroom); + if (!skb) { printk(KERN_CRIT "AX.25: ax25_transmit_buffer - out of memory\n"); - kfree_skb(skb); return; } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - consume_skb(skb); - skb = skbn; } ptr = skb_push(skb, headroom); diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index b40e0bce67ea..d0b2e094bd55 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -441,24 +441,17 @@ put: struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src, ax25_address *dest, ax25_digi *digi) { - struct sk_buff *skbn; unsigned char *bp; int len; len = digi->ndigi * AX25_ADDR_LEN; - if (skb_headroom(skb) < len) { - if ((skbn = skb_realloc_headroom(skb, len)) == NULL) { + if (unlikely(skb_headroom(skb) < len)) { + skb = skb_expand_head(skb, len); + if (!skb) { printk(KERN_CRIT "AX.25: ax25_dg_build_path - out of memory\n"); return NULL; } - - if (skb->sk != NULL) - skb_set_owner_w(skbn, skb->sk); - - consume_skb(skb); - - skb = skbn; } bp = skb_push(skb, len); diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index 63d42dcc9324..2b639c8b0ded 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -2274,8 +2274,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } @@ -2446,8 +2445,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 8c95a11a830a..7976a0435662 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -984,8 +984,7 @@ out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 007f2827935d..36a98d3cefe0 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -557,8 +557,7 @@ int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 923e2197c2db..0158f267c403 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -91,8 +91,7 @@ static struct net_device *batadv_mcast_get_bridge(struct net_device *soft_iface) upper = netdev_master_upper_dev_get_rcu(upper); } while (upper && !(upper->priv_flags & IFF_EBRIDGE)); - if (upper) - dev_hold(upper); + dev_hold(upper); rcu_read_unlock(); return upper; @@ -509,8 +508,7 @@ batadv_mcast_mla_softif_get(struct net_device *dev, } out: - if (bridge) - dev_put(bridge); + dev_put(bridge); return ret4 + ret6; } @@ -2239,8 +2237,7 @@ batadv_mcast_netlink_get_primary(struct netlink_callback *cb, } out: - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); if (!ret && primary_if) *primary_if = hard_iface; diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index da7249448474..6a4d3f437e00 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -799,12 +799,10 @@ int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (hardif) batadv_hardif_put(hardif); - if (hard_iface) - dev_put(hard_iface); + dev_put(hard_iface); if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } @@ -1412,12 +1410,10 @@ int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (hardif) batadv_hardif_put(hardif); - if (hard_iface) - dev_put(hard_iface); + dev_put(hard_iface); if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); return ret; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 434b4f042909..711fe5a2cec4 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -820,8 +820,7 @@ check_roaming: out: if (in_hardif) batadv_hardif_put(in_hardif); - if (in_dev) - dev_put(in_dev); + dev_put(in_dev); if (tt_local) batadv_tt_local_entry_put(tt_local); if (tt_global) @@ -1217,8 +1216,7 @@ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); cb->args[0] = bucket; cb->args[1] = idx; @@ -2005,8 +2003,7 @@ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) out: if (primary_if) batadv_hardif_put(primary_if); - if (soft_iface) - dev_put(soft_iface); + dev_put(soft_iface); cb->args[0] = bucket; cb->args[1] = idx; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2560ed2f144d..e1a545c8a69f 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3996,14 +3996,10 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int id; - BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hci_dev_set_flag(hdev, HCI_UNREGISTER); - id = hdev->id; - write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); @@ -4038,7 +4034,14 @@ void hci_unregister_dev(struct hci_dev *hdev) } device_del(&hdev->dev); + /* Actual cleanup is deferred until hci_cleanup_dev(). */ + hci_dev_put(hdev); +} +EXPORT_SYMBOL(hci_unregister_dev); +/* Cleanup HCI device */ +void hci_cleanup_dev(struct hci_dev *hdev) +{ debugfs_remove_recursive(hdev->debugfs); kfree_const(hdev->hw_info); kfree_const(hdev->fw_info); @@ -4063,11 +4066,8 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_blocked_keys_clear(hdev); hci_dev_unlock(hdev); - hci_dev_put(hdev); - - ida_simple_remove(&hci_index_ida, id); + ida_simple_remove(&hci_index_ida, hdev->id); } -EXPORT_SYMBOL(hci_unregister_dev); /* Suspend HCI device */ int hci_suspend_dev(struct hci_dev *hdev) diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index b04a5a02ecf3..f1128c2134f0 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -59,6 +59,17 @@ struct hci_pinfo { char comm[TASK_COMM_LEN]; }; +static struct hci_dev *hci_hdev_from_sock(struct sock *sk) +{ + struct hci_dev *hdev = hci_pi(sk)->hdev; + + if (!hdev) + return ERR_PTR(-EBADFD); + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + return ERR_PTR(-EPIPE); + return hdev; +} + void hci_sock_set_flag(struct sock *sk, int nr) { set_bit(nr, &hci_pi(sk)->flags); @@ -759,19 +770,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event) if (event == HCI_DEV_UNREG) { struct sock *sk; - /* Detach sockets from device */ + /* Wake up sockets using this dead device */ read_lock(&hci_sk_list.lock); sk_for_each(sk, &hci_sk_list.head) { - lock_sock(sk); if (hci_pi(sk)->hdev == hdev) { - hci_pi(sk)->hdev = NULL; sk->sk_err = EPIPE; - sk->sk_state = BT_OPEN; sk->sk_state_change(sk); - - hci_dev_put(hdev); } - release_sock(sk); } read_unlock(&hci_sk_list.lock); } @@ -930,10 +935,10 @@ static int hci_sock_reject_list_del(struct hci_dev *hdev, void __user *arg) static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd, unsigned long arg) { - struct hci_dev *hdev = hci_pi(sk)->hdev; + struct hci_dev *hdev = hci_hdev_from_sock(sk); - if (!hdev) - return -EBADFD; + if (IS_ERR(hdev)) + return PTR_ERR(hdev); if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) return -EBUSY; @@ -1103,6 +1108,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, lock_sock(sk); + /* Allow detaching from dead device and attaching to alive device, if + * the caller wants to re-bind (instead of close) this socket in + * response to hci_sock_dev_event(HCI_DEV_UNREG) notification. + */ + hdev = hci_pi(sk)->hdev; + if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) { + hci_pi(sk)->hdev = NULL; + sk->sk_state = BT_OPEN; + hci_dev_put(hdev); + } + hdev = NULL; + if (sk->sk_state == BT_BOUND) { err = -EALREADY; goto done; @@ -1379,9 +1396,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr, lock_sock(sk); - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } @@ -1743,9 +1760,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg, goto done; } - hdev = hci_pi(sk)->hdev; - if (!hdev) { - err = -EBADFD; + hdev = hci_hdev_from_sock(sk); + if (IS_ERR(hdev)) { + err = PTR_ERR(hdev); goto done; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 9874844a95a9..b69d88b88d2e 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -83,6 +83,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn) static void bt_host_release(struct device *dev) { struct hci_dev *hdev = to_hci_dev(dev); + + if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) + hci_cleanup_dev(hdev); kfree(hdev); module_put(THIS_MODULE); } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index b488e2779718..335e1d8c17f8 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -88,17 +88,19 @@ reset: static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, u32 *retval, u32 *time, bool xdp) { - struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; + struct bpf_prog_array_item item = {.prog = prog}; + struct bpf_run_ctx *old_ctx; + struct bpf_cg_run_ctx run_ctx; struct bpf_test_timer t = { NO_MIGRATE }; enum bpf_cgroup_storage_type stype; int ret; for_each_cgroup_storage_type(stype) { - storage[stype] = bpf_cgroup_storage_alloc(prog, stype); - if (IS_ERR(storage[stype])) { - storage[stype] = NULL; + item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype); + if (IS_ERR(item.cgroup_storage[stype])) { + item.cgroup_storage[stype] = NULL; for_each_cgroup_storage_type(stype) - bpf_cgroup_storage_free(storage[stype]); + bpf_cgroup_storage_free(item.cgroup_storage[stype]); return -ENOMEM; } } @@ -107,22 +109,19 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, repeat = 1; bpf_test_timer_enter(&t); + old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); do { - ret = bpf_cgroup_storage_set(storage); - if (ret) - break; - + run_ctx.prog_item = &item; if (xdp) *retval = bpf_prog_run_xdp(prog, ctx); else *retval = BPF_PROG_RUN(prog, ctx); - - bpf_cgroup_storage_unset(); } while (bpf_test_timer_continue(&t, repeat, &ret, time)); + bpf_reset_run_ctx(old_ctx); bpf_test_timer_leave(&t); for_each_cgroup_storage_type(stype) - bpf_cgroup_storage_free(storage[stype]); + bpf_cgroup_storage_free(item.cgroup_storage[stype]); return ret; } @@ -764,8 +763,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) return -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) - return -EINVAL; + ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/net/bridge/br.c b/net/bridge/br.c index 8fb5dca5f8e0..c8ae823aa8e7 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -166,7 +166,8 @@ static int br_switchdev_event(struct notifier_block *unused, case SWITCHDEV_FDB_ADD_TO_BRIDGE: fdb_info = ptr; err = br_fdb_external_learn_add(br, p, fdb_info->addr, - fdb_info->vid, false); + fdb_info->vid, + fdb_info->is_local, false); if (err) { err = notifier_from_errno(err); break; @@ -201,6 +202,48 @@ static struct notifier_block br_switchdev_notifier = { .notifier_call = br_switchdev_event, }; +/* called under rtnl_mutex */ +static int br_switchdev_blocking_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_brport_info *brport_info; + const struct switchdev_brport *b; + struct net_bridge_port *p; + int err = NOTIFY_DONE; + + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + switch (event) { + case SWITCHDEV_BRPORT_OFFLOADED: + brport_info = ptr; + b = &brport_info->brport; + + err = br_switchdev_port_offload(p, b->dev, b->ctx, + b->atomic_nb, b->blocking_nb, + b->tx_fwd_offload, extack); + err = notifier_from_errno(err); + break; + case SWITCHDEV_BRPORT_UNOFFLOADED: + brport_info = ptr; + b = &brport_info->brport; + + br_switchdev_port_unoffload(p, b->ctx, b->atomic_nb, + b->blocking_nb); + break; + } + +out: + return err; +} + +static struct notifier_block br_switchdev_blocking_notifier = { + .notifier_call = br_switchdev_blocking_event, +}; + /* br_boolopt_toggle - change user-controlled boolean option * * @br: bridge device @@ -355,10 +398,14 @@ static int __init br_init(void) if (err) goto err_out4; - err = br_netlink_init(); + err = register_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); if (err) goto err_out5; + err = br_netlink_init(); + if (err) + goto err_out6; + brioctl_set(br_ioctl_stub); #if IS_ENABLED(CONFIG_ATM_LANE) @@ -373,6 +420,8 @@ static int __init br_init(void) return 0; +err_out6: + unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); err_out5: unregister_switchdev_notifier(&br_switchdev_notifier); err_out4: @@ -392,6 +441,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_switchdev_blocking_notifier(&br_switchdev_blocking_notifier); unregister_switchdev_notifier(&br_switchdev_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 4ff8c67ac88f..ddd09f5994a7 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -745,7 +745,7 @@ static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags); item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags); - item.info.dev = item.is_local ? br->dev : p->dev; + item.info.dev = (!p || item.is_local) ? br->dev : p->dev; item.info.ctx = ctx; err = nb->notifier_call(nb, action, &item); @@ -1011,7 +1011,8 @@ static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, - u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[]) + u16 nlh_flags, u16 vid, struct nlattr *nfea_tb[], + struct netlink_ext_ack *extack) { int err = 0; @@ -1030,7 +1031,15 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, rcu_read_unlock(); local_bh_enable(); } else if (ndm->ndm_flags & NTF_EXT_LEARNED) { - err = br_fdb_external_learn_add(br, p, addr, vid, true); + if (!p && !(ndm->ndm_state & NUD_PERMANENT)) { + NL_SET_ERR_MSG_MOD(extack, + "FDB entry towards bridge must be permanent"); + return -EINVAL; + } + + err = br_fdb_external_learn_add(br, p, addr, vid, + ndm->ndm_state & NUD_PERMANENT, + true); } else { spin_lock_bh(&br->hash_lock); err = fdb_add_entry(br, p, addr, ndm, nlh_flags, vid, nfea_tb); @@ -1102,9 +1111,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* VID was specified, so use it. */ - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid, nfea_tb, + extack); } else { - err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0, nfea_tb, + extack); if (err || !vg || !vg->num_vlans) goto out; @@ -1116,7 +1127,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (!br_vlan_should_use(v)) continue; err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid, - nfea_tb); + nfea_tb, extack); if (err) goto out; } @@ -1256,7 +1267,7 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool is_local, bool swdev_notify) { struct net_bridge_fdb_entry *fdb; @@ -1273,6 +1284,10 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) flags |= BIT(BR_FDB_ADDED_BY_USER); + + if (is_local) + flags |= BIT(BR_FDB_LOCAL); + fdb = fdb_create(br, p, addr, vid, flags); if (!fdb) { err = -ENOMEM; @@ -1299,6 +1314,9 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, if (swdev_notify) set_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); + if (is_local) + set_bit(BR_FDB_LOCAL, &fdb->flags); + if (modified) fdb_notify(br, fdb, RTM_NEWNEIGH, swdev_notify); } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 86f6d7e93ea8..67c60240b713 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -456,7 +456,7 @@ int br_add_bridge(struct net *net, const char *name) dev_net_set(dev, net); dev->rtnl_link_ops = &br_link_ops; - res = register_netdev(dev); + res = register_netdevice(dev); if (res) free_netdev(dev); return res; @@ -467,7 +467,6 @@ int br_del_bridge(struct net *net, const char *name) struct net_device *dev; int ret = 0; - rtnl_lock(); dev = __dev_get_by_name(net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -485,7 +484,6 @@ int br_del_bridge(struct net *net, const char *name) else br_dev_delete(dev, NULL); - rtnl_unlock(); return ret; } diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 46a24c20e405..793b0db9d9a3 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -351,7 +351,7 @@ static int old_deviceless(struct net *net, void __user *uarg) if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(buf, uarg, IFNAMSIZ)) + if (copy_from_user(buf, (void __user *)args[1], IFNAMSIZ)) return -EFAULT; buf[IFNAMSIZ-1] = 0; @@ -369,33 +369,44 @@ static int old_deviceless(struct net *net, void __user *uarg) int br_ioctl_stub(struct net *net, struct net_bridge *br, unsigned int cmd, struct ifreq *ifr, void __user *uarg) { + int ret = -EOPNOTSUPP; + + rtnl_lock(); + switch (cmd) { case SIOCGIFBR: case SIOCSIFBR: - return old_deviceless(net, uarg); - + ret = old_deviceless(net, uarg); + break; case SIOCBRADDBR: case SIOCBRDELBR: { char buf[IFNAMSIZ]; - if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) - return -EPERM; + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + ret = -EPERM; + break; + } - if (copy_from_user(buf, uarg, IFNAMSIZ)) - return -EFAULT; + if (copy_from_user(buf, uarg, IFNAMSIZ)) { + ret = -EFAULT; + break; + } buf[IFNAMSIZ-1] = 0; if (cmd == SIOCBRADDBR) - return br_add_bridge(net, buf); - - return br_del_bridge(net, buf); + ret = br_add_bridge(net, buf); + else + ret = br_del_bridge(net, buf); } - + break; case SIOCBRADDIF: case SIOCBRDELIF: - return add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF); - + ret = add_del_if(br, ifr->ifr_ifindex, cmd == SIOCBRADDIF); + break; } - return -EOPNOTSUPP; + + rtnl_unlock(); + + return ret; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c939631428b9..51991f1b3e5a 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -770,7 +770,7 @@ int br_fdb_get(struct sk_buff *skb, struct nlattr *tb[], struct net_device *dev, int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, - const unsigned char *addr, u16 vid, + const unsigned char *addr, u16 vid, bool is_local, bool swdev_notify); int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, @@ -1880,6 +1880,17 @@ static inline void br_sysfs_delbr(struct net_device *dev) { return; } /* br_switchdev.c */ #ifdef CONFIG_NET_SWITCHDEV +int br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack); + +void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb); + bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb); void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb); @@ -1908,6 +1919,24 @@ static inline void br_switchdev_frame_unmark(struct sk_buff *skb) skb->offload_fwd_mark = 0; } #else +static inline int +br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline void +br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ +} + static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb) { return false; diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 023de0e958f1..6bf518d78f02 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -134,7 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br, .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags), .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags), }; - struct net_device *dev = info.is_local ? br->dev : dst->dev; + struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev; switch (type) { case RTM_DELNEIGH: @@ -312,23 +312,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, /* Let the bridge know that this port is offloaded, so that it can assign a * switchdev hardware domain to it. */ -int switchdev_bridge_port_offload(struct net_device *brport_dev, - struct net_device *dev, const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb, - bool tx_fwd_offload, - struct netlink_ext_ack *extack) +int br_switchdev_port_offload(struct net_bridge_port *p, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) { struct netdev_phys_item_id ppid; - struct net_bridge_port *p; int err; - ASSERT_RTNL(); - - p = br_port_get_rtnl(brport_dev); - if (!p) - return -ENODEV; - err = dev_get_port_parent_id(dev, &ppid, false); if (err) return err; @@ -348,23 +341,12 @@ out_switchdev_del: return err; } -EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload); -void switchdev_bridge_port_unoffload(struct net_device *brport_dev, - const void *ctx, - struct notifier_block *atomic_nb, - struct notifier_block *blocking_nb) +void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) { - struct net_bridge_port *p; - - ASSERT_RTNL(); - - p = br_port_get_rtnl(brport_dev); - if (!p) - return; - nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb); nbp_switchdev_del(p); } -EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload); diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 12369b604ce9..f6df20808f5e 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -20,9 +20,12 @@ struct j1939_session; enum j1939_sk_errqueue_type { - J1939_ERRQUEUE_ACK, - J1939_ERRQUEUE_SCHED, - J1939_ERRQUEUE_ABORT, + J1939_ERRQUEUE_TX_ACK, + J1939_ERRQUEUE_TX_SCHED, + J1939_ERRQUEUE_TX_ABORT, + J1939_ERRQUEUE_RX_RTS, + J1939_ERRQUEUE_RX_DPO, + J1939_ERRQUEUE_RX_ABORT, }; /* j1939 devices */ @@ -87,6 +90,7 @@ struct j1939_priv { struct list_head j1939_socks; struct kref rx_kref; + u32 rx_tskey; }; void j1939_ecu_put(struct j1939_ecu *ecu); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index b904c06ab0cf..6dff4510687a 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -905,20 +905,33 @@ failure: return NULL; } -static size_t j1939_sk_opt_stats_get_size(void) +static size_t j1939_sk_opt_stats_get_size(enum j1939_sk_errqueue_type type) { - return - nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ - 0; + switch (type) { + case J1939_ERRQUEUE_RX_RTS: + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_TOTAL_SIZE */ + nla_total_size(sizeof(u32)) + /* J1939_NLA_PGN */ + nla_total_size(sizeof(u64)) + /* J1939_NLA_SRC_NAME */ + nla_total_size(sizeof(u64)) + /* J1939_NLA_DEST_NAME */ + nla_total_size(sizeof(u8)) + /* J1939_NLA_SRC_ADDR */ + nla_total_size(sizeof(u8)) + /* J1939_NLA_DEST_ADDR */ + 0; + default: + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ + 0; + } } static struct sk_buff * -j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) +j1939_sk_get_timestamping_opt_stats(struct j1939_session *session, + enum j1939_sk_errqueue_type type) { struct sk_buff *stats; u32 size; - stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC); + stats = alloc_skb(j1939_sk_opt_stats_get_size(type), GFP_ATOMIC); if (!stats) return NULL; @@ -928,32 +941,67 @@ j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) size = min(session->pkt.tx_acked * 7, session->total_message_size); - nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + switch (type) { + case J1939_ERRQUEUE_RX_RTS: + nla_put_u32(stats, J1939_NLA_TOTAL_SIZE, + session->total_message_size); + nla_put_u32(stats, J1939_NLA_PGN, + session->skcb.addr.pgn); + nla_put_u64_64bit(stats, J1939_NLA_SRC_NAME, + session->skcb.addr.src_name, J1939_NLA_PAD); + nla_put_u64_64bit(stats, J1939_NLA_DEST_NAME, + session->skcb.addr.dst_name, J1939_NLA_PAD); + nla_put_u8(stats, J1939_NLA_SRC_ADDR, + session->skcb.addr.sa); + nla_put_u8(stats, J1939_NLA_DEST_ADDR, + session->skcb.addr.da); + break; + default: + nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + } return stats; } -void j1939_sk_errqueue(struct j1939_session *session, - enum j1939_sk_errqueue_type type) +static void __j1939_sk_errqueue(struct j1939_session *session, struct sock *sk, + enum j1939_sk_errqueue_type type) { struct j1939_priv *priv = session->priv; - struct sock *sk = session->sk; struct j1939_sock *jsk; struct sock_exterr_skb *serr; struct sk_buff *skb; char *state = "UNK"; int err; - /* currently we have no sk for the RX session */ - if (!sk) - return; - jsk = j1939_sk(sk); if (!(jsk->state & J1939_SOCK_ERRQUEUE)) return; - skb = j1939_sk_get_timestamping_opt_stats(session); + switch (type) { + case J1939_ERRQUEUE_TX_ACK: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + return; + break; + case J1939_ERRQUEUE_TX_SCHED: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + return; + break; + case J1939_ERRQUEUE_TX_ABORT: + break; + case J1939_ERRQUEUE_RX_RTS: + fallthrough; + case J1939_ERRQUEUE_RX_DPO: + fallthrough; + case J1939_ERRQUEUE_RX_ABORT: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE)) + return; + break; + default: + netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); + } + + skb = j1939_sk_get_timestamping_opt_stats(session, type); if (!skb) return; @@ -964,36 +1012,42 @@ void j1939_sk_errqueue(struct j1939_session *session, serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); switch (type) { - case J1939_ERRQUEUE_ACK: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) { - kfree_skb(skb); - return; - } - + case J1939_ERRQUEUE_TX_ACK: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_ACK; - state = "ACK"; + state = "TX ACK"; break; - case J1939_ERRQUEUE_SCHED: - if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) { - kfree_skb(skb); - return; - } - + case J1939_ERRQUEUE_TX_SCHED: serr->ee.ee_errno = ENOMSG; serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; serr->ee.ee_info = SCM_TSTAMP_SCHED; - state = "SCH"; + state = "TX SCH"; break; - case J1939_ERRQUEUE_ABORT: + case J1939_ERRQUEUE_TX_ABORT: serr->ee.ee_errno = session->err; serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; serr->ee.ee_info = J1939_EE_INFO_TX_ABORT; - state = "ABT"; + state = "TX ABT"; + break; + case J1939_ERRQUEUE_RX_RTS: + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_RTS; + state = "RX RTS"; + break; + case J1939_ERRQUEUE_RX_DPO: + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_DPO; + state = "RX DPO"; + break; + case J1939_ERRQUEUE_RX_ABORT: + serr->ee.ee_errno = session->err; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_RX_ABORT; + state = "RX ABT"; break; - default: - netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); } serr->opt_stats = true; @@ -1008,6 +1062,27 @@ void j1939_sk_errqueue(struct j1939_session *session, kfree_skb(skb); }; +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sock *jsk; + + if (session->sk) { + /* send TX notifications to the socket of origin */ + __j1939_sk_errqueue(session, session->sk, type); + return; + } + + /* spread RX notifications to all sockets subscribed to this session */ + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + if (j1939_sk_recv_match_one(jsk, &session->skcb)) + __j1939_sk_errqueue(session, &jsk->sk, type); + } + spin_unlock_bh(&priv->j1939_socks_lock); +}; + void j1939_sk_send_loop_abort(struct sock *sk, int err) { sk->sk_err = err; diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index efdf79de3db6..bb5c4b8979be 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -260,10 +260,14 @@ static void __j1939_session_drop(struct j1939_session *session) static void j1939_session_destroy(struct j1939_session *session) { - if (session->err) - j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT); - else - j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK); + if (session->transmission) { + if (session->err) + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ABORT); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_ACK); + } else if (session->err) { + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); + } netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); @@ -822,7 +826,7 @@ static int j1939_session_tx_dat(struct j1939_session *session) memcpy(&dat[1], &tpdat[offset], len); ret = j1939_tp_tx_dat(session, dat, len + 1); if (ret < 0) { - /* ENOBUS == CAN interface TX queue is full */ + /* ENOBUFS == CAN interface TX queue is full */ if (ret != -ENOBUFS) netdev_alert(priv->ndev, "%s: 0x%p: queue data error: %i\n", @@ -1044,7 +1048,7 @@ static int j1939_simple_txnext(struct j1939_session *session) if (ret) goto out_free; - j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED); + j1939_sk_errqueue(session, J1939_ERRQUEUE_TX_SCHED); j1939_sk_queue_activate_next(session); out_free: @@ -1076,11 +1080,16 @@ static bool j1939_session_deactivate_locked(struct j1939_session *session) static bool j1939_session_deactivate(struct j1939_session *session) { + struct j1939_priv *priv = session->priv; bool active; - j1939_session_list_lock(session->priv); + j1939_session_list_lock(priv); + /* This function should be called with a session ref-count of at + * least 2. + */ + WARN_ON_ONCE(kref_read(&session->kref) < 2); active = j1939_session_deactivate_locked(session); - j1939_session_list_unlock(session->priv); + j1939_session_list_unlock(priv); return active; } @@ -1111,6 +1120,8 @@ static void __j1939_session_cancel(struct j1939_session *session, if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); } static void j1939_session_cancel(struct j1939_session *session, @@ -1325,6 +1336,8 @@ static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb, session->err = j1939_xtp_abort_to_errno(priv, abort); if (session->sk) j1939_sk_send_loop_abort(session->sk, session->err); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_ABORT); j1939_session_deactivate_activate_next(session); abort_put: @@ -1433,7 +1446,7 @@ j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb) if (session->transmission) { if (session->pkt.tx_acked) j1939_sk_errqueue(session, - J1939_ERRQUEUE_SCHED); + J1939_ERRQUEUE_TX_SCHED); j1939_session_txtimer_cancel(session); j1939_tp_schedule_txtimer(session, 0); } @@ -1625,6 +1638,9 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, session->pkt.rx = 0; session->pkt.tx = 0; + session->tskey = priv->rx_tskey++; + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_RTS); + WARN_ON_ONCE(j1939_session_activate(session)); return session; @@ -1747,6 +1763,9 @@ static void j1939_xtp_rx_dpo_one(struct j1939_session *session, session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data); session->last_cmd = dat[0]; j1939_tp_set_rxtimeout(session, 750); + + if (!session->transmission) + j1939_sk_errqueue(session, J1939_ERRQUEUE_RX_DPO); } static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, @@ -1874,7 +1893,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, if (!session->transmission) j1939_tp_schedule_txtimer(session, 0); } else { - j1939_tp_set_rxtimeout(session, 250); + j1939_tp_set_rxtimeout(session, 750); } session->last_cmd = 0xff; consume_skb(se_skb); diff --git a/net/can/raw.c b/net/can/raw.c index ed4fcb7ab0c3..7105fa4824e4 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -546,10 +546,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, return -EFAULT; } + rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) + if (ro->bound && ro->ifindex) { dev = dev_get_by_index(sock_net(sk), ro->ifindex); + if (!dev) { + if (count > 1) + kfree(filter); + err = -ENODEV; + goto out_fil; + } + } if (ro->bound) { /* (try to) register the new filters */ @@ -584,10 +592,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->count = count; out_fil: - if (dev) - dev_put(dev); - + dev_put(dev); release_sock(sk); + rtnl_unlock(); break; @@ -600,10 +607,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, err_mask &= CAN_ERR_MASK; + rtnl_lock(); lock_sock(sk); - if (ro->bound && ro->ifindex) + if (ro->bound && ro->ifindex) { dev = dev_get_by_index(sock_net(sk), ro->ifindex); + if (!dev) { + err = -ENODEV; + goto out_err; + } + } /* remove current error mask */ if (ro->bound) { @@ -623,10 +636,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, ro->err_mask = err_mask; out_err: - if (dev) - dev_put(dev); - + dev_put(dev); release_sock(sk); + rtnl_unlock(); break; diff --git a/net/core/dev.c b/net/core/dev.c index 64e1a5f63f93..88650791c360 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -676,131 +676,6 @@ void dev_remove_offload(struct packet_offload *po) } EXPORT_SYMBOL(dev_remove_offload); -/****************************************************************************** - * - * Device Boot-time Settings Routines - * - ******************************************************************************/ - -/* Boot time configuration table */ -static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; - -/** - * netdev_boot_setup_add - add new setup entry - * @name: name of the device - * @map: configured settings for the device - * - * Adds new setup entry to the dev_boot_setup list. The function - * returns 0 on error and 1 on success. This is a generic routine to - * all netdevices. - */ -static int netdev_boot_setup_add(char *name, struct ifmap *map) -{ - struct netdev_boot_setup *s; - int i; - - s = dev_boot_setup; - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { - memset(s[i].name, 0, sizeof(s[i].name)); - strlcpy(s[i].name, name, IFNAMSIZ); - memcpy(&s[i].map, map, sizeof(s[i].map)); - break; - } - } - - return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; -} - -/** - * netdev_boot_setup_check - check boot time settings - * @dev: the netdevice - * - * Check boot time settings for the device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found, 1 if they are. - */ -int netdev_boot_setup_check(struct net_device *dev) -{ - struct netdev_boot_setup *s = dev_boot_setup; - int i; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { - if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && - !strcmp(dev->name, s[i].name)) { - dev->irq = s[i].map.irq; - dev->base_addr = s[i].map.base_addr; - dev->mem_start = s[i].map.mem_start; - dev->mem_end = s[i].map.mem_end; - return 1; - } - } - return 0; -} -EXPORT_SYMBOL(netdev_boot_setup_check); - - -/** - * netdev_boot_base - get address from boot time settings - * @prefix: prefix for network device - * @unit: id for network device - * - * Check boot time settings for the base address of device. - * The found settings are set for the device to be used - * later in the device probing. - * Returns 0 if no settings found. - */ -unsigned long netdev_boot_base(const char *prefix, int unit) -{ - const struct netdev_boot_setup *s = dev_boot_setup; - char name[IFNAMSIZ]; - int i; - - sprintf(name, "%s%d", prefix, unit); - - /* - * If device already registered then return base of 1 - * to indicate not to probe for this interface - */ - if (__dev_get_by_name(&init_net, name)) - return 1; - - for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) - if (!strcmp(name, s[i].name)) - return s[i].map.base_addr; - return 0; -} - -/* - * Saves at boot time configured settings for any netdevice. - */ -int __init netdev_boot_setup(char *str) -{ - int ints[5]; - struct ifmap map; - - str = get_options(str, ARRAY_SIZE(ints), ints); - if (!str || !*str) - return 0; - - /* Save settings */ - memset(&map, 0, sizeof(map)); - if (ints[0] > 0) - map.irq = ints[1]; - if (ints[0] > 1) - map.base_addr = ints[2]; - if (ints[0] > 2) - map.mem_start = ints[3]; - if (ints[0] > 3) - map.mem_end = ints[4]; - - /* Add new entry to the list */ - return netdev_boot_setup_add(str, &map); -} - -__setup("netdev=", netdev_boot_setup); - /******************************************************************************* * * Device Interface Subroutines @@ -956,8 +831,7 @@ struct net_device *dev_get_by_name(struct net *net, const char *name) rcu_read_lock(); dev = dev_get_by_name_rcu(net, name); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -1030,8 +904,7 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex) rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -3099,6 +2972,50 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues); #endif /** + * netif_set_real_num_queues - set actual number of RX and TX queues used + * @dev: Network device + * @txq: Actual number of TX queues + * @rxq: Actual number of RX queues + * + * Set the real number of both TX and RX queues. + * Does nothing if the number of queues is already correct. + */ +int netif_set_real_num_queues(struct net_device *dev, + unsigned int txq, unsigned int rxq) +{ + unsigned int old_rxq = dev->real_num_rx_queues; + int err; + + if (txq < 1 || txq > dev->num_tx_queues || + rxq < 1 || rxq > dev->num_rx_queues) + return -EINVAL; + + /* Start from increases, so the error path only does decreases - + * decreases can't fail. + */ + if (rxq > dev->real_num_rx_queues) { + err = netif_set_real_num_rx_queues(dev, rxq); + if (err) + return err; + } + if (txq > dev->real_num_tx_queues) { + err = netif_set_real_num_tx_queues(dev, txq); + if (err) + goto undo_rx; + } + if (rxq < dev->real_num_rx_queues) + WARN_ON(netif_set_real_num_rx_queues(dev, rxq)); + if (txq < dev->real_num_tx_queues) + WARN_ON(netif_set_real_num_tx_queues(dev, txq)); + + return 0; +undo_rx: + WARN_ON(netif_set_real_num_rx_queues(dev, old_rxq)); + return err; +} +EXPORT_SYMBOL(netif_set_real_num_queues); + +/** * netif_get_num_default_rss_queues - default number of RSS queues * * This routine should set an upper limit on the number of RSS queues @@ -5878,7 +5795,7 @@ static void flush_all_backlogs(void) */ ASSERT_RTNL(); - get_online_cpus(); + cpus_read_lock(); cpumask_clear(&flush_cpus); for_each_online_cpu(cpu) { @@ -5896,7 +5813,7 @@ static void flush_all_backlogs(void) for_each_cpu(cpu, &flush_cpus) flush_work(per_cpu_ptr(&flush_works, cpu)); - put_online_cpus(); + cpus_read_unlock(); } /* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ @@ -7615,7 +7532,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, { struct netdev_adjacent *lower; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); @@ -9380,7 +9297,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev, return dev->xdp_state[mode].prog; } -static u8 dev_xdp_prog_count(struct net_device *dev) +u8 dev_xdp_prog_count(struct net_device *dev) { u8 count = 0; int i; @@ -9390,6 +9307,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev) count++; return count; } +EXPORT_SYMBOL_GPL(dev_xdp_prog_count); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { @@ -9483,6 +9401,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack { unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; + struct net_device *upper; + struct list_head *iter; enum bpf_xdp_mode mode; bpf_op_t bpf_op; int err; @@ -9521,6 +9441,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return -EBUSY; } + /* don't allow if an upper device already has a program */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) { + if (dev_xdp_prog_count(upper) > 0) { + NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program"); + return -EEXIST; + } + } + cur_prog = dev_xdp_prog(dev, mode); /* can't replace attached prog with link */ if (link && cur_prog) { diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 4035bce06bf8..0e87237fd871 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -379,7 +379,14 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, void __user *data, case SIOCBRDELIF: if (!netif_device_present(dev)) return -ENODEV; - return br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); + if (!netif_is_bridge_master(dev)) + return -EOPNOTSUPP; + dev_hold(dev); + rtnl_unlock(); + err = br_ioctl_call(net, netdev_priv(dev), cmd, ifr, NULL); + dev_put(dev); + rtnl_lock(); + return err; case SIOCSHWTSTAMP: err = net_hwtstamp_validate(ifr); diff --git a/net/core/devlink.c b/net/core/devlink.c index fd2fc2befba9..b02d54ab59ac 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -804,10 +804,11 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; } -static int -devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops *ops, - struct devlink_port *port, struct sk_buff *msg, - struct netlink_ext_ack *extack, bool *msg_updated) +static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops, + struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) { u8 hw_addr[MAX_ADDR_LEN]; int hw_addr_len; @@ -816,7 +817,8 @@ devlink_port_fn_hw_addr_fill(struct devlink *devlink, const struct devlink_ops * if (!ops->port_function_hw_addr_get) return 0; - err = ops->port_function_hw_addr_get(devlink, port, hw_addr, &hw_addr_len, extack); + err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len, + extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -893,12 +895,11 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate) opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED; } -static int -devlink_port_fn_state_fill(struct devlink *devlink, - const struct devlink_ops *ops, - struct devlink_port *port, struct sk_buff *msg, - struct netlink_ext_ack *extack, - bool *msg_updated) +static int devlink_port_fn_state_fill(const struct devlink_ops *ops, + struct devlink_port *port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) { enum devlink_port_fn_opstate opstate; enum devlink_port_fn_state state; @@ -907,7 +908,7 @@ devlink_port_fn_state_fill(struct devlink *devlink, if (!ops->port_fn_state_get) return 0; - err = ops->port_fn_state_get(devlink, port, &state, &opstate, extack); + err = ops->port_fn_state_get(port, &state, &opstate, extack); if (err) { if (err == -EOPNOTSUPP) return 0; @@ -935,7 +936,6 @@ static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) { - struct devlink *devlink = port->devlink; const struct devlink_ops *ops; struct nlattr *function_attr; bool msg_updated = false; @@ -945,13 +945,12 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por if (!function_attr) return -EMSGSIZE; - ops = devlink->ops; - err = devlink_port_fn_hw_addr_fill(devlink, ops, port, msg, - extack, &msg_updated); + ops = port->devlink->ops; + err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack, + &msg_updated); if (err) goto out; - err = devlink_port_fn_state_fill(devlink, ops, port, msg, extack, - &msg_updated); + err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); out: if (err || !msg_updated) nla_nest_cancel(msg, function_attr); @@ -1269,31 +1268,33 @@ out: return msg->len; } -static int devlink_port_type_set(struct devlink *devlink, - struct devlink_port *devlink_port, +static int devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type port_type) { int err; - if (devlink->ops->port_type_set) { - if (port_type == devlink_port->type) - return 0; - err = devlink->ops->port_type_set(devlink_port, port_type); - if (err) - return err; - devlink_port->desired_type = port_type; - devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + if (!devlink_port->devlink->ops->port_type_set) + return -EOPNOTSUPP; + + if (port_type == devlink_port->type) return 0; - } - return -EOPNOTSUPP; + + err = devlink_port->devlink->ops->port_type_set(devlink_port, + port_type); + if (err) + return err; + + devlink_port->desired_type = port_type; + devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); + return 0; } -static int -devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port *port, - const struct nlattr *attr, struct netlink_ext_ack *extack) +static int devlink_port_function_hw_addr_set(struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) { - const struct devlink_ops *ops; + const struct devlink_ops *ops = port->devlink->ops; const u8 *hw_addr; int hw_addr_len; @@ -1314,17 +1315,16 @@ devlink_port_function_hw_addr_set(struct devlink *devlink, struct devlink_port * } } - ops = devlink->ops; if (!ops->port_function_hw_addr_set) { NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes"); return -EOPNOTSUPP; } - return ops->port_function_hw_addr_set(devlink, port, hw_addr, hw_addr_len, extack); + return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len, + extack); } -static int devlink_port_fn_state_set(struct devlink *devlink, - struct devlink_port *port, +static int devlink_port_fn_state_set(struct devlink_port *port, const struct nlattr *attr, struct netlink_ext_ack *extack) { @@ -1332,18 +1332,18 @@ static int devlink_port_fn_state_set(struct devlink *devlink, const struct devlink_ops *ops; state = nla_get_u8(attr); - ops = devlink->ops; + ops = port->devlink->ops; if (!ops->port_fn_state_set) { NL_SET_ERR_MSG_MOD(extack, "Function does not support state setting"); return -EOPNOTSUPP; } - return ops->port_fn_state_set(devlink, port, state, extack); + return ops->port_fn_state_set(port, state, extack); } -static int -devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, - const struct nlattr *attr, struct netlink_ext_ack *extack) +static int devlink_port_function_set(struct devlink_port *port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) { struct nlattr *tb[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1]; int err; @@ -1357,7 +1357,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; if (attr) { - err = devlink_port_function_hw_addr_set(devlink, port, attr, extack); + err = devlink_port_function_hw_addr_set(port, attr, extack); if (err) return err; } @@ -1367,7 +1367,7 @@ devlink_port_function_set(struct devlink *devlink, struct devlink_port *port, */ attr = tb[DEVLINK_PORT_FN_ATTR_STATE]; if (attr) - err = devlink_port_fn_state_set(devlink, port, attr, extack); + err = devlink_port_fn_state_set(port, attr, extack); if (!err) devlink_port_notify(port, DEVLINK_CMD_PORT_NEW); @@ -1378,14 +1378,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink_port *devlink_port = info->user_ptr[1]; - struct devlink *devlink = devlink_port->devlink; int err; if (info->attrs[DEVLINK_ATTR_PORT_TYPE]) { enum devlink_port_type port_type; port_type = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_TYPE]); - err = devlink_port_type_set(devlink, devlink_port, port_type); + err = devlink_port_type_set(devlink_port, port_type); if (err) return err; } @@ -1394,7 +1393,7 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb, struct nlattr *attr = info->attrs[DEVLINK_ATTR_PORT_FUNCTION]; struct netlink_ext_ack *extack = info->extack; - err = devlink_port_function_set(devlink, devlink_port, attr, extack); + err = devlink_port_function_set(devlink_port, attr, extack); if (err) return err; } @@ -8769,24 +8768,26 @@ static bool devlink_reload_actions_valid(const struct devlink_ops *ops) * @ops: ops * @priv_size: size of user private data * @net: net namespace + * @dev: parent device * * Allocate new devlink instance resources, including devlink index * and name. */ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net) + size_t priv_size, struct net *net, + struct device *dev) { struct devlink *devlink; - if (WARN_ON(!ops)) - return NULL; - + WARN_ON(!ops || !dev); if (!devlink_reload_actions_valid(ops)) return NULL; devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL); if (!devlink) return NULL; + + devlink->dev = dev; devlink->ops = ops; xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); write_pnet(&devlink->_net, net); @@ -8811,12 +8812,9 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns); * devlink_register - Register devlink instance * * @devlink: devlink - * @dev: parent device */ -int devlink_register(struct devlink *devlink, struct device *dev) +int devlink_register(struct devlink *devlink) { - WARN_ON(devlink->dev); - devlink->dev = dev; mutex_lock(&devlink_mutex); list_add_tail(&devlink->list, &devlink_list); devlink_notify(devlink, DEVLINK_CMD_NEW); @@ -9327,18 +9325,10 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: - case DEVLINK_PORT_FLAVOUR_VIRTUAL: n = snprintf(name, len, "p%u", attrs->phys.port_number); if (n < len && attrs->split) n += snprintf(name + n, len - n, "s%u", attrs->phys.split_subport_number); - if (!attrs->split) - n = snprintf(name, len, "p%u", attrs->phys.port_number); - else - n = snprintf(name, len, "p%us%u", - attrs->phys.port_number, - attrs->phys.split_subport_number); - break; case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: @@ -9380,6 +9370,8 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, n = snprintf(name, len, "pf%usf%u", attrs->pci_sf.pf, attrs->pci_sf.sf); break; + case DEVLINK_PORT_FLAVOUR_VIRTUAL: + return -EOPNOTSUPP; } if (n >= len) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index ead2a8aa57b4..49442cae6f69 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -850,8 +850,7 @@ net_dm_hw_metadata_copy(const struct devlink_trap_metadata *metadata) } hw_metadata->input_dev = metadata->input_dev; - if (hw_metadata->input_dev) - dev_hold(hw_metadata->input_dev); + dev_hold(hw_metadata->input_dev); return hw_metadata; @@ -867,8 +866,7 @@ free_hw_metadata: static void net_dm_hw_metadata_free(const struct devlink_trap_metadata *hw_metadata) { - if (hw_metadata->input_dev) - dev_put(hw_metadata->input_dev); + dev_put(hw_metadata->input_dev); kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); diff --git a/net/core/dst.c b/net/core/dst.c index fb3bcba87744..497ef9b3fc6a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -49,8 +49,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops, unsigned short flags) { dst->dev = dev; - if (dev) - dev_hold(dev); + dev_hold(dev); dst->ops = ops; dst_init_metrics(dst, dst_default_metrics.metrics, true); dst->expires = 0UL; @@ -118,8 +117,7 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) if (dst->ops->destroy) dst->ops->destroy(dst); - if (dst->dev) - dev_put(dst->dev); + dev_put(dst->dev); lwtstate_put(dst->lwtstate); diff --git a/net/core/filter.c b/net/core/filter.c index 3b4986e96e9c..3aca07c44fad 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2180,17 +2180,9 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, skb->tstamp = 0; if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, hh_len); - if (unlikely(!skb2)) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); @@ -2214,8 +2206,7 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, } rcu_read_unlock_bh(); if (dst) - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: kfree_skb(skb); return -ENETDOWN; @@ -2287,17 +2278,9 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, skb->tstamp = 0; if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, hh_len); - if (unlikely(!skb2)) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } rcu_read_lock_bh(); @@ -3950,6 +3933,31 @@ void bpf_clear_redirect_map(struct bpf_map *map) } } +DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); +EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key); + +u32 xdp_master_redirect(struct xdp_buff *xdp) +{ + struct net_device *master, *slave; + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); + slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); + if (slave && slave != xdp->rxq->dev) { + /* The target device is different from the receiving device, so + * redirect it to the new device. + * Using XDP_REDIRECT gets the correct behaviour from XDP enabled + * drivers to unmap the packet from their rx ring. + */ + ri->tgt_index = slave->ifindex; + ri->map_id = INT_MAX; + ri->map_type = BPF_MAP_TYPE_UNSPEC; + return XDP_REDIRECT; + } + return XDP_TX; +} +EXPORT_SYMBOL_GPL(xdp_master_redirect); + int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { @@ -5016,6 +5024,40 @@ err_clear: return -EINVAL; } +BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return _bpf_setsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_setsockopt_proto = { + .func = bpf_sk_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level, + int, optname, char *, optval, int, optlen) +{ + return _bpf_getsockopt(sk, level, optname, optval, optlen); +} + +const struct bpf_func_proto bpf_sk_getsockopt_proto = { + .func = bpf_sk_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, int, level, int, optname, char *, optval, int, optlen) { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 39d7be03e568..bac0184cf3de 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1508,7 +1508,7 @@ __be32 flow_get_u32_dst(const struct flow_keys *flow) } EXPORT_SYMBOL(flow_get_u32_dst); -/* Sort the source and destination IP (and the ports if the IP are the same), +/* Sort the source and destination IP and the ports, * to have consistent hash within the two directions */ static inline void __flow_hash_consistentify(struct flow_keys *keys) @@ -1519,11 +1519,11 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) case FLOW_DISSECTOR_KEY_IPV4_ADDRS: addr_diff = (__force u32)keys->addrs.v4addrs.dst - (__force u32)keys->addrs.v4addrs.src; - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); + + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; @@ -1531,13 +1531,13 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) addr_diff = memcmp(&keys->addrs.v6addrs.dst, &keys->addrs.v6addrs.src, sizeof(keys->addrs.v6addrs.dst)); - if ((addr_diff < 0) || - (addr_diff == 0 && - ((__force u16)keys->ports.dst < - (__force u16)keys->ports.src))) { + if (addr_diff < 0) { for (i = 0; i < 4; i++) swap(keys->addrs.v6addrs.src.s6_addr32[i], keys->addrs.v6addrs.dst.s6_addr32[i]); + } + if ((__force u16)keys->ports.dst < + (__force u16)keys->ports.src) { swap(keys->ports.src, keys->ports.dst); } break; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 53e85c70c6e5..2d5bc3a75fae 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -741,12 +741,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; - if (dev) - dev_hold(dev); + dev_hold(dev); if (tbl->pconstructor && tbl->pconstructor(n)) { - if (dev) - dev_put(dev); + dev_put(dev); kfree(n); n = NULL; goto out; @@ -778,8 +776,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, write_unlock_bh(&tbl->lock); if (tbl->pdestructor) tbl->pdestructor(n); - if (n->dev) - dev_put(n->dev); + dev_put(n->dev); kfree(n); return 0; } @@ -812,8 +809,7 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, n->next = NULL; if (tbl->pdestructor) tbl->pdestructor(n); - if (n->dev) - dev_put(n->dev); + dev_put(n->dev); kfree(n); } return -ENOENT; @@ -1662,8 +1658,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) list_del(&parms->list); parms->dead = 1; write_unlock_bh(&tbl->lock); - if (parms->dev) - dev_put(parms->dev); + dev_put(parms->dev); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); } EXPORT_SYMBOL(neigh_parms_release); @@ -2533,6 +2528,13 @@ static bool neigh_master_filtered(struct net_device *dev, int master_idx) return false; master = dev ? netdev_master_upper_dev_get(dev) : NULL; + + /* 0 is already used to denote NDA_MASTER wasn't passed, therefore need another + * invalid value for ifindex to denote "no master". + */ + if (master_idx == -1) + return !!master; + if (!master || master->ifindex != master_idx) return true; @@ -3315,12 +3317,13 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v) struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); + seq_puts(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n"); return 0; } - seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " - "%08lx %08lx %08lx %08lx %08lx %08lx\n", + seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx " + "%08lx %08lx %08lx " + "%08lx %08lx %08lx\n", atomic_read(&tbl->entries), st->allocs, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 5e4eb45b139c..ac116041b35f 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -24,6 +24,8 @@ #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) +#define BIAS_MAX LONG_MAX + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -67,6 +69,10 @@ static int page_pool_init(struct page_pool *pool, */ } + if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT && + pool->p.flags & PP_FLAG_PAGE_FRAG) + return -EINVAL; + if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) return -ENOMEM; @@ -206,6 +212,19 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page) return true; } +static void page_pool_set_pp_info(struct page_pool *pool, + struct page *page) +{ + page->pp = pool; + page->pp_magic |= PP_SIGNATURE; +} + +static void page_pool_clear_pp_info(struct page *page) +{ + page->pp_magic = 0; + page->pp = NULL; +} + static struct page *__page_pool_alloc_page_order(struct page_pool *pool, gfp_t gfp) { @@ -222,7 +241,7 @@ static struct page *__page_pool_alloc_page_order(struct page_pool *pool, return NULL; } - page->pp_magic |= PP_SIGNATURE; + page_pool_set_pp_info(pool, page); /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; @@ -266,7 +285,8 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool, put_page(page); continue; } - page->pp_magic |= PP_SIGNATURE; + + page_pool_set_pp_info(pool, page); pool->alloc.cache[pool->alloc.count++] = page; /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; @@ -345,7 +365,7 @@ void page_pool_release_page(struct page_pool *pool, struct page *page) DMA_ATTR_SKIP_CPU_SYNC); page_pool_set_dma_addr(page, 0); skip_dma_unmap: - page->pp_magic = 0; + page_pool_clear_pp_info(page); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. @@ -405,6 +425,11 @@ static __always_inline struct page * __page_pool_put_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct) { + /* It is not the last user for the page frag case */ + if (pool->p.flags & PP_FLAG_PAGE_FRAG && + page_pool_atomic_sub_frag_count_return(page, 1)) + return NULL; + /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the * regular page allocator APIs. @@ -497,6 +522,84 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, } EXPORT_SYMBOL(page_pool_put_page_bulk); +static struct page *page_pool_drain_frag(struct page_pool *pool, + struct page *page) +{ + long drain_count = BIAS_MAX - pool->frag_users; + + /* Some user is still using the page frag */ + if (likely(page_pool_atomic_sub_frag_count_return(page, + drain_count))) + return NULL; + + if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) { + if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) + page_pool_dma_sync_for_device(pool, page, -1); + + return page; + } + + page_pool_return_page(pool, page); + return NULL; +} + +static void page_pool_free_frag(struct page_pool *pool) +{ + long drain_count = BIAS_MAX - pool->frag_users; + struct page *page = pool->frag_page; + + pool->frag_page = NULL; + + if (!page || + page_pool_atomic_sub_frag_count_return(page, drain_count)) + return; + + page_pool_return_page(pool, page); +} + +struct page *page_pool_alloc_frag(struct page_pool *pool, + unsigned int *offset, + unsigned int size, gfp_t gfp) +{ + unsigned int max_size = PAGE_SIZE << pool->p.order; + struct page *page = pool->frag_page; + + if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) || + size > max_size)) + return NULL; + + size = ALIGN(size, dma_get_cache_alignment()); + *offset = pool->frag_offset; + + if (page && *offset + size > max_size) { + page = page_pool_drain_frag(pool, page); + if (page) + goto frag_reset; + } + + if (!page) { + page = page_pool_alloc_pages(pool, gfp); + if (unlikely(!page)) { + pool->frag_page = NULL; + return NULL; + } + + pool->frag_page = page; + +frag_reset: + pool->frag_users = 1; + *offset = 0; + pool->frag_offset = size; + page_pool_set_frag_count(page, BIAS_MAX); + return page; + } + + pool->frag_users++; + pool->frag_offset = *offset + size; + return page; +} +EXPORT_SYMBOL(page_pool_alloc_frag); + static void page_pool_empty_ring(struct page_pool *pool) { struct page *page; @@ -602,6 +705,8 @@ void page_pool_destroy(struct page_pool *pool) if (!page_pool_put(pool)) return; + page_pool_free_frag(pool); + if (!page_pool_release(pool)) return; @@ -644,7 +749,6 @@ bool page_pool_return_skb_page(struct page *page) * The page will be returned to the pool here regardless of the * 'flipped' fragment being in use or not. */ - page->pp = NULL; page_pool_put_full_page(pp, page, false); return true; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 7e258d255e90..314f97acf39d 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1190,11 +1190,6 @@ static ssize_t pktgen_if_write(struct file *file, * pktgen_xmit() is called */ pkt_dev->last_ok = 1; - - /* override clone_skb if user passed default value - * at module loading time - */ - pkt_dev->clone_skb = 0; } else if (strcmp(f, "queue_xmit") == 0) { pkt_dev->xmit_mode = M_QUEUE_XMIT; pkt_dev->last_ok = 1; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e79aaf1f7139..2dcf1c084b20 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1959,6 +1959,13 @@ static bool link_master_filtered(struct net_device *dev, int master_idx) return false; master = netdev_master_upper_dev_get(dev); + + /* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need + * another invalid value for ifindex to denote "no master". + */ + if (master_idx == -1) + return !!master; + if (!master || master->ifindex != master_idx) return true; @@ -2257,7 +2264,8 @@ invalid_attr: return -EINVAL; } -static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) +static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], + struct netlink_ext_ack *extack) { if (dev) { if (tb[IFLA_ADDRESS] && @@ -2284,7 +2292,7 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) return -EOPNOTSUPP; if (af_ops->validate_link_af) { - err = af_ops->validate_link_af(dev, af); + err = af_ops->validate_link_af(dev, af, extack); if (err < 0) return err; } @@ -2592,7 +2600,7 @@ static int do_setlink(const struct sk_buff *skb, const struct net_device_ops *ops = dev->netdev_ops; int err; - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; @@ -3290,7 +3298,7 @@ replay: m_ops = master_dev->rtnl_link_ops; } - err = validate_linkmsg(dev, tb); + err = validate_linkmsg(dev, tb, extack); if (err < 0) return err; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fcbd977186b0..9240af2ea8c9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1790,6 +1790,48 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) EXPORT_SYMBOL(skb_realloc_headroom); /** + * skb_expand_head - reallocate header of &sk_buff + * @skb: buffer to reallocate + * @headroom: needed headroom + * + * Unlike skb_realloc_headroom, this one does not allocate a new skb + * if possible; copies skb->sk to new skb as needed + * and frees original skb in case of failures. + * + * It expect increased headroom and generates warning otherwise. + */ + +struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom) +{ + int delta = headroom - skb_headroom(skb); + + if (WARN_ONCE(delta <= 0, + "%s is expecting an increase in the headroom", __func__)) + return skb; + + /* pskb_expand_head() might crash, if skb is shared */ + if (skb_shared(skb)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (likely(nskb)) { + if (skb->sk) + skb_set_owner_w(nskb, skb->sk); + consume_skb(skb); + } else { + kfree_skb(skb); + } + skb = nskb; + } + if (skb && + pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { + kfree_skb(skb); + skb = NULL; + } + return skb; +} +EXPORT_SYMBOL(skb_expand_head); + +/** * skb_copy_expand - copy and expand sk_buff * @skb: buffer to copy * @newheadroom: new free bytes at head @@ -4327,7 +4369,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We dont need to clear skbinfo->nr_frags here */ - new_truesize = SKB_TRUESIZE(sizeof(struct sk_buff)); + new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff)); delta_truesize = skb->truesize - new_truesize; skb->truesize = new_truesize; NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 15d71288e741..2d6249b28928 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -584,29 +584,42 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, return sk_psock_skb_ingress(psock, skb); } -static void sock_drop(struct sock *sk, struct sk_buff *skb) +static void sk_psock_skb_state(struct sk_psock *psock, + struct sk_psock_work_state *state, + struct sk_buff *skb, + int len, int off) { - sk_drops_add(sk, skb); - kfree_skb(skb); + spin_lock_bh(&psock->ingress_lock); + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { + state->skb = skb; + state->len = len; + state->off = off; + } else { + sock_drop(psock->sk, skb); + } + spin_unlock_bh(&psock->ingress_lock); } static void sk_psock_backlog(struct work_struct *work) { struct sk_psock *psock = container_of(work, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; - struct sk_buff *skb; + struct sk_buff *skb = NULL; bool ingress; u32 len, off; int ret; mutex_lock(&psock->work_mutex); - if (state->skb) { + if (unlikely(state->skb)) { + spin_lock_bh(&psock->ingress_lock); skb = state->skb; len = state->len; off = state->off; state->skb = NULL; - goto start; + spin_unlock_bh(&psock->ingress_lock); } + if (skb) + goto start; while ((skb = skb_dequeue(&psock->ingress_skb))) { len = skb->len; @@ -621,9 +634,8 @@ start: len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { - state->skb = skb; - state->len = len; - state->off = off; + sk_psock_skb_state(psock, state, skb, + len, off); goto end; } /* Hard errors break pipe and stop xmit. */ @@ -722,6 +734,11 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } + kfree_skb(psock->work_state.skb); + /* We null the skb here to ensure that calls to sk_psock_backlog + * do not pick up the free'd skb. + */ + psock->work_state.skb = NULL; __sk_psock_purge_ingress_msg(psock); } @@ -773,8 +790,6 @@ static void sk_psock_destroy(struct work_struct *work) void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - sk_psock_stop(psock, false); - write_lock_bh(&sk->sk_callback_lock); sk_psock_restore_proto(sk, psock); rcu_assign_sk_user_data(sk, NULL); @@ -784,6 +799,8 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) sk_psock_stop_verdict(sk, psock); write_unlock_bh(&sk->sk_callback_lock); + sk_psock_stop(psock, false); + INIT_RCU_WORK(&psock->rwork, sk_psock_destroy); queue_rcu_work(system_wq, &psock->rwork); } diff --git a/net/core/sock.c b/net/core/sock.c index 9671c32e6ef5..aada649e07e8 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1358,6 +1358,15 @@ set_sndbuf: ret = sock_bindtoindex_locked(sk, val); break; + case SO_BUF_LOCK: + if (val & ~SOCK_BUF_LOCK_MASK) { + ret = -EINVAL; + break; + } + sk->sk_userlocks = val | (sk->sk_userlocks & + ~SOCK_BUF_LOCK_MASK); + break; + default: ret = -ENOPROTOOPT; break; @@ -1720,6 +1729,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val64 = sock_net(sk)->net_cookie; break; + case SO_BUF_LOCK: + v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index d1c50a48614b..0ee7d4c0c955 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -521,8 +521,7 @@ int dn_dev_set_default(struct net_device *dev, int force) } spin_unlock(&dndev_lock); - if (old) - dev_put(old); + dev_put(old); return rv; } @@ -536,8 +535,7 @@ static void dn_dev_check_default(struct net_device *dev) } spin_unlock(&dndev_lock); - if (dev) - dev_put(dev); + dev_put(dev); } /* diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 387a7e81dd00..269c029ad74f 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -92,8 +92,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) } change_nexthops(fi) { - if (nh->nh_dev) - dev_put(nh->nh_dev); + dev_put(nh->nh_dev); nh->nh_dev = NULL; } endfor_nexthops(fi); kfree(fi); @@ -389,7 +388,7 @@ link_it: return ofi; } - refcount_inc(&fi->fib_treeref); + refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock(&dn_fib_info_lock); fi->fib_next = dn_fib_info_list; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 729d3de6020d..7e85f2a1ae25 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1026,8 +1026,7 @@ source_ok: if (!fld.daddr) { fld.daddr = fld.saddr; - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); err = -EINVAL; dev_out = init_net.loopback_dev; if (!dev_out->dn_ptr) @@ -1084,8 +1083,7 @@ source_ok: neigh_release(neigh); neigh = NULL; } else { - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); if (dn_dev_islocal(neigh->dev, fld.daddr)) { dev_out = init_net.loopback_dev; res.type = RTN_LOCAL; @@ -1144,8 +1142,7 @@ select_source: if (res.type == RTN_LOCAL) { if (!fld.saddr) fld.saddr = fld.daddr; - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); dev_out = init_net.loopback_dev; dev_hold(dev_out); if (!dev_out->dn_ptr) @@ -1168,8 +1165,7 @@ select_source: if (!fld.saddr) fld.saddr = DN_FIB_RES_PREFSRC(res); - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); dev_out = DN_FIB_RES_DEV(res); dev_hold(dev_out); fld.flowidn_oif = dev_out->ifindex; @@ -1222,8 +1218,7 @@ done: neigh_release(neigh); if (free_res) dn_fib_res_put(&res); - if (dev_out) - dev_put(dev_out); + dev_put(dev_out); out: return err; @@ -1503,8 +1498,7 @@ done: if (free_res) dn_fib_res_put(&res); dev_put(in_dev); - if (out_dev) - dev_put(out_dev); + dev_put(out_dev); out: return err; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index bca1b5d66df2..970906eb5b2c 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -138,6 +138,7 @@ config NET_DSA_TAG_LAN9303 config NET_DSA_TAG_SJA1105 tristate "Tag driver for NXP SJA1105 switches" + depends on (NET_DSA_SJA1105 && NET_DSA_SJA1105_PTP) || !NET_DSA_SJA1105 || !NET_DSA_SJA1105_PTP select PACKING help Say Y or M if you want to enable support for tagging frames with the diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 84cad1be9ce4..1dc45e40f961 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -238,7 +238,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; - nskb = cpu_dp->rcv(skb, dev, pt); + nskb = cpu_dp->rcv(skb, dev); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c7fa85fb3086..8150e16aaa55 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -311,6 +311,9 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } +/* Assign the default CPU port (the first one in the tree) to all ports of the + * fabric which don't already have one as part of their own switch. + */ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) { struct dsa_port *cpu_dp, *dp; @@ -321,15 +324,48 @@ static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) return -EINVAL; } - /* Assign the default CPU port to all ports of the fabric */ - list_for_each_entry(dp, &dst->ports, list) + list_for_each_entry(dp, &dst->ports, list) { + if (dp->cpu_dp) + continue; + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) dp->cpu_dp = cpu_dp; + } return 0; } -static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) +/* Perform initial assignment of CPU ports to user ports and DSA links in the + * fabric, giving preference to CPU ports local to each switch. Default to + * using the first CPU port in the switch tree if the port does not have a CPU + * port local to this switch. + */ +static int dsa_tree_setup_cpu_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp, *dp; + + list_for_each_entry(cpu_dp, &dst->ports, list) { + if (!dsa_port_is_cpu(cpu_dp)) + continue; + + list_for_each_entry(dp, &dst->ports, list) { + /* Prefer a local CPU port */ + if (dp->ds != cpu_dp->ds) + continue; + + /* Prefer the first local CPU port found */ + if (dp->cpu_dp) + continue; + + if (dsa_port_is_user(dp) || dsa_port_is_dsa(dp)) + dp->cpu_dp = cpu_dp; + } + } + + return dsa_tree_setup_default_cpu(dst); +} + +static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -710,13 +746,14 @@ static int dsa_switch_setup(struct dsa_switch *ds) /* Add the switch to devlink before calling setup, so that setup can * add dpipe tables */ - ds->devlink = devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv)); + ds->devlink = + devlink_alloc(&dsa_devlink_ops, sizeof(*dl_priv), ds->dev); if (!ds->devlink) return -ENOMEM; dl_priv = devlink_priv(ds->devlink); dl_priv->ds = ds; - err = devlink_register(ds->devlink, ds->dev); + err = devlink_register(ds->devlink); if (err) goto free_devlink; @@ -921,13 +958,13 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) if (!complete) return 0; - err = dsa_tree_setup_default_cpu(dst); + err = dsa_tree_setup_cpu_ports(dst); if (err) return err; err = dsa_tree_setup_switches(dst); if (err) - goto teardown_default_cpu; + goto teardown_cpu_ports; err = dsa_tree_setup_master(dst); if (err) @@ -947,8 +984,8 @@ teardown_master: dsa_tree_teardown_master(dst); teardown_switches: dsa_tree_teardown_switches(dst); -teardown_default_cpu: - dsa_tree_teardown_default_cpu(dst); +teardown_cpu_ports: + dsa_tree_teardown_cpu_ports(dst); return err; } @@ -966,7 +1003,7 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_switches(dst); - dsa_tree_teardown_default_cpu(dst); + dsa_tree_teardown_cpu_ports(dst); list_for_each_entry_safe(dl, next, &dst->rtable, list) { list_del(&dl->list); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index e43c5dc04282..9575cabd3ec3 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -199,7 +199,7 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, /* port.c */ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); -int dsa_port_set_state(struct dsa_port *dp, u8 state); +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age); int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy); int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); void dsa_port_disable_rt(struct dsa_port *dp); @@ -241,11 +241,9 @@ int dsa_port_host_mdb_del(const struct dsa_port *dp, int dsa_port_pre_bridge_flags(const struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int dsa_port_bridge_flags(const struct dsa_port *dp, +int dsa_port_bridge_flags(struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct netlink_ext_ack *extack); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack); diff --git a/net/dsa/port.c b/net/dsa/port.c index b927d94b6934..831d50d28d59 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -30,7 +30,52 @@ static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) return dsa_tree_notify(dp->ds->dst, e, v); } -int dsa_port_set_state(struct dsa_port *dp, u8 state) +static void dsa_port_notify_bridge_fdb_flush(const struct dsa_port *dp) +{ + struct net_device *brport_dev = dsa_port_to_bridge_port(dp); + struct switchdev_notifier_fdb_info info = { + /* flush all VLANs */ + .vid = 0, + }; + + /* When the port becomes standalone it has already left the bridge. + * Don't notify the bridge in that case. + */ + if (!brport_dev) + return; + + call_switchdev_notifiers(SWITCHDEV_FDB_FLUSH_TO_BRIDGE, + brport_dev, &info.info, NULL); +} + +static void dsa_port_fast_age(const struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + + if (!ds->ops->port_fast_age) + return; + + ds->ops->port_fast_age(ds, dp->index); + + dsa_port_notify_bridge_fdb_flush(dp); +} + +static bool dsa_port_can_configure_learning(struct dsa_port *dp) +{ + struct switchdev_brport_flags flags = { + .mask = BR_LEARNING, + }; + struct dsa_switch *ds = dp->ds; + int err; + + if (!ds->ops->port_bridge_flags || !ds->ops->port_pre_bridge_flags) + return false; + + err = ds->ops->port_pre_bridge_flags(ds, dp->index, flags, NULL); + return !err; +} + +int dsa_port_set_state(struct dsa_port *dp, u8 state, bool do_fast_age) { struct dsa_switch *ds = dp->ds; int port = dp->index; @@ -40,10 +85,14 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) ds->ops->port_stp_state_set(ds, port, state); - if (ds->ops->port_fast_age) { + if (!dsa_port_can_configure_learning(dp) || + (do_fast_age && dp->learning)) { /* Fast age FDB entries or flush appropriate forwarding database * for the given port, if we are moving it from Learning or * Forwarding state, to Disabled or Blocking or Listening state. + * Ports that were standalone before the STP state change don't + * need to fast age the FDB, since address learning is off in + * standalone mode. */ if ((dp->stp_state == BR_STATE_LEARNING || @@ -51,7 +100,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) (state == BR_STATE_DISABLED || state == BR_STATE_BLOCKING || state == BR_STATE_LISTENING)) - ds->ops->port_fast_age(ds, port); + dsa_port_fast_age(dp); } dp->stp_state = state; @@ -59,11 +108,12 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state) return 0; } -static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +static void dsa_port_set_state_now(struct dsa_port *dp, u8 state, + bool do_fast_age) { int err; - err = dsa_port_set_state(dp, state); + err = dsa_port_set_state(dp, state, do_fast_age); if (err) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } @@ -81,7 +131,7 @@ int dsa_port_enable_rt(struct dsa_port *dp, struct phy_device *phy) } if (!dp->bridge_dev) - dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + dsa_port_set_state_now(dp, BR_STATE_FORWARDING, false); if (dp->pl) phylink_start(dp->pl); @@ -109,7 +159,7 @@ void dsa_port_disable_rt(struct dsa_port *dp) phylink_stop(dp->pl); if (!dp->bridge_dev) - dsa_port_set_state_now(dp, BR_STATE_DISABLED); + dsa_port_set_state_now(dp, BR_STATE_DISABLED, false); if (ds->ops->port_disable) ds->ops->port_disable(ds, port); @@ -178,7 +228,7 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, if (err) return err; - err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev)); + err = dsa_port_set_state(dp, br_port_get_stp_state(brport_dev), false); if (err && err != -EOPNOTSUPP) return err; @@ -186,10 +236,6 @@ static int dsa_port_switchdev_sync_attrs(struct dsa_port *dp, if (err && err != -EOPNOTSUPP) return err; - err = dsa_port_mrouter(dp->cpu_dp, br_multicast_router(br), extack); - if (err && err != -EOPNOTSUPP) - return err; - err = dsa_port_ageing_time(dp, br_get_ageing_time(br)); if (err && err != -EOPNOTSUPP) return err; @@ -215,16 +261,10 @@ static void dsa_port_switchdev_unsync_attrs(struct dsa_port *dp) /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - dsa_port_set_state_now(dp, BR_STATE_FORWARDING); + dsa_port_set_state_now(dp, BR_STATE_FORWARDING, true); /* VLAN filtering is handled by dsa_switch_bridge_leave */ - /* Some drivers treat the notification for having a local multicast - * router by allowing multicast to be flooded to the CPU, so we should - * allow this in standalone mode too. - */ - dsa_port_mrouter(dp->cpu_dp, true, NULL); - /* Ageing time may be global to the switch chip, so don't change it * here because we have no good reason (or value) to change it to. */ @@ -639,27 +679,35 @@ int dsa_port_pre_bridge_flags(const struct dsa_port *dp, return ds->ops->port_pre_bridge_flags(ds, dp->index, flags, extack); } -int dsa_port_bridge_flags(const struct dsa_port *dp, +int dsa_port_bridge_flags(struct dsa_port *dp, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack) { struct dsa_switch *ds = dp->ds; + int err; if (!ds->ops->port_bridge_flags) return -EOPNOTSUPP; - return ds->ops->port_bridge_flags(ds, dp->index, flags, extack); -} + err = ds->ops->port_bridge_flags(ds, dp->index, flags, extack); + if (err) + return err; -int dsa_port_mrouter(struct dsa_port *dp, bool mrouter, - struct netlink_ext_ack *extack) -{ - struct dsa_switch *ds = dp->ds; + if (flags.mask & BR_LEARNING) { + bool learning = flags.val & BR_LEARNING; - if (!ds->ops->port_set_mrouter) - return -EOPNOTSUPP; + if (learning == dp->learning) + return 0; + + if ((dp->learning && !learning) && + (dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING)) + dsa_port_fast_age(dp); - return ds->ops->port_set_mrouter(ds, dp->index, mrouter, extack); + dp->learning = learning; + } + + return 0; } int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 6e1135d3ee33..acf73db5cafc 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -286,7 +286,7 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) return -EOPNOTSUPP; - ret = dsa_port_set_state(dp, attr->u.stp_state); + ret = dsa_port_set_state(dp, attr->u.stp_state, true); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) @@ -314,12 +314,6 @@ static int dsa_slave_port_attr_set(struct net_device *dev, const void *ctx, ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); break; - case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: - if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) - return -EOPNOTSUPP; - - ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack); - break; default: ret = -EOPNOTSUPP; break; diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 0efae1a372b3..8a02ac44282f 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -44,8 +44,7 @@ static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb, } static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb, - struct net_device *ndev, - struct packet_type *pt) + struct net_device *ndev) { u8 ver, port; u16 hdr; diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index a27f5096777a..96e93b544a0d 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -136,7 +136,6 @@ static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, */ static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, unsigned int offset) { int source_port; @@ -182,13 +181,12 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, } -static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev) { struct sk_buff *nskb; /* skb->data points to the EtherType, the tag is right before it */ - nskb = brcm_tag_rcv_ll(skb, dev, pt, 2); + nskb = brcm_tag_rcv_ll(skb, dev, 2); if (!nskb) return nskb; @@ -251,8 +249,7 @@ static struct sk_buff *brcm_leg_tag_xmit(struct sk_buff *skb, } static struct sk_buff *brcm_leg_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { int source_port; u8 *brcm_tag; @@ -302,11 +299,10 @@ static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, } static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { /* tag is prepended to the packet */ - return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN); + return brcm_tag_rcv_ll(skb, dev, ETH_HLEN); } static const struct dsa_device_ops brcm_prepend_netdev_ops = { diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 3607499d0697..e32f8160e895 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -332,8 +332,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) return dsa_xmit_ll(skb, dev, 0); } -static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev) { if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) return NULL; @@ -373,8 +372,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev) { if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) return NULL; diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index 5985dab06ab8..df7140984da3 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -75,8 +75,7 @@ static struct sk_buff *gswip_tag_xmit(struct sk_buff *skb, } static struct sk_buff *gswip_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { int port; u8 *gswip_tag; diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index c41208cbd936..f64b805303cd 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -29,8 +29,7 @@ static struct sk_buff *hellcreek_xmit(struct sk_buff *skb, } static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { /* Tag decoding */ u8 *tag = skb_tail_pointer(skb) - HELLCREEK_TAG_LEN; diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 1c2dfa80f9b0..fa1d60d13ad9 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -67,8 +67,7 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev) { u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; @@ -134,8 +133,7 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, return skb; } -static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) { /* Tag decoding */ u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index cf7cf2fa1240..58d3a0e712d2 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -74,8 +74,7 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev) { __be16 *lan9303_tag; u16 lan9303_tag1; diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 3fb80e43f3a5..bbf37c031d44 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -61,8 +61,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, return skb; } -static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev) { u16 hdr; int port; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 3252634a29b8..d37ab98e7fe1 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -55,8 +55,7 @@ static struct sk_buff *seville_xmit(struct sk_buff *skb, } static struct sk_buff *ocelot_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { u64 src_port, qos_class; u64 vlan_tci, tag_type; diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index c95de71d13b0..3038a257ba05 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -38,8 +38,7 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb, } static struct sk_buff *ocelot_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { int src_port, switch_id; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 693bda013065..6e3136990491 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -48,8 +48,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev) { u8 ver; u16 hdr; diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index f6b63aad6551..aaddca3c0245 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -64,8 +64,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, } static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb, - struct net_device *dev, - struct packet_type *pt) + struct net_device *dev) { u16 protport; __be16 *p; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 664cb802b71a..38b2792f971d 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -391,8 +391,7 @@ static void sja1105_vlan_rcv(struct sk_buff *skb, int *source_port, } static struct sk_buff *sja1105_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { int source_port = -1, switch_id = -1; struct sja1105_meta meta = {0}; @@ -546,12 +545,11 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, } static struct sk_buff *sja1110_rcv(struct sk_buff *skb, - struct net_device *netdev, - struct packet_type *pt) + struct net_device *netdev) { int source_port = -1, switch_id = -1; bool host_only = false; - u16 vid; + u16 vid = 0; if (sja1110_skb_has_inband_control_extension(skb)) { skb = sja1110_rcv_inband_control_extension(skb, &source_port, diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index ba73804340a5..5749ba85c2b8 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -24,8 +24,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev) { u8 *trailer; int source_port; diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c index da231c16ac82..ff442b8af636 100644 --- a/net/dsa/tag_xrs700x.c +++ b/net/dsa/tag_xrs700x.c @@ -25,8 +25,7 @@ static struct sk_buff *xrs700x_xmit(struct sk_buff *skb, struct net_device *dev) return skb; } -static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) +static struct sk_buff *xrs700x_rcv(struct sk_buff *skb, struct net_device *dev) { int source_port; u8 *trailer; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 171ba75b74c9..73fce9467467 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,8 +62,6 @@ #include <linux/uaccess.h> #include <net/pkt_sched.h> -__setup("ether=", netdev_boot_setup); - /** * eth_header - create the Ethernet header * @skb: buffer to alter diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index b0fa2b00ad43..81fa36a4c9c4 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -24,6 +24,7 @@ #include <linux/rtnetlink.h> #include <linux/sched/signal.h> #include <linux/net.h> +#include <linux/pm_runtime.h> #include <net/devlink.h> #include <net/xdp_sock_drv.h> #include <net/flow_offload.h> @@ -2692,7 +2693,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) int rc; netdev_features_t old_features; - if (!dev || !netif_device_present(dev)) + if (!dev) return -ENODEV; if (copy_from_user(ðcmd, useraddr, sizeof(ethcmd))) @@ -2748,10 +2749,18 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) return -EPERM; } + if (dev->dev.parent) + pm_runtime_get_sync(dev->dev.parent); + + if (!netif_device_present(dev)) { + rc = -ENODEV; + goto out; + } + if (dev->ethtool_ops->begin) { rc = dev->ethtool_ops->begin(dev); - if (rc < 0) - return rc; + if (rc < 0) + goto out; } old_features = dev->features; @@ -2970,6 +2979,9 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *useraddr) if (old_features != dev->features) netdev_features_change(dev); +out: + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); return rc; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 73e0f5b626bf..1797a0a90019 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -2,6 +2,7 @@ #include <net/sock.h> #include <linux/ethtool_netlink.h> +#include <linux/pm_runtime.h> #include "netlink.h" static struct genl_family ethtool_genl_family; @@ -29,6 +30,44 @@ const struct nla_policy ethnl_header_policy_stats[] = { ETHTOOL_FLAGS_STATS), }; +int ethnl_ops_begin(struct net_device *dev) +{ + int ret; + + if (!dev) + return -ENODEV; + + if (dev->dev.parent) + pm_runtime_get_sync(dev->dev.parent); + + if (!netif_device_present(dev)) { + ret = -ENODEV; + goto err; + } + + if (dev->ethtool_ops->begin) { + ret = dev->ethtool_ops->begin(dev); + if (ret) + goto err; + } + + return 0; +err: + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); + + return ret; +} + +void ethnl_ops_complete(struct net_device *dev) +{ + if (dev->ethtool_ops->complete) + dev->ethtool_ops->complete(dev); + + if (dev->dev.parent) + pm_runtime_put(dev->dev.parent); +} + /** * ethnl_parse_header_dev_get() - parse request header * @req_info: structure to put results into @@ -101,12 +140,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, return -EINVAL; } - if (dev && !netif_device_present(dev)) { - dev_put(dev); - NL_SET_ERR_MSG(extack, "device not present"); - return -ENODEV; - } - req_info->dev = dev; req_info->flags = flags; return 0; @@ -365,8 +398,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ops->cleanup_data(reply_data); genlmsg_end(rskb, reply_payload); - if (req_info->dev) - dev_put(req_info->dev); + dev_put(req_info->dev); kfree(reply_data); kfree(req_info); return genlmsg_reply(rskb, info); @@ -378,8 +410,7 @@ err_cleanup: if (ops->cleanup_data) ops->cleanup_data(reply_data); err_dev: - if (req_info->dev) - dev_put(req_info->dev); + dev_put(req_info->dev); kfree(reply_data); kfree(req_info); return ret; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 3fc395c86702..077aac3929a8 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -247,19 +247,8 @@ struct ethnl_reply_data { struct net_device *dev; }; -static inline int ethnl_ops_begin(struct net_device *dev) -{ - if (dev && dev->ethtool_ops->begin) - return dev->ethtool_ops->begin(dev); - else - return 0; -} - -static inline void ethnl_ops_complete(struct net_device *dev) -{ - if (dev && dev->ethtool_ops->complete) - dev->ethtool_ops->complete(dev); -} +int ethnl_ops_begin(struct net_device *dev); +void ethnl_ops_complete(struct net_device *dev); /** * struct ethnl_request_ops - unified handling of GET requests diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 88215b5c93aa..dd5a45f8a78a 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -340,8 +340,7 @@ nla_put_failure: out_dev: wpan_phy_put(phy); out: - if (dev) - dev_put(dev); + dev_put(dev); return rc; } diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 0cf2374c143b..277124f206e0 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -2226,8 +2226,7 @@ static void nl802154_post_doit(const struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL802154_FLAG_NEED_WPAN_DEV) { struct wpan_dev *wpan_dev = info->user_ptr[1]; - if (wpan_dev->netdev) - dev_put(wpan_dev->netdev); + dev_put(wpan_dev->netdev); } else { dev_put(info->user_ptr[1]); } diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index f5077de3619e..90233efa1f6b 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -41,8 +41,7 @@ ieee802154_get_dev(struct net *net, const struct ieee802154_addr *addr) ieee802154_devaddr_to_raw(hwaddr, addr->extended_addr); rcu_read_lock(); dev = dev_getbyhwaddr_rcu(net, ARPHRD_IEEE802154, hwaddr); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); break; case IEEE802154_ADDR_SHORT: diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c82aded8da7d..f4468980b675 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1950,7 +1950,8 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { }; static int inet_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; @@ -1959,7 +1960,7 @@ static int inet_validate_link_af(const struct net_device *dev, return -EAFNOSUPPORT; err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, - inet_af_policy, NULL); + inet_af_policy, extack); if (err < 0) return err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index fa19f4cdf3a4..b42c429cebbe 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -208,9 +208,7 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) void fib_nh_common_release(struct fib_nh_common *nhc) { - if (nhc->nhc_dev) - dev_put(nhc->nhc_dev); - + dev_put(nhc->nhc_dev); lwtstate_put(nhc->nhc_lwtstate); rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output); rt_fibinfo_free(&nhc->nhc_rth_input); @@ -1551,7 +1549,7 @@ link_it: return ofi; } - refcount_inc(&fi->fib_treeref); + refcount_set(&fi->fib_treeref, 1); refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c695d294a5df..8b30cadff708 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1095,8 +1095,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) sizeof(struct in6_addr)) goto send_mal_query; dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); - if (dev) - dev_hold(dev); + dev_hold(dev); break; #endif default: diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 03589a04f9aa..7e5072722f05 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2233,7 +2233,7 @@ static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, iml->sfmode, psf->sl_count, psf->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psf, sl_addr, psf->sl_max), &sk->sk_omem_alloc); kfree_rcu(psf, rcu); return err; } @@ -2382,7 +2382,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP_SFLSIZE(count), GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -2393,7 +2394,8 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } rcu_assign_pointer(pmc->sflist, newpsl); @@ -2468,19 +2470,22 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) goto done; } if (msf->imsf_numsrc) { - newpsl = sock_kmalloc(sk, IP_SFLSIZE(msf->imsf_numsrc), - GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, + msf->imsf_numsrc), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; } newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc; - memcpy(newpsl->sl_addr, msf->imsf_slist, - msf->imsf_numsrc * sizeof(msf->imsf_slist[0])); + memcpy(newpsl->sl_addr, msf->imsf_slist_flex, + flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc)); err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr, msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { - sock_kfree_s(sk, newpsl, IP_SFLSIZE(newpsl->sl_max)); + sock_kfree_s(sk, newpsl, + struct_size(newpsl, sl_addr, + newpsl->sl_max)); goto done; } } else { @@ -2493,7 +2498,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); /* decrease mem now to avoid the memleak warning */ - atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, @@ -2551,14 +2557,14 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf, count = psl->sl_count; } copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc; - len = copycount * sizeof(psl->sl_addr[0]); + len = flex_array_size(psl, sl_addr, copycount); msf->imsf_numsrc = count; if (put_user(IP_MSFILTER_SIZE(copycount), optlen) || copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) { return -EFAULT; } if (len && - copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len)) + copy_to_user(&optval->imsf_slist_flex[0], psl->sl_addr, len)) return -EFAULT; return 0; done: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a202dcec0dc2..6b04a88466b2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -198,19 +198,10 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s } else if (rt->rt_type == RTN_BROADCAST) IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len); - /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { - struct sk_buff *skb2; - - skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); - if (!skb2) { - kfree_skb(skb); + skb = skb_expand_head(skb, hh_len); + if (!skb) return -ENOMEM; - } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } if (lwtunnel_xmit_redirect(dst->lwtstate)) { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ec6036713e2c..b297bb28556e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -663,12 +663,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex, struct sockaddr_storage *group, struct sockaddr_storage *list) { - int msize = IP_MSFILTER_SIZE(numsrc); struct ip_msfilter *msf; struct sockaddr_in *psin; int err, i; - msf = kmalloc(msize, GFP_KERNEL); + msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL); if (!msf) return -ENOBUFS; @@ -684,7 +683,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex, if (psin->sin_family != AF_INET) goto Eaddrnotavail; - msf->imsf_slist[i] = psin->sin_addr.s_addr; + msf->imsf_slist_flex[i] = psin->sin_addr.s_addr; } err = ip_mc_msfilter(sk, msf, ifindex); kfree(msf); @@ -791,7 +790,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) goto out_free_gsf; err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc, - gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist); + gsf->gf_fmode, &gsf->gf_group, + gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return err; @@ -800,7 +800,7 @@ out_free_gsf: static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; unsigned int n; void *p; @@ -814,7 +814,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, p = kmalloc(optlen + 4, GFP_KERNEL); if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ err = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) @@ -827,7 +827,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_gsf; err = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_gsf; /* numsrc >= (4G-140)/128 overflow in 32 bits */ @@ -835,7 +835,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf) goto out_free_gsf; err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode, - &gf32->gf_group, gf32->gf_slist); + &gf32->gf_group, gf32->gf_slist_flex); out_free_gsf: kfree(p); return err; @@ -1456,7 +1456,7 @@ static bool getsockopt_needs_rtnl(int optname) static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1468,7 +1468,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, return -EFAULT; num = gsf.gf_numsrc; - err = ip_mc_gsfget(sk, &gsf, p->gf_slist); + err = ip_mc_gsfget(sk, &gsf, p->gf_slist_flex); if (err) return err; if (gsf.gf_numsrc < num) @@ -1482,7 +1482,7 @@ static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval, static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1499,7 +1499,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval, num = gf.gf_numsrc = gf32.gf_numsrc; gf.gf_group = gf32.gf_group; - err = ip_mc_gsfget(sk, &gf, p->gf_slist); + err = ip_mc_gsfget(sk, &gf, p->gf_slist_flex); if (err) return err; if (gf.gf_numsrc < num) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 7f0e810c06f4..fe9101d3d69e 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -390,7 +390,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, tunnel->i_seqno = ntohl(tpi->seq) + 1; } - skb_reset_network_header(skb); + skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0); err = IP_ECN_decapsulate(iph, skb); if (unlikely(err)) { diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 04754d55b3c1..b181773d7ad3 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -276,12 +276,13 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) struct rt_cache_stat *st = v; if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); + seq_puts(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); return 0; } - seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " - " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x\n", dst_entries_get_slow(&ipv4_dst_ops), 0, /* st->in_hit */ st->in_slow_tot, @@ -2812,8 +2813,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or new->output = dst_discard_out; new->dev = net->loopback_dev; - if (new->dev) - dev_hold(new->dev); + dev_hold(new->dev); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 84db1c9ee92a..2e62e0d6373a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2277,51 +2277,72 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -/* - * Get next listener socket follow cur. If cur is NULL, get first socket - * starting from bucket given in st->bucket; when st->bucket is zero the - * very first socket in the hash table is returned. +static unsigned short seq_file_family(const struct seq_file *seq); + +static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) +{ + unsigned short family = seq_file_family(seq); + + /* AF_UNSPEC is used as a match all */ + return ((family == AF_UNSPEC || family == sk->sk_family) && + net_eq(sock_net(sk), seq_file_net(seq))); +} + +/* Find a non empty bucket (starting from st->bucket) + * and return the first sk from it. */ -static void *listening_get_next(struct seq_file *seq, void *cur) +static void *listening_get_first(struct seq_file *seq) { - struct tcp_seq_afinfo *afinfo; struct tcp_iter_state *st = seq->private; - struct net *net = seq_file_net(seq); - struct inet_listen_hashbucket *ilb; - struct hlist_nulls_node *node; - struct sock *sk = cur; - if (st->bpf_seq_afinfo) - afinfo = st->bpf_seq_afinfo; - else - afinfo = PDE_DATA(file_inode(seq->file)); + st->offset = 0; + for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) { + struct inet_listen_hashbucket *ilb2; + struct inet_connection_sock *icsk; + struct sock *sk; - if (!sk) { -get_head: - ilb = &tcp_hashinfo.listening_hash[st->bucket]; - spin_lock(&ilb->lock); - sk = sk_nulls_head(&ilb->nulls_head); - st->offset = 0; - goto get_sk; + ilb2 = &tcp_hashinfo.lhash2[st->bucket]; + if (hlist_empty(&ilb2->head)) + continue; + + spin_lock(&ilb2->lock); + inet_lhash2_for_each_icsk(icsk, &ilb2->head) { + sk = (struct sock *)icsk; + if (seq_sk_match(seq, sk)) + return sk; + } + spin_unlock(&ilb2->lock); } - ilb = &tcp_hashinfo.listening_hash[st->bucket]; + + return NULL; +} + +/* Find the next sk of "cur" within the same bucket (i.e. st->bucket). + * If "cur" is the last one in the st->bucket, + * call listening_get_first() to return the first sk of the next + * non empty bucket. + */ +static void *listening_get_next(struct seq_file *seq, void *cur) +{ + struct tcp_iter_state *st = seq->private; + struct inet_listen_hashbucket *ilb2; + struct inet_connection_sock *icsk; + struct sock *sk = cur; + ++st->num; ++st->offset; - sk = sk_nulls_next(sk); -get_sk: - sk_nulls_for_each_from(sk, node) { - if (!net_eq(sock_net(sk), net)) - continue; - if (afinfo->family == AF_UNSPEC || - sk->sk_family == afinfo->family) + icsk = inet_csk(sk); + inet_lhash2_for_each_icsk_continue(icsk) { + sk = (struct sock *)icsk; + if (seq_sk_match(seq, sk)) return sk; } - spin_unlock(&ilb->lock); - st->offset = 0; - if (++st->bucket < INET_LHTABLE_SIZE) - goto get_head; - return NULL; + + ilb2 = &tcp_hashinfo.lhash2[st->bucket]; + spin_unlock(&ilb2->lock); + ++st->bucket; + return listening_get_first(seq); } static void *listening_get_idx(struct seq_file *seq, loff_t *pos) @@ -2331,7 +2352,7 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) st->bucket = 0; st->offset = 0; - rc = listening_get_next(seq, NULL); + rc = listening_get_first(seq); while (rc && *pos) { rc = listening_get_next(seq, rc); @@ -2351,15 +2372,7 @@ static inline bool empty_bucket(const struct tcp_iter_state *st) */ static void *established_get_first(struct seq_file *seq) { - struct tcp_seq_afinfo *afinfo; struct tcp_iter_state *st = seq->private; - struct net *net = seq_file_net(seq); - void *rc = NULL; - - if (st->bpf_seq_afinfo) - afinfo = st->bpf_seq_afinfo; - else - afinfo = PDE_DATA(file_inode(seq->file)); st->offset = 0; for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { @@ -2373,32 +2386,20 @@ static void *established_get_first(struct seq_file *seq) spin_lock_bh(lock); sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { - if ((afinfo->family != AF_UNSPEC && - sk->sk_family != afinfo->family) || - !net_eq(sock_net(sk), net)) { - continue; - } - rc = sk; - goto out; + if (seq_sk_match(seq, sk)) + return sk; } spin_unlock_bh(lock); } -out: - return rc; + + return NULL; } static void *established_get_next(struct seq_file *seq, void *cur) { - struct tcp_seq_afinfo *afinfo; struct sock *sk = cur; struct hlist_nulls_node *node; struct tcp_iter_state *st = seq->private; - struct net *net = seq_file_net(seq); - - if (st->bpf_seq_afinfo) - afinfo = st->bpf_seq_afinfo; - else - afinfo = PDE_DATA(file_inode(seq->file)); ++st->num; ++st->offset; @@ -2406,9 +2407,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) sk = sk_nulls_next(sk); sk_nulls_for_each_from(sk, node) { - if ((afinfo->family == AF_UNSPEC || - sk->sk_family == afinfo->family) && - net_eq(sock_net(sk), net)) + if (seq_sk_match(seq, sk)) return sk; } @@ -2451,17 +2450,18 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) static void *tcp_seek_last_pos(struct seq_file *seq) { struct tcp_iter_state *st = seq->private; + int bucket = st->bucket; int offset = st->offset; int orig_num = st->num; void *rc = NULL; switch (st->state) { case TCP_SEQ_STATE_LISTENING: - if (st->bucket >= INET_LHTABLE_SIZE) + if (st->bucket > tcp_hashinfo.lhash2_mask) break; st->state = TCP_SEQ_STATE_LISTENING; - rc = listening_get_next(seq, NULL); - while (offset-- && rc) + rc = listening_get_first(seq); + while (offset-- && rc && bucket == st->bucket) rc = listening_get_next(seq, rc); if (rc) break; @@ -2472,7 +2472,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) if (st->bucket > tcp_hashinfo.ehash_mask) break; rc = established_get_first(seq); - while (offset-- && rc) + while (offset-- && rc && bucket == st->bucket) rc = established_get_next(seq, rc); } @@ -2542,7 +2542,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - spin_unlock(&tcp_hashinfo.listening_hash[st->bucket].lock); + spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); break; case TCP_SEQ_STATE_ESTABLISHED: if (v) @@ -2687,6 +2687,15 @@ out: } #ifdef CONFIG_BPF_SYSCALL +struct bpf_tcp_iter_state { + struct tcp_iter_state state; + unsigned int cur_sk; + unsigned int end_sk; + unsigned int max_sk; + struct sock **batch; + bool st_bucket_done; +}; + struct bpf_iter__tcp { __bpf_md_ptr(struct bpf_iter_meta *, meta); __bpf_md_ptr(struct sock_common *, sk_common); @@ -2705,16 +2714,204 @@ static int tcp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, return bpf_iter_run_prog(prog, &ctx); } +static void bpf_iter_tcp_put_batch(struct bpf_tcp_iter_state *iter) +{ + while (iter->cur_sk < iter->end_sk) + sock_put(iter->batch[iter->cur_sk++]); +} + +static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, + unsigned int new_batch_sz) +{ + struct sock **new_batch; + + new_batch = kvmalloc(sizeof(*new_batch) * new_batch_sz, + GFP_USER | __GFP_NOWARN); + if (!new_batch) + return -ENOMEM; + + bpf_iter_tcp_put_batch(iter); + kvfree(iter->batch); + iter->batch = new_batch; + iter->max_sk = new_batch_sz; + + return 0; +} + +static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, + struct sock *start_sk) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + struct inet_connection_sock *icsk; + unsigned int expected = 1; + struct sock *sk; + + sock_hold(start_sk); + iter->batch[iter->end_sk++] = start_sk; + + icsk = inet_csk(start_sk); + inet_lhash2_for_each_icsk_continue(icsk) { + sk = (struct sock *)icsk; + if (seq_sk_match(seq, sk)) { + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + expected++; + } + } + spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); + + return expected; +} + +static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq, + struct sock *start_sk) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + struct hlist_nulls_node *node; + unsigned int expected = 1; + struct sock *sk; + + sock_hold(start_sk); + iter->batch[iter->end_sk++] = start_sk; + + sk = sk_nulls_next(start_sk); + sk_nulls_for_each_from(sk, node) { + if (seq_sk_match(seq, sk)) { + if (iter->end_sk < iter->max_sk) { + sock_hold(sk); + iter->batch[iter->end_sk++] = sk; + } + expected++; + } + } + spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + + return expected; +} + +static struct sock *bpf_iter_tcp_batch(struct seq_file *seq) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + unsigned int expected; + bool resized = false; + struct sock *sk; + + /* The st->bucket is done. Directly advance to the next + * bucket instead of having the tcp_seek_last_pos() to skip + * one by one in the current bucket and eventually find out + * it has to advance to the next bucket. + */ + if (iter->st_bucket_done) { + st->offset = 0; + st->bucket++; + if (st->state == TCP_SEQ_STATE_LISTENING && + st->bucket > tcp_hashinfo.lhash2_mask) { + st->state = TCP_SEQ_STATE_ESTABLISHED; + st->bucket = 0; + } + } + +again: + /* Get a new batch */ + iter->cur_sk = 0; + iter->end_sk = 0; + iter->st_bucket_done = false; + + sk = tcp_seek_last_pos(seq); + if (!sk) + return NULL; /* Done */ + + if (st->state == TCP_SEQ_STATE_LISTENING) + expected = bpf_iter_tcp_listening_batch(seq, sk); + else + expected = bpf_iter_tcp_established_batch(seq, sk); + + if (iter->end_sk == expected) { + iter->st_bucket_done = true; + return sk; + } + + if (!resized && !bpf_iter_tcp_realloc_batch(iter, expected * 3 / 2)) { + resized = true; + goto again; + } + + return sk; +} + +static void *bpf_iter_tcp_seq_start(struct seq_file *seq, loff_t *pos) +{ + /* bpf iter does not support lseek, so it always + * continue from where it was stop()-ped. + */ + if (*pos) + return bpf_iter_tcp_batch(seq); + + return SEQ_START_TOKEN; +} + +static void *bpf_iter_tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_tcp_iter_state *iter = seq->private; + struct tcp_iter_state *st = &iter->state; + struct sock *sk; + + /* Whenever seq_next() is called, the iter->cur_sk is + * done with seq_show(), so advance to the next sk in + * the batch. + */ + if (iter->cur_sk < iter->end_sk) { + /* Keeping st->num consistent in tcp_iter_state. + * bpf_iter_tcp does not use st->num. + * meta.seq_num is used instead. + */ + st->num++; + /* Move st->offset to the next sk in the bucket such that + * the future start() will resume at st->offset in + * st->bucket. See tcp_seek_last_pos(). + */ + st->offset++; + sock_put(iter->batch[iter->cur_sk++]); + } + + if (iter->cur_sk < iter->end_sk) + sk = iter->batch[iter->cur_sk]; + else + sk = bpf_iter_tcp_batch(seq); + + ++*pos; + /* Keeping st->last_pos consistent in tcp_iter_state. + * bpf iter does not do lseek, so st->last_pos always equals to *pos. + */ + st->last_pos = *pos; + return sk; +} + static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) { struct bpf_iter_meta meta; struct bpf_prog *prog; struct sock *sk = v; + bool slow; uid_t uid; + int ret; if (v == SEQ_START_TOKEN) return 0; + if (sk_fullsock(sk)) + slow = lock_sock_fast(sk); + + if (unlikely(sk_unhashed(sk))) { + ret = SEQ_SKIP; + goto unlock; + } + if (sk->sk_state == TCP_TIME_WAIT) { uid = 0; } else if (sk->sk_state == TCP_NEW_SYN_RECV) { @@ -2728,11 +2925,18 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v) meta.seq = seq; prog = bpf_iter_get_info(&meta, false); - return tcp_prog_seq_show(prog, &meta, v, uid); + ret = tcp_prog_seq_show(prog, &meta, v, uid); + +unlock: + if (sk_fullsock(sk)) + unlock_sock_fast(sk, slow); + return ret; + } static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v) { + struct bpf_tcp_iter_state *iter = seq->private; struct bpf_iter_meta meta; struct bpf_prog *prog; @@ -2743,16 +2947,33 @@ static void bpf_iter_tcp_seq_stop(struct seq_file *seq, void *v) (void)tcp_prog_seq_show(prog, &meta, v, 0); } - tcp_seq_stop(seq, v); + if (iter->cur_sk < iter->end_sk) { + bpf_iter_tcp_put_batch(iter); + iter->st_bucket_done = false; + } } static const struct seq_operations bpf_iter_tcp_seq_ops = { .show = bpf_iter_tcp_seq_show, - .start = tcp_seq_start, - .next = tcp_seq_next, + .start = bpf_iter_tcp_seq_start, + .next = bpf_iter_tcp_seq_next, .stop = bpf_iter_tcp_seq_stop, }; #endif +static unsigned short seq_file_family(const struct seq_file *seq) +{ + const struct tcp_seq_afinfo *afinfo; + +#ifdef CONFIG_BPF_SYSCALL + /* Iterated from bpf_iter. Let the bpf prog to filter instead. */ + if (seq->op == &bpf_iter_tcp_seq_ops) + return AF_UNSPEC; +#endif + + /* Iterated from proc fs */ + afinfo = PDE_DATA(file_inode(seq->file)); + return afinfo->family; +} static const struct seq_operations tcp4_seq_ops = { .show = tcp4_seq_show, @@ -3002,39 +3223,55 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, struct sock_common *sk_common, uid_t uid) +#define INIT_BATCH_SZ 16 + static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) { - struct tcp_iter_state *st = priv_data; - struct tcp_seq_afinfo *afinfo; - int ret; + struct bpf_tcp_iter_state *iter = priv_data; + int err; - afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN); - if (!afinfo) - return -ENOMEM; + err = bpf_iter_init_seq_net(priv_data, aux); + if (err) + return err; - afinfo->family = AF_UNSPEC; - st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data, aux); - if (ret) - kfree(afinfo); - return ret; + err = bpf_iter_tcp_realloc_batch(iter, INIT_BATCH_SZ); + if (err) { + bpf_iter_fini_seq_net(priv_data); + return err; + } + + return 0; } static void bpf_iter_fini_tcp(void *priv_data) { - struct tcp_iter_state *st = priv_data; + struct bpf_tcp_iter_state *iter = priv_data; - kfree(st->bpf_seq_afinfo); bpf_iter_fini_seq_net(priv_data); + kvfree(iter->batch); } static const struct bpf_iter_seq_info tcp_seq_info = { .seq_ops = &bpf_iter_tcp_seq_ops, .init_seq_private = bpf_iter_init_tcp, .fini_seq_private = bpf_iter_fini_tcp, - .seq_priv_size = sizeof(struct tcp_iter_state), + .seq_priv_size = sizeof(struct bpf_tcp_iter_state), }; +static const struct bpf_func_proto * +bpf_iter_tcp_get_func_proto(enum bpf_func_id func_id, + const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_setsockopt: + return &bpf_sk_setsockopt_proto; + case BPF_FUNC_getsockopt: + return &bpf_sk_getsockopt_proto; + default: + return NULL; + } +} + static struct bpf_iter_reg tcp_reg_info = { .target = "tcp", .ctx_arg_info_size = 1, @@ -3042,6 +3279,7 @@ static struct bpf_iter_reg tcp_reg_info = { { offsetof(struct bpf_iter__tcp, sk_common), PTR_TO_BTF_ID_OR_NULL }, }, + .get_func_proto = bpf_iter_tcp_get_func_proto, .seq_info = &tcp_seq_info, }; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index e09147ac9a99..fc61cd3fea65 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -298,6 +298,9 @@ int tcp_gro_complete(struct sk_buff *skb) if (th->cwr) skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } EXPORT_SYMBOL(tcp_gro_complete); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 9dde1e5fb449..1380a6b6f4ff 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -624,6 +624,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb) skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index db0a89810f28..8381288a0d6e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -701,8 +701,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, errout: if (in6_dev) in6_dev_put(in6_dev); - if (dev) - dev_put(dev); + dev_put(dev); return err; } @@ -5417,8 +5416,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, errout_ifa: in6_ifa_put(ifa); errout: - if (dev) - dev_put(dev); + dev_put(dev); if (fillargs.netnsid >= 0) put_net(tgt_net); @@ -5792,7 +5790,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net, } static int inet6_validate_link_af(const struct net_device *dev, - const struct nlattr *nla) + const struct nlattr *nla, + struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_INET6_MAX + 1]; struct inet6_dev *idev = NULL; @@ -5805,7 +5804,7 @@ static int inet6_validate_link_af(const struct net_device *dev, } err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, - inet6_af_policy, NULL); + inet6_af_policy, extack); if (err) return err; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index d897faa4e9e6..3a871a09f962 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -55,19 +55,6 @@ #include <linux/uaccess.h> -/* - * Parsing tlv encoded headers. - * - * Parsing function "func" returns true, if parsing succeed - * and false, if it failed. - * It MUST NOT touch skb->h. - */ - -struct tlvtype_proc { - int type; - bool (*func)(struct sk_buff *skb, int offset); -}; - /********************* Generic functions *********************/ @@ -112,16 +99,23 @@ drop: return false; } +static bool ipv6_hop_ra(struct sk_buff *skb, int optoff); +static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff); +static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff); +static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff); +#if IS_ENABLED(CONFIG_IPV6_MIP6) +static bool ipv6_dest_hao(struct sk_buff *skb, int optoff); +#endif + /* Parse tlv encoded option header (hop-by-hop or destination) */ -static bool ip6_parse_tlv(const struct tlvtype_proc *procs, +static bool ip6_parse_tlv(bool hopbyhop, struct sk_buff *skb, int max_count) { int len = (skb_transport_header(skb)[1] + 1) << 3; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); - const struct tlvtype_proc *curr; bool disallow_unknowns = false; int tlv_count = 0; int padlen = 0; @@ -176,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, if (tlv_count > max_count) goto bad; - for (curr = procs; curr->type >= 0; curr++) { - if (curr->type == nh[off]) { - /* type specific length/alignment - checks will be performed in the - func(). */ - if (curr->func(skb, off) == false) + if (hopbyhop) { + switch (nh[off]) { + case IPV6_TLV_ROUTERALERT: + if (!ipv6_hop_ra(skb, off)) + return false; + break; + case IPV6_TLV_IOAM: + if (!ipv6_hop_ioam(skb, off)) + return false; + break; + case IPV6_TLV_JUMBO: + if (!ipv6_hop_jumbo(skb, off)) + return false; + break; + case IPV6_TLV_CALIPSO: + if (!ipv6_hop_calipso(skb, off)) + return false; + break; + default: + if (!ip6_tlvopt_unknown(skb, off, + disallow_unknowns)) + return false; + break; + } + } else { + switch (nh[off]) { +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPV6_TLV_HAO: + if (!ipv6_dest_hao(skb, off)) + return false; + break; +#endif + default: + if (!ip6_tlvopt_unknown(skb, off, + disallow_unknowns)) return false; break; } } - if (curr->type < 0 && - !ip6_tlvopt_unknown(skb, off, disallow_unknowns)) - return false; - padlen = 0; } off += optlen; @@ -267,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) } #endif -static const struct tlvtype_proc tlvprocdestopt_lst[] = { -#if IS_ENABLED(CONFIG_IPV6_MIP6) - { - .type = IPV6_TLV_HAO, - .func = ipv6_dest_hao, - }, -#endif - {-1, NULL} -}; - static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); @@ -307,8 +316,7 @@ fail_and_free: dstbuf = opt->dst1; #endif - if (ip6_parse_tlv(tlvprocdestopt_lst, skb, - net->ipv6.sysctl.max_dst_opts_cnt)) { + if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) @@ -1051,26 +1059,6 @@ drop: return false; } -static const struct tlvtype_proc tlvprochopopt_lst[] = { - { - .type = IPV6_TLV_ROUTERALERT, - .func = ipv6_hop_ra, - }, - { - .type = IPV6_TLV_IOAM, - .func = ipv6_hop_ioam, - }, - { - .type = IPV6_TLV_JUMBO, - .func = ipv6_hop_jumbo, - }, - { - .type = IPV6_TLV_CALIPSO, - .func = ipv6_hop_calipso, - }, - { -1, } -}; - int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); @@ -1096,8 +1084,7 @@ fail_and_free: goto fail_and_free; opt->flags |= IP6SKB_HOPBYHOP; - if (ip6_parse_tlv(tlvprochopopt_lst, skb, - net->ipv6.sysctl.max_hbh_opts_cnt)) { + if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index b7b27d94ef61..12f985f43bcc 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); - int delta = hh_len - skb_headroom(skb); - const struct in6_addr *nexthop; + const struct in6_addr *daddr, *nexthop; + struct ipv6hdr *hdr; struct neighbour *neigh; int ret; /* Be paranoid, rather than too clever. */ - if (unlikely(delta > 0) && dev->header_ops) { - /* pskb_expand_head() might crash, if skb is shared */ - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); - - if (likely(nskb)) { - if (skb->sk) - skb_set_owner_w(nskb, skb->sk); - consume_skb(skb); - } else { - kfree_skb(skb); - } - skb = nskb; - } - if (skb && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) { - kfree_skb(skb); - skb = NULL; - } + if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { + skb = skb_expand_head(skb, hh_len); if (!skb) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOMEM; } } - if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - + hdr = ipv6_hdr(skb); + daddr = &hdr->daddr; + if (ipv6_addr_is_multicast(daddr)) { if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || - ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr))) { + ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); /* Do not check for IFF_ALLMULTI; multicast routing @@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); - if (ipv6_hdr(skb)->hop_limit == 0) { + if (hdr->hop_limit == 0) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); - - if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= - IPV6_ADDR_SCOPE_NODELOCAL && + if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && !(dev->flags & IFF_LOOPBACK)) { kfree_skb(skb); return 0; @@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); - neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + nexthop = rt6_nexthop((struct rt6_info *)dst, daddr); + neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) - neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); @@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } rcu_read_unlock_bh(); - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } @@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); + struct net_device *dev = dst->dev; + struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; @@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, int hlimit = -1; u32 mtu; - head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev); + head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; - if (unlikely(skb_headroom(skb) < head_room)) { - struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (!skb2) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUTDISCARDS); - kfree_skb(skb); + if (unlikely(head_room > skb_headroom(skb))) { + skb = skb_expand_head(skb, head_room); + if (!skb) { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); return -ENOBUFS; } - if (skb->sk) - skb_set_owner_w(skb2, skb->sk); - consume_skb(skb); - skb = skb2; } if (opt) { @@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { - IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_OUT, skb->len); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, * we promote our socket to non const */ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, - net, (struct sock *)sk, skb, NULL, dst->dev, + net, (struct sock *)sk, skb, NULL, dev, dst_output); } - skb->dev = dst->dev; + skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const */ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -549,9 +525,10 @@ int ip6_forward(struct sk_buff *skb) if (net->ipv6.devconf_all->proxy_ndp && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); - if (proxied > 0) + if (proxied > 0) { + hdr->hop_limit--; return ip6_input(skb); - else if (proxied < 0) { + } else if (proxied < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 06b0d2c329b9..36ed9efb8825 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) reg_dev = mrt->vif_table[reg_vif_num].dev; - if (reg_dev) - dev_hold(reg_dev); + dev_hold(reg_dev); read_unlock(&mrt_lock); if (!reg_dev) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index a6804a7e34c1..e4bdb09c5586 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) goto out_free_gsf; - ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist); + ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex); out_free_gsf: kfree(gsf); return ret; @@ -234,7 +234,7 @@ out_free_gsf: static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter *gf32; void *p; int ret; @@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, if (!p) return -ENOMEM; - gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */ + gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */ ret = -EFAULT; if (copy_from_sockptr(gf32, optval, optlen)) goto out_free_p; @@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval, goto out_free_p; ret = -EINVAL; - if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) + if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen) goto out_free_p; ret = ip6_mc_msfilter(sk, &(struct group_filter){ .gf_interface = gf32->gf_interface, .gf_group = gf32->gf_group, .gf_fmode = gf32->gf_fmode, - .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist); + .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex); out_free_p: kfree(p); @@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, static int ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen, int len) { - const int size0 = offsetof(struct group_filter, gf_slist); + const int size0 = offsetof(struct group_filter, gf_slist_flex); struct group_filter __user *p = optval; struct group_filter gsf; int num; @@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; num = gsf.gf_numsrc; lock_sock(sk); - err = ip6_mc_msfget(sk, &gsf, p->gf_slist); + err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex); if (!err) { if (num > gsf.gf_numsrc) num = gsf.gf_numsrc; @@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval, static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, int __user *optlen) { - const int size0 = offsetof(struct compat_group_filter, gf_slist); + const int size0 = offsetof(struct compat_group_filter, gf_slist_flex); struct compat_group_filter __user *p = optval; struct compat_group_filter gf32; struct group_filter gf; @@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval, return -EADDRNOTAVAIL; lock_sock(sk); - err = ip6_mc_msfget(sk, &gf, p->gf_slist); + err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex); release_sock(sk); if (err) return err; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 54ec163fbafa..cd951faa2fac 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) count += psl->sl_max; - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk, if (psl) { for (i = 0; i < psl->sl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } psl = newpsl; @@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, goto done; } if (gsf->gf_numsrc) { - newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc), - GFP_KERNEL); + newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, + gsf->gf_numsrc), + GFP_KERNEL); if (!newpsl) { err = -ENOBUFS; goto done; @@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, newpsl->sl_count, newpsl->sl_addr, 0); if (err) { mutex_unlock(&idev->mc_lock); - sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max)); + sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr, + newpsl->sl_max)); goto done; } mutex_unlock(&idev->mc_lock); @@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf, if (psl) { ip6_mc_del_src(idev, group, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } else { ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); @@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, psl->sl_count, psl->sl_addr, 0); RCU_INIT_POINTER(iml->sflist, NULL); - atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + atomic_sub(struct_size(psl, sl_addr, psl->sl_max), + &sk->sk_omem_alloc); kfree_rcu(psl, rcu); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6b8051106aba..6cf4bb89ca69 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3626,8 +3626,7 @@ out: if (err) { lwtstate_put(fib6_nh->fib_nh_lws); fib6_nh->fib_nh_lws = NULL; - if (dev) - dev_put(dev); + dev_put(dev); } return err; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 44453b35c7b7..18316ee3c692 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1044,7 +1044,7 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (err == 0) { atomic_dec(&iucv->skbs_in_xmit); skb_unlink(skb, &iucv->send_skb_q); - kfree_skb(skb); + consume_skb(skb); } /* this error should never happen since the */ @@ -1293,7 +1293,7 @@ static int iucv_sock_recvmsg(struct socket *sock, struct msghdr *msg, } } - kfree_skb(skb); + consume_skb(skb); if (iucv->transport == AF_IUCV_TRANS_HIPER) { atomic_inc(&iucv->msg_recv); if (atomic_read(&iucv->msg_recv) > iucv->msglimit) { @@ -1756,7 +1756,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); if (this) { - kfree_skb(this); + consume_skb(this); /* wake up any process waiting for sending */ iucv_sock_wake_msglim(sk); } @@ -1903,17 +1903,17 @@ static int afiucv_hs_callback_synack(struct sock *sk, struct sk_buff *skb) { struct iucv_sock *iucv = iucv_sk(sk); - if (!iucv) - goto out; - if (sk->sk_state != IUCV_BOUND) - goto out; + if (!iucv || sk->sk_state != IUCV_BOUND) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); iucv->msglimit_peer = iucv_trans_hdr(skb)->window; sk->sk_state = IUCV_CONNECTED; sk->sk_state_change(sk); bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -1924,16 +1924,16 @@ static int afiucv_hs_callback_synfin(struct sock *sk, struct sk_buff *skb) { struct iucv_sock *iucv = iucv_sk(sk); - if (!iucv) - goto out; - if (sk->sk_state != IUCV_BOUND) - goto out; + if (!iucv || sk->sk_state != IUCV_BOUND) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); sk->sk_state = IUCV_DISCONN; sk->sk_state_change(sk); bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -1945,16 +1945,18 @@ static int afiucv_hs_callback_fin(struct sock *sk, struct sk_buff *skb) struct iucv_sock *iucv = iucv_sk(sk); /* other end of connection closed */ - if (!iucv) - goto out; + if (!iucv) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + bh_lock_sock(sk); if (sk->sk_state == IUCV_CONNECTED) { sk->sk_state = IUCV_DISCONN; sk->sk_state_change(sk); } bh_unlock_sock(sk); -out: - kfree_skb(skb); + consume_skb(skb); return NET_RX_SUCCESS; } @@ -2107,7 +2109,7 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, case (AF_IUCV_FLAG_WIN): err = afiucv_hs_callback_win(sk, skb); if (skb->len == sizeof(struct af_iucv_trans_hdr)) { - kfree_skb(skb); + consume_skb(skb); break; } fallthrough; /* and receive non-zero length data */ @@ -2262,21 +2264,11 @@ static struct packet_type iucv_packet_type = { .func = afiucv_hs_rcv, }; -static int afiucv_iucv_init(void) -{ - return pr_iucv->iucv_register(&af_iucv_handler, 0); -} - -static void afiucv_iucv_exit(void) -{ - pr_iucv->iucv_unregister(&af_iucv_handler, 0); -} - static int __init afiucv_init(void) { int err; - if (MACHINE_IS_VM) { + if (MACHINE_IS_VM && IS_ENABLED(CONFIG_IUCV)) { cpcmd("QUERY USERID", iucv_userid, sizeof(iucv_userid), &err); if (unlikely(err)) { WARN_ON(err); @@ -2284,11 +2276,7 @@ static int __init afiucv_init(void) goto out; } - pr_iucv = try_then_request_module(symbol_get(iucv_if), "iucv"); - if (!pr_iucv) { - printk(KERN_WARNING "iucv_if lookup failed\n"); - memset(&iucv_userid, 0, sizeof(iucv_userid)); - } + pr_iucv = &iucv_if; } else { memset(&iucv_userid, 0, sizeof(iucv_userid)); pr_iucv = NULL; @@ -2302,7 +2290,7 @@ static int __init afiucv_init(void) goto out_proto; if (pr_iucv) { - err = afiucv_iucv_init(); + err = pr_iucv->iucv_register(&af_iucv_handler, 0); if (err) goto out_sock; } @@ -2316,23 +2304,19 @@ static int __init afiucv_init(void) out_notifier: if (pr_iucv) - afiucv_iucv_exit(); + pr_iucv->iucv_unregister(&af_iucv_handler, 0); out_sock: sock_unregister(PF_IUCV); out_proto: proto_unregister(&iucv_proto); out: - if (pr_iucv) - symbol_put(iucv_if); return err; } static void __exit afiucv_exit(void) { - if (pr_iucv) { - afiucv_iucv_exit(); - symbol_put(iucv_if); - } + if (pr_iucv) + pr_iucv->iucv_unregister(&af_iucv_handler, 0); unregister_netdevice_notifier(&afiucv_netdev_notifier); dev_remove_pack(&iucv_packet_type); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index e6795d5a546a..f3343a8541a5 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -286,19 +286,19 @@ static union iucv_param *iucv_param_irq[NR_CPUS]; */ static inline int __iucv_call_b2f0(int command, union iucv_param *parm) { - register unsigned long reg0 asm ("0"); - register unsigned long reg1 asm ("1"); - int ccode; + int cc; - reg0 = command; - reg1 = (unsigned long)parm; asm volatile( - " .long 0xb2f01000\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "=m" (*parm), "+d" (reg0), "+a" (reg1) - : "m" (*parm) : "cc"); - return ccode; + " lgr 0,%[reg0]\n" + " lgr 1,%[reg1]\n" + " .long 0xb2f01000\n" + " ipm %[cc]\n" + " srl %[cc],28\n" + : [cc] "=&d" (cc), "+m" (*parm) + : [reg0] "d" ((unsigned long)command), + [reg1] "d" ((unsigned long)parm) + : "cc", "0", "1"); + return cc; } static inline int iucv_call_b2f0(int command, union iucv_param *parm) @@ -319,19 +319,21 @@ static inline int iucv_call_b2f0(int command, union iucv_param *parm) */ static int __iucv_query_maxconn(void *param, unsigned long *max_pathid) { - register unsigned long reg0 asm ("0"); - register unsigned long reg1 asm ("1"); - int ccode; + unsigned long reg1 = (unsigned long)param; + int cc; - reg0 = IUCV_QUERY; - reg1 = (unsigned long) param; asm volatile ( + " lghi 0,%[cmd]\n" + " lgr 1,%[reg1]\n" " .long 0xb2f01000\n" - " ipm %0\n" - " srl %0,28\n" - : "=d" (ccode), "+d" (reg0), "+d" (reg1) : : "cc"); + " ipm %[cc]\n" + " srl %[cc],28\n" + " lgr %[reg1],1\n" + : [cc] "=&d" (cc), [reg1] "+&d" (reg1) + : [cmd] "K" (IUCV_QUERY) + : "cc", "0", "1"); *max_pathid = reg1; - return ccode; + return cc; } static int iucv_query_maxconn(void) @@ -500,14 +502,14 @@ static void iucv_setmask_mp(void) { int cpu; - get_online_cpus(); + cpus_read_lock(); for_each_online_cpu(cpu) /* Enable all cpus with a declared buffer. */ if (cpumask_test_cpu(cpu, &iucv_buffer_cpumask) && !cpumask_test_cpu(cpu, &iucv_irq_cpumask)) smp_call_function_single(cpu, iucv_allow_cpu, NULL, 1); - put_online_cpus(); + cpus_read_unlock(); } /** @@ -540,7 +542,7 @@ static int iucv_enable(void) size_t alloc_size; int cpu, rc; - get_online_cpus(); + cpus_read_lock(); rc = -ENOMEM; alloc_size = iucv_max_pathid * sizeof(struct iucv_path); iucv_path_table = kzalloc(alloc_size, GFP_KERNEL); @@ -553,12 +555,12 @@ static int iucv_enable(void) if (cpumask_empty(&iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out; - put_online_cpus(); + cpus_read_unlock(); return 0; out: kfree(iucv_path_table); iucv_path_table = NULL; - put_online_cpus(); + cpus_read_unlock(); return rc; } @@ -571,11 +573,11 @@ out: */ static void iucv_disable(void) { - get_online_cpus(); + cpus_read_lock(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); kfree(iucv_path_table); iucv_path_table = NULL; - put_online_cpus(); + cpus_read_unlock(); } static int iucv_cpu_dead(unsigned int cpu) @@ -784,7 +786,7 @@ static int iucv_reboot_event(struct notifier_block *this, if (cpumask_empty(&iucv_irq_cpumask)) return NOTIFY_DONE; - get_online_cpus(); + cpus_read_lock(); on_each_cpu_mask(&iucv_irq_cpumask, iucv_block_cpu, NULL, 1); preempt_disable(); for (i = 0; i < iucv_max_pathid; i++) { @@ -792,7 +794,7 @@ static int iucv_reboot_event(struct notifier_block *this, iucv_sever_pathid(i, NULL); } preempt_enable(); - put_online_cpus(); + cpus_read_unlock(); iucv_disable(); return NOTIFY_DONE; } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 7180979114e4..3086f4a6ae68 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr) { u8 rc = LLC_PDU_LEN_U; - if (addr->sllc_test || addr->sllc_xid) + if (addr->sllc_test) rc = LLC_PDU_LEN_U; + else if (addr->sllc_xid) + /* We need to expand header to sizeof(struct llc_xid_info) + * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header + * as XID PDU. In llc_ui_sendmsg() we reserved header size and then + * filled all other space with user data. If we won't reserve this + * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data + */ + rc = LLC_PDU_LEN_U_XID; else if (sk->sk_type == SOCK_STREAM) rc = LLC_PDU_LEN_I; return rc; @@ -216,8 +224,7 @@ static int llc_ui_release(struct socket *sock) } else { release_sock(sk); } - if (llc->dev) - dev_put(llc->dev); + dev_put(llc->dev); sock_put(sk); llc_sk_free(sk); out: @@ -355,8 +362,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) } else llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - if (llc->dev) - dev_hold(llc->dev); + dev_hold(llc->dev); rcu_read_unlock(); if (!llc->dev) goto out; diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c index b554f26c68ee..79d1cef8f15a 100644 --- a/net/llc/llc_s_ac.c +++ b/net/llc/llc_s_ac.c @@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb) struct llc_sap_state_ev *ev = llc_sap_ev(skb); int rc; - llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap, + llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap, ev->daddr.lsap, LLC_PDU_CMD); llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0); rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 84cc7733ea66..4e6f11e63df3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -152,6 +152,8 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct vif_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; int ret; ret = ieee80211_if_change_type(sdata, type); @@ -162,7 +164,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy, RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); ieee80211_check_fast_rx_iface(sdata); } else if (type == NL80211_IFTYPE_STATION && params->use_4addr >= 0) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + if (params->use_4addr == ifmgd->use_4addr) + return 0; + sdata->u.mgd.use_4addr = params->use_4addr; + if (!ifmgd->associated) + return 0; + + mutex_lock(&local->sta_mtx); + sta = sta_info_get(sdata, ifmgd->bssid); + if (sta) + drv_sta_set_4addr(local, sdata, &sta->sta, + params->use_4addr); + mutex_unlock(&local->sta_mtx); + + if (params->use_4addr) + ieee80211_send_4addr_nullfunc(local, sdata); } if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 22549b95d1aa..30ce6d2ec7ce 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -2201,6 +2201,8 @@ void ieee80211_dynamic_ps_timer(struct timer_list *t); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, bool powersave); +void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr, bool ack, u16 tx_time); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a00f11a33699..c0ea3b1aa9e1 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1095,8 +1095,8 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, ieee80211_tx_skb(sdata, skb); } -static void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata) +void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { struct sk_buff *skb; struct ieee80211_hdr *nullfunc; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 771921c057e8..2563473b5cf1 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -730,7 +730,8 @@ ieee80211_make_monitor_skb(struct ieee80211_local *local, * Need to make a copy and possibly remove radiotap header * and FCS from the original. */ - skb = skb_copy_expand(*origskb, needed_headroom, 0, GFP_ATOMIC); + skb = skb_copy_expand(*origskb, needed_headroom + NET_SKB_PAD, + 0, GFP_ATOMIC); if (!skb) return NULL; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e96981144358..8509778ff31f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1147,6 +1147,29 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, return queued; } +static void +ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) +{ + struct rate_control_ref *ref = sdata->local->rate_ctrl; + u16 tid; + + if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) + return; + + if (!sta || !sta->sta.ht_cap.ht_supported || + !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || + skb->protocol == sdata->control_port_protocol) + return; + + tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + if (likely(sta->ampdu_mlme.tid_tx[tid])) + return; + + ieee80211_start_tx_ba_session(&sta->sta, tid, 0); +} + /* * initialises @tx * pass %NULL for the station if unknown, a valid pointer if known @@ -1160,6 +1183,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + bool aggr_check = false; int tid; memset(tx, 0, sizeof(*tx)); @@ -1188,8 +1212,10 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, } else if (tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } - if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) + if (!tx->sta && !is_multicast_ether_addr(hdr->addr1)) { tx->sta = sta_info_get(sdata, hdr->addr1); + aggr_check = true; + } } if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && @@ -1199,8 +1225,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, struct tid_ampdu_tx *tid_tx; tid = ieee80211_get_tid(hdr); - tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + if (!tid_tx && aggr_check) { + ieee80211_aggr_check(sdata, tx->sta, skb); + tid_tx = rcu_dereference(tx->sta->ampdu_mlme.tid_tx[tid]); + } + if (tid_tx) { bool queued; @@ -4120,29 +4150,6 @@ void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) } EXPORT_SYMBOL(ieee80211_txq_schedule_start); -static void -ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, - struct sta_info *sta, - struct sk_buff *skb) -{ - struct rate_control_ref *ref = sdata->local->rate_ctrl; - u16 tid; - - if (!ref || !(ref->ops->capa & RATE_CTRL_CAPA_AMPDU_TRIGGER)) - return; - - if (!sta || !sta->sta.ht_cap.ht_supported || - !sta->sta.wme || skb_get_queue_mapping(skb) == IEEE80211_AC_VO || - skb->protocol == sdata->control_port_protocol) - return; - - tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - if (likely(sta->ampdu_mlme.tid_tx[tid])) - return; - - ieee80211_start_tx_ba_session(&sta->sta, tid, 0); -} - void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags, diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index 84f722d31fd7..a9526ac29dff 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -170,7 +170,6 @@ static int mctp_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, /* TODO: expand mctp_skb_cb for header fields? */ struct mctp_hdr *hdr = mctp_hdr(skb); - hdr = mctp_hdr(skb); addr = msg->msg_name; addr->smctp_family = AF_MCTP; addr->smctp_network = cb->net; diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index d2591ebf01d9..56263c2c4014 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -27,7 +27,6 @@ struct mptcp_pm_addr_entry { struct mptcp_addr_info addr; u8 flags; int ifindex; - struct rcu_head rcu; struct socket *lsk; }; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 83c52df85870..5c03e5106751 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -670,8 +670,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) return false; tstamp = nf_conn_tstamp_find(ct); - if (tstamp && tstamp->stop == 0) + if (tstamp) { + s32 timeout = ct->timeout - nfct_time_stamp; + tstamp->stop = ktime_get_real_ns(); + if (timeout < 0) + tstamp->stop -= jiffies_to_nsecs(-timeout); + } if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index ec3dd1c9c8f4..a106721a0bd7 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -321,7 +321,11 @@ EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload *flow) { - flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + u32 timeout; + + timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); + if (READ_ONCE(flow->timeout) != timeout) + WRITE_ONCE(flow->timeout, timeout); if (likely(!nf_flowtable_hw_offload(flow_table))) return; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index f92006cec94c..2bfd9f1b8f11 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -251,8 +251,7 @@ static int flow_offload_eth_src(struct net *net, flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8, &val, &mask); - if (dev) - dev_put(dev); + dev_put(dev); return 0; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4903da82dc04..6d12afabfe8a 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -51,18 +51,14 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) struct nf_hook_state *state = &entry->state; /* Release those devices we held, or Alexey will kill me. */ - if (state->in) - dev_put(state->in); - if (state->out) - dev_put(state->out); + dev_put(state->in); + dev_put(state->out); if (state->sk) sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->physin) - dev_put(entry->physin); - if (entry->physout) - dev_put(entry->physout); + dev_put(entry->physin); + dev_put(entry->physout); #endif } @@ -95,18 +91,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; - if (state->in) - dev_hold(state->in); - if (state->out) - dev_hold(state->out); + dev_hold(state->in); + dev_hold(state->out); if (state->sk) sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - if (entry->physin) - dev_hold(entry->physin); - if (entry->physout) - dev_hold(entry->physout); + dev_hold(entry->physin); + dev_hold(entry->physout); #endif } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index de182d1f7c4e..081437dd75b7 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8445,6 +8445,16 @@ static int nf_tables_commit_audit_alloc(struct list_head *adl, return 0; } +static void nf_tables_commit_audit_free(struct list_head *adl) +{ + struct nft_audit_data *adp, *adn; + + list_for_each_entry_safe(adp, adn, adl, list) { + list_del(&adp->list); + kfree(adp); + } +} + static void nf_tables_commit_audit_collect(struct list_head *adl, struct nft_table *table, u32 op) { @@ -8509,6 +8519,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); + nf_tables_commit_audit_free(&adl); return ret; } if (trans->msg_type == NFT_MSG_NEWRULE || @@ -8518,6 +8529,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { nf_tables_commit_chain_prepare_cancel(net); + nf_tables_commit_audit_free(&adl); return ret; } } diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 50b4e3c9347a..202f57d17bab 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -174,7 +174,9 @@ static const struct nf_hook_entries * nfnl_hook_entries_head(u8 pf, unsigned int hook, struct net *net, const char *dev) { const struct nf_hook_entries *hook_head = NULL; +#ifdef CONFIG_NETFILTER_INGRESS struct net_device *netdev; +#endif switch (pf) { case NFPROTO_IPV4: diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 8088b99f2ee3..304e33cbed9b 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -48,24 +48,30 @@ static void nft_last_eval(const struct nft_expr *expr, { struct nft_last_priv *priv = nft_expr_priv(expr); - priv->last_jiffies = jiffies; - priv->last_set = 1; + if (READ_ONCE(priv->last_jiffies) != jiffies) + WRITE_ONCE(priv->last_jiffies, jiffies); + if (READ_ONCE(priv->last_set) == 0) + WRITE_ONCE(priv->last_set, 1); } static int nft_last_dump(struct sk_buff *skb, const struct nft_expr *expr) { struct nft_last_priv *priv = nft_expr_priv(expr); + unsigned long last_jiffies = READ_ONCE(priv->last_jiffies); + u32 last_set = READ_ONCE(priv->last_set); __be64 msecs; - if (time_before(jiffies, priv->last_jiffies)) - priv->last_set = 0; + if (time_before(jiffies, last_jiffies)) { + WRITE_ONCE(priv->last_set, 0); + last_set = 0; + } - if (priv->last_set) - msecs = nf_jiffies64_to_msecs(jiffies - priv->last_jiffies); + if (last_set) + msecs = nf_jiffies64_to_msecs(jiffies - last_jiffies); else msecs = 0; - if (nla_put_be32(skb, NFTA_LAST_SET, htonl(priv->last_set)) || + if (nla_put_be32(skb, NFTA_LAST_SET, htonl(last_set)) || nla_put_be64(skb, NFTA_LAST_MSECS, msecs, NFTA_LAST_PAD)) goto nla_put_failure; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 0840c635b752..be1595d6979d 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -201,7 +201,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, alen = sizeof_field(struct nf_nat_range, min_addr.ip6); break; default: - return -EAFNOSUPPORT; + if (tb[NFTA_NAT_REG_ADDR_MIN]) + return -EAFNOSUPPORT; + break; } priv->family = family; diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 2483df0bbd7c..566ba4397ee4 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -492,8 +492,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, netlbl_af4list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr->s_addr, mask->s_addr); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { @@ -553,8 +552,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, netlbl_af6list_audit_addr(audit_buf, 1, (dev != NULL ? dev->name : NULL), addr, mask); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, &secctx, &secctx_len) == 0) { diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index a880dd33e901..511819fbfa67 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -59,8 +59,7 @@ static void nr_loopback_timer(struct timer_list *unused) if (dev == NULL || nr_rx_frame(skb, dev) == 0) kfree_skb(skb); - if (dev != NULL) - dev_put(dev); + dev_put(dev); if (!skb_queue_empty(&loopback_queue) && !nr_loopback_running()) mod_timer(&loopback_timer, jiffies + 10); diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index de0456073dc0..ddd5cbd455e3 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -582,8 +582,7 @@ struct net_device *nr_dev_first(void) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } - if (first) - dev_hold(first); + dev_hold(first); rcu_read_unlock(); return first; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 80a5c2a8e9fa..82ab39d80726 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -95,8 +95,8 @@ static void nci_req_cancel(struct nci_dev *ndev, int err) /* Execute request and wait for completion. */ static int __nci_request(struct nci_dev *ndev, - void (*req)(struct nci_dev *ndev, unsigned long opt), - unsigned long opt, __u32 timeout) + void (*req)(struct nci_dev *ndev, const void *opt), + const void *opt, __u32 timeout) { int rc = 0; long completion_rc; @@ -139,8 +139,8 @@ static int __nci_request(struct nci_dev *ndev, inline int nci_request(struct nci_dev *ndev, void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout) + const void *opt), + const void *opt, __u32 timeout) { int rc; @@ -155,7 +155,7 @@ inline int nci_request(struct nci_dev *ndev, return rc; } -static void nci_reset_req(struct nci_dev *ndev, unsigned long opt) +static void nci_reset_req(struct nci_dev *ndev, const void *opt) { struct nci_core_reset_cmd cmd; @@ -163,17 +163,17 @@ static void nci_reset_req(struct nci_dev *ndev, unsigned long opt) nci_send_cmd(ndev, NCI_OP_CORE_RESET_CMD, 1, &cmd); } -static void nci_init_req(struct nci_dev *ndev, unsigned long opt) +static void nci_init_req(struct nci_dev *ndev, const void *opt) { u8 plen = 0; if (opt) plen = sizeof(struct nci_core_init_v2_cmd); - nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, (void *)opt); + nci_send_cmd(ndev, NCI_OP_CORE_INIT_CMD, plen, opt); } -static void nci_init_complete_req(struct nci_dev *ndev, unsigned long opt) +static void nci_init_complete_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_disc_map_cmd cmd; struct disc_map_config *cfg = cmd.mapping_configs; @@ -215,10 +215,9 @@ struct nci_set_config_param { const __u8 *val; }; -static void nci_set_config_req(struct nci_dev *ndev, unsigned long opt) +static void nci_set_config_req(struct nci_dev *ndev, const void *opt) { - const struct nci_set_config_param *param = - (struct nci_set_config_param *)opt; + const struct nci_set_config_param *param = opt; struct nci_core_set_config_cmd cmd; BUG_ON(param->len > NCI_MAX_PARAM_LEN); @@ -236,10 +235,9 @@ struct nci_rf_discover_param { __u32 tm_protocols; }; -static void nci_rf_discover_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_discover_req(struct nci_dev *ndev, const void *opt) { - const struct nci_rf_discover_param *param = - (struct nci_rf_discover_param *)opt; + const struct nci_rf_discover_param *param = opt; struct nci_rf_disc_cmd cmd; cmd.num_disc_configs = 0; @@ -302,10 +300,9 @@ struct nci_rf_discover_select_param { __u8 rf_protocol; }; -static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_discover_select_req(struct nci_dev *ndev, const void *opt) { - const struct nci_rf_discover_select_param *param = - (struct nci_rf_discover_select_param *)opt; + const struct nci_rf_discover_select_param *param = opt; struct nci_rf_discover_select_cmd cmd; cmd.rf_discovery_id = param->rf_discovery_id; @@ -329,11 +326,11 @@ static void nci_rf_discover_select_req(struct nci_dev *ndev, unsigned long opt) sizeof(struct nci_rf_discover_select_cmd), &cmd); } -static void nci_rf_deactivate_req(struct nci_dev *ndev, unsigned long opt) +static void nci_rf_deactivate_req(struct nci_dev *ndev, const void *opt) { struct nci_rf_deactivate_cmd cmd; - cmd.type = opt; + cmd.type = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_RF_DEACTIVATE_CMD, sizeof(struct nci_rf_deactivate_cmd), &cmd); @@ -345,10 +342,9 @@ struct nci_cmd_param { const __u8 *payload; }; -static void nci_generic_req(struct nci_dev *ndev, unsigned long opt) +static void nci_generic_req(struct nci_dev *ndev, const void *opt) { - const struct nci_cmd_param *param = - (struct nci_cmd_param *)opt; + const struct nci_cmd_param *param = opt; nci_send_cmd(ndev, param->opcode, param->len, param->payload); } @@ -361,7 +357,7 @@ int nci_prop_cmd(struct nci_dev *ndev, __u8 oid, size_t len, const __u8 *payload param.len = len; param.payload = payload; - return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, + return __nci_request(ndev, nci_generic_req, ¶m, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_prop_cmd); @@ -375,21 +371,21 @@ int nci_core_cmd(struct nci_dev *ndev, __u16 opcode, size_t len, param.len = len; param.payload = payload; - return __nci_request(ndev, nci_generic_req, (unsigned long)¶m, + return __nci_request(ndev, nci_generic_req, ¶m, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_cmd); int nci_core_reset(struct nci_dev *ndev) { - return __nci_request(ndev, nci_reset_req, 0, + return __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } EXPORT_SYMBOL(nci_core_reset); int nci_core_init(struct nci_dev *ndev) { - return __nci_request(ndev, nci_init_req, 0, + return __nci_request(ndev, nci_init_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } EXPORT_SYMBOL(nci_core_init); @@ -399,9 +395,9 @@ struct nci_loopback_data { struct sk_buff *data; }; -static void nci_send_data_req(struct nci_dev *ndev, unsigned long opt) +static void nci_send_data_req(struct nci_dev *ndev, const void *opt) { - const struct nci_loopback_data *data = (struct nci_loopback_data *)opt; + const struct nci_loopback_data *data = opt; nci_send_data(ndev, data->conn_id, data->data); } @@ -462,7 +458,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, const void *data, size_t data_len, loopback_data.data = skb; ndev->cur_conn_id = conn_id; - r = nci_request(ndev, nci_send_data_req, (unsigned long)&loopback_data, + r = nci_request(ndev, nci_send_data_req, &loopback_data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK && resp) *resp = conn_info->rx_skb; @@ -495,7 +491,7 @@ static int nci_open_device(struct nci_dev *ndev) rc = ndev->ops->init(ndev); if (!rc) { - rc = __nci_request(ndev, nci_reset_req, 0, + rc = __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); } @@ -508,10 +504,10 @@ static int nci_open_device(struct nci_dev *ndev) .feature1 = NCI_FEATURE_DISABLE, .feature2 = NCI_FEATURE_DISABLE }; - unsigned long opt = 0; + const void *opt = NULL; if (ndev->nci_ver & NCI_VER_2_MASK) - opt = (unsigned long)&nci_init_v2_cmd; + opt = &nci_init_v2_cmd; rc = __nci_request(ndev, nci_init_req, opt, msecs_to_jiffies(NCI_INIT_TIMEOUT)); @@ -521,7 +517,7 @@ static int nci_open_device(struct nci_dev *ndev) rc = ndev->ops->post_setup(ndev); if (!rc) { - rc = __nci_request(ndev, nci_init_complete_req, 0, + rc = __nci_request(ndev, nci_init_complete_req, (void *)0, msecs_to_jiffies(NCI_INIT_TIMEOUT)); } @@ -571,7 +567,7 @@ static int nci_close_device(struct nci_dev *ndev) atomic_set(&ndev->cmd_cnt, 1); set_bit(NCI_INIT, &ndev->flags); - __nci_request(ndev, nci_reset_req, 0, + __nci_request(ndev, nci_reset_req, (void *)0, msecs_to_jiffies(NCI_RESET_TIMEOUT)); /* After this point our queues are empty @@ -637,15 +633,15 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, const __u8 *val) param.len = len; param.val = val; - return __nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + return __nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } EXPORT_SYMBOL(nci_set_config); -static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) +static void nci_nfcee_discover_req(struct nci_dev *ndev, const void *opt) { struct nci_nfcee_discover_cmd cmd; - __u8 action = opt; + __u8 action = (unsigned long)opt; cmd.discovery_action = action; @@ -654,15 +650,16 @@ static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) int nci_nfcee_discover(struct nci_dev *ndev, u8 action) { - return __nci_request(ndev, nci_nfcee_discover_req, action, + unsigned long opt = action; + + return __nci_request(ndev, nci_nfcee_discover_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_discover); -static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt) +static void nci_nfcee_mode_set_req(struct nci_dev *ndev, const void *opt) { - const struct nci_nfcee_mode_set_cmd *cmd = - (struct nci_nfcee_mode_set_cmd *)opt; + const struct nci_nfcee_mode_set_cmd *cmd = opt; nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, sizeof(struct nci_nfcee_mode_set_cmd), cmd); @@ -675,16 +672,14 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) cmd.nfcee_id = nfcee_id; cmd.nfcee_mode = nfcee_mode; - return __nci_request(ndev, nci_nfcee_mode_set_req, - (unsigned long)&cmd, + return __nci_request(ndev, nci_nfcee_mode_set_req, &cmd, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_nfcee_mode_set); -static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) +static void nci_core_conn_create_req(struct nci_dev *ndev, const void *opt) { - const struct core_conn_create_data *data = - (struct core_conn_create_data *)opt; + const struct core_conn_create_data *data = opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); } @@ -721,24 +716,26 @@ int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, } ndev->cur_dest_type = destination_type; - r = __nci_request(ndev, nci_core_conn_create_req, (unsigned long)&data, + r = __nci_request(ndev, nci_core_conn_create_req, &data, msecs_to_jiffies(NCI_CMD_TIMEOUT)); kfree(cmd); return r; } EXPORT_SYMBOL(nci_core_conn_create); -static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) +static void nci_core_conn_close_req(struct nci_dev *ndev, const void *opt) { - __u8 conn_id = opt; + __u8 conn_id = (unsigned long)opt; nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); } int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) { + unsigned long opt = conn_id; + ndev->cur_conn_id = conn_id; - return __nci_request(ndev, nci_core_conn_close_req, conn_id, + return __nci_request(ndev, nci_core_conn_close_req, (void *)opt, msecs_to_jiffies(NCI_CMD_TIMEOUT)); } EXPORT_SYMBOL(nci_core_conn_close); @@ -758,14 +755,14 @@ static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) param.id = NCI_PN_ATR_REQ_GEN_BYTES; - rc = nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + rc = nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); if (rc) return rc; param.id = NCI_LN_ATR_RES_GEN_BYTES; - return nci_request(ndev, nci_set_config_req, (unsigned long)¶m, + return nci_request(ndev, nci_set_config_req, ¶m, msecs_to_jiffies(NCI_SET_CONFIG_TIMEOUT)); } @@ -815,7 +812,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, pr_debug("target active or w4 select, implicitly deactivate\n"); rc = nci_request(ndev, nci_rf_deactivate_req, - NCI_DEACTIVATE_TYPE_IDLE_MODE, + (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); if (rc) return -EBUSY; @@ -837,7 +834,7 @@ static int nci_start_poll(struct nfc_dev *nfc_dev, param.im_protocols = im_protocols; param.tm_protocols = tm_protocols; - rc = nci_request(ndev, nci_rf_discover_req, (unsigned long)¶m, + rc = nci_request(ndev, nci_rf_discover_req, ¶m, msecs_to_jiffies(NCI_RF_DISC_TIMEOUT)); if (!rc) @@ -856,7 +853,8 @@ static void nci_stop_poll(struct nfc_dev *nfc_dev) return; } - nci_request(ndev, nci_rf_deactivate_req, NCI_DEACTIVATE_TYPE_IDLE_MODE, + nci_request(ndev, nci_rf_deactivate_req, + (void *)NCI_DEACTIVATE_TYPE_IDLE_MODE, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } @@ -915,8 +913,7 @@ static int nci_activate_target(struct nfc_dev *nfc_dev, else param.rf_protocol = NCI_RF_PROTOCOL_NFC_DEP; - rc = nci_request(ndev, nci_rf_discover_select_req, - (unsigned long)¶m, + rc = nci_request(ndev, nci_rf_discover_select_req, ¶m, msecs_to_jiffies(NCI_RF_DISC_SELECT_TIMEOUT)); } @@ -931,7 +928,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, __u8 mode) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); - u8 nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; + unsigned long nci_mode = NCI_DEACTIVATE_TYPE_IDLE_MODE; pr_debug("entry\n"); @@ -949,7 +946,7 @@ static void nci_deactivate_target(struct nfc_dev *nfc_dev, } if (atomic_read(&ndev->state) == NCI_POLL_ACTIVE) { - nci_request(ndev, nci_rf_deactivate_req, nci_mode, + nci_request(ndev, nci_rf_deactivate_req, (void *)nci_mode, msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } } @@ -987,8 +984,8 @@ static int nci_dep_link_down(struct nfc_dev *nfc_dev) } else { if (atomic_read(&ndev->state) == NCI_LISTEN_ACTIVE || atomic_read(&ndev->state) == NCI_DISCOVERY) { - nci_request(ndev, nci_rf_deactivate_req, 0, - msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); + nci_request(ndev, nci_rf_deactivate_req, (void *)0, + msecs_to_jiffies(NCI_RF_DEACTIVATE_TIMEOUT)); } rc = nfc_tm_deactivated(nfc_dev); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index a8ff794a8084..e199912ee1e5 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -195,9 +195,9 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return i; } -static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt) +static void nci_hci_send_data_req(struct nci_dev *ndev, const void *opt) { - const struct nci_data *data = (struct nci_data *)opt; + const struct nci_data *data = opt; nci_hci_send_data(ndev, data->pipe, data->cmd, data->data, data->data_len); @@ -240,7 +240,7 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, data.data = param; data.data_len = param_len; - r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; @@ -511,9 +511,8 @@ int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) data.data = NULL; data.data_len = 0; - return nci_request(ndev, nci_hci_send_data_req, - (unsigned long)&data, - msecs_to_jiffies(NCI_DATA_TIMEOUT)); + return nci_request(ndev, nci_hci_send_data_req, &data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); } EXPORT_SYMBOL(nci_hci_open_pipe); @@ -587,8 +586,7 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, data.data = tmp; data.data_len = param_len + 1; - r = nci_request(ndev, nci_hci_send_data_req, - (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { message = (struct nci_hcp_message *)conn_info->rx_skb->data; @@ -627,7 +625,7 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, data.data = &idx; data.data_len = 1; - r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + r = nci_request(ndev, nci_hci_send_data_req, &data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); if (r == NCI_STATUS_OK) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 57a1971f29e5..543365f58e97 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -250,8 +250,7 @@ static struct net_device *packet_cached_dev_get(struct packet_sock *po) rcu_read_lock(); dev = rcu_dereference(po->cached_dev); - if (likely(dev)) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; @@ -3024,8 +3023,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) out_free: kfree_skb(skb); out_unlock: - if (dev) - dev_put(dev); + dev_put(dev); out: return err; } @@ -3158,8 +3156,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, } } - if (dev) - dev_hold(dev); + dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; @@ -3196,8 +3193,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, packet_cached_dev_assign(po, dev); } } - if (dev_curr) - dev_put(dev_curr); + dev_put(dev_curr); if (proto == 0 || !need_rehook) goto out_unlock; @@ -4109,8 +4105,7 @@ static int packet_notifier(struct notifier_block *this, if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); WRITE_ONCE(po->ifindex, -1); - if (po->prot_hook.dev) - dev_put(po->prot_hook.dev); + dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; } spin_unlock(&po->bind_lock); diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index ca6ae4c59433..65218b7ce9f9 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -275,8 +275,7 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb, drop: kfree_skb(skb); - if (dev) - dev_put(dev); + dev_put(dev); return err; } EXPORT_SYMBOL(pn_skb_send); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 876d0ae5f9fd..cde671d29d5d 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -122,8 +122,7 @@ struct net_device *phonet_device_get(struct net *net) break; dev = NULL; } - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; } @@ -411,8 +410,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr) daddr >>= 2; rcu_read_lock(); dev = rcu_dereference(routes->table[daddr]); - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); if (!dev) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 2599235d592e..71e2caf6ab85 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -379,8 +379,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, saddr = PN_NO_ADDR; release_sock(sk); - if (dev) - dev_put(dev); + dev_put(dev); if (saddr == PN_NO_ADDR) return -EHOSTUNREACH; diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index fa611678af05..1dc955ca57d3 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -15,6 +15,7 @@ struct qrtr_mhi_dev { struct qrtr_endpoint ep; struct mhi_device *mhi_dev; struct device *dev; + struct completion ready; }; /* From MHI to QRTR */ @@ -50,6 +51,10 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep); int rc; + rc = wait_for_completion_interruptible(&qdev->ready); + if (rc) + goto free_skb; + if (skb->sk) sock_hold(skb->sk); @@ -79,7 +84,7 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, int rc; /* start channels */ - rc = mhi_prepare_for_transfer(mhi_dev); + rc = mhi_prepare_for_transfer(mhi_dev, 0); if (rc) return rc; @@ -96,6 +101,15 @@ static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev, if (rc) return rc; + /* start channels */ + rc = mhi_prepare_for_transfer(mhi_dev, MHI_CH_INBOUND_ALLOC_BUFS); + if (rc) { + qrtr_endpoint_unregister(&qdev->ep); + dev_set_drvdata(&mhi_dev->dev, NULL); + return rc; + } + + complete_all(&qdev->ready); dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n"); return 0; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index e71847877248..6c61b7b1838f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -518,8 +518,10 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (!ipc) goto err; - if (sock_queue_rcv_skb(&ipc->sk, skb)) + if (sock_queue_rcv_skb(&ipc->sk, skb)) { + qrtr_port_put(ipc); goto err; + } qrtr_port_put(ipc); } @@ -839,6 +841,8 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ + if (ipc) + qrtr_port_put(ipc); kfree_skb(skb); return -ENODEV; } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 998a2374f7ae..7dd3a2dc5fa4 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -495,7 +495,7 @@ int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; - p->tcfa_flags = flags; + p->tcfa_flags = flags & TCA_ACT_FLAGS_USER_MASK; if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, @@ -941,7 +941,7 @@ void tcf_idr_insert_many(struct tc_action *actions[]) } } -struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, +struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police, bool rtnl_held, struct netlink_ext_ack *extack) { @@ -951,7 +951,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, struct nlattr *kind; int err; - if (name == NULL) { + if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) @@ -967,7 +967,7 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, return ERR_PTR(err); } } else { - if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) { + if (strlcpy(act_name, "police", IFNAMSIZ) >= IFNAMSIZ) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(-EINVAL); } @@ -1004,12 +1004,11 @@ struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, struct tc_action_ops *a_o, int *init_res, - bool rtnl_held, - struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { - struct nla_bitfield32 flags = { 0, 0 }; + bool police = flags & TCA_ACT_FLAGS_POLICE; + struct nla_bitfield32 userflags = { 0, 0 }; u8 hw_stats = TCA_ACT_HW_STATS_ANY; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_cookie *cookie = NULL; @@ -1017,7 +1016,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, int err; /* backward compatibility for policer */ - if (name == NULL) { + if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) @@ -1032,22 +1031,22 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); if (tb[TCA_ACT_FLAGS]) - flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); + userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); - err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, - rtnl_held, tp, flags.value, extack); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, + userflags.value | flags, extack); } else { - err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, - tp, flags.value, extack); + err = a_o->init(net, nla, est, &a, tp, userflags.value | flags, + extack); } if (err < 0) goto err_out; *init_res = err; - if (!name && tb[TCA_ACT_COOKIE]) + if (!police && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); - if (!name) + if (!police) a->hw_stats = hw_stats; return a; @@ -1063,9 +1062,9 @@ err_out: /* Returns numbers of initialized actions or negative error. */ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, int bind, - struct tc_action *actions[], int init_res[], size_t *attr_size, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr *est, struct tc_action *actions[], + int init_res[], size_t *attr_size, u32 flags, + struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -1082,7 +1081,9 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; - a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack); + a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE, + !(flags & TCA_ACT_FLAGS_NO_RTNL), + extack); if (IS_ERR(a_o)) { err = PTR_ERR(a_o); goto err_mod; @@ -1091,9 +1092,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind, - ops[i - 1], &init_res[i - 1], rtnl_held, - extack); + act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1], + &init_res[i - 1], flags, extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -1113,7 +1113,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, goto err_mod; err: - tcf_action_destroy(actions, bind); + tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND); err_mod: for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { if (ops[i]) @@ -1495,7 +1495,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[], } static int tcf_action_add(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 portid, int ovr, + struct nlmsghdr *n, u32 portid, u32 flags, struct netlink_ext_ack *extack) { size_t attr_size = 0; @@ -1504,8 +1504,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, int init_res[TCA_ACT_MAX_PRIO] = {}; for (loop = 0; loop < 10; loop++) { - ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, - actions, init_res, &attr_size, true, extack); + ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res, + &attr_size, flags, extack); if (ret != -EAGAIN) break; } @@ -1535,7 +1535,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = NETLINK_CB(skb).portid; - int ret = 0, ovr = 0; + u32 flags = 0; + int ret = 0; if ((n->nlmsg_type != RTM_GETACTION) && !netlink_capable(skb, CAP_NET_ADMIN)) @@ -1561,8 +1562,8 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, * is zero) then just set this */ if (n->nlmsg_flags & NLM_F_REPLACE) - ovr = 1; - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr, + flags = TCA_ACT_FLAGS_REPLACE; + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags, extack); break; case RTM_DELACTION: diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index e409a0005717..040807aa15b9 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -275,11 +275,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, - int replace, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; @@ -317,7 +317,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (bind) return 0; - if (!replace) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*act, bind); return -EEXIST; } diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index e19885d7fe2c..94e78ac7a748 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -96,12 +96,12 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; @@ -144,7 +144,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci = to_connmark(*a); if (bind) return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 4fa4fcb842ba..a15ec95e69c3 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -41,11 +41,12 @@ static unsigned int csum_net_id; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -78,7 +79,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 1b4b3514c94f..ad9df0cb4b98 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -1235,11 +1235,11 @@ static int tcf_ct_fill_params(struct net *net, static int tcf_ct_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int replace, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ct_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_ct_params *params = NULL; struct nlattr *tb[TCA_CT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -1279,7 +1279,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, if (bind) return 0; - if (!replace) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index b20c8ce59905..549374a2d008 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -154,11 +154,11 @@ static const struct nla_policy ctinfo_policy[TCA_CTINFO_MAX + 1] = { static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; struct tcf_ctinfo_params *cp_new; @@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind) /* don't override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 73c3926358a0..d8dce173df37 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -52,11 +52,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GACT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_gact *parm; @@ -109,7 +109,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index a78cb7965718..7df72a4197a3 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -295,12 +295,12 @@ static void gate_setup_timer(struct tcf_gate *gact, u64 basetime, static int tcf_gate_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gate_net_id); enum tk_offsets tk_offset = TK_OFFS_TAI; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; struct tcf_chain *goto_ch = NULL; u64 cycletime = 0, basetime = 0; @@ -364,7 +364,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a2ddea04183a..7064a365a1a9 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -479,11 +479,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -532,7 +532,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, kfree(p); return err; } - err = load_metalist(tb2, rtnl_held); + err = load_metalist(tb2, !(flags & TCA_ACT_FLAGS_NO_RTNL)); if (err) { kfree(p); return err; @@ -560,7 +560,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); kfree(p); return -EEXIST; @@ -600,7 +600,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = populate_metalist(ife, tb2, exists, rtnl_held); + err = populate_metalist(ife, tb2, exists, + !(flags & TCA_ACT_FLAGS_NO_RTNL)); if (err) goto metadata_parse_err; } else { diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index ac7297f42355..265b1443e252 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -94,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, int ovr, int bind, + const struct tc_action_ops *ops, struct tcf_proto *tp, u32 flags) { struct tc_action_net *tn = net_generic(net, id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IPT_MAX + 1]; struct tcf_ipt *ipt; struct xt_entry_target *td, *t; @@ -154,7 +155,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (bind)/* dont override defaults */ return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } @@ -201,21 +202,21 @@ err1: } static int tcf_ipt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, - bind, tp, flags); + return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, + tp, flags); } static int tcf_xt_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool unlocked, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, - bind, tp, flags); + return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, + tp, flags); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 7153c67f641e..37f51d778728 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -78,8 +78,7 @@ static void tcf_mirred_release(struct tc_action *a) /* last reference to action, no need to lock */ dev = rcu_dereference_protected(m->tcfm_dev, 1); - if (dev) - dev_put(dev); + dev_put(dev); } static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { @@ -91,11 +90,11 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; @@ -155,7 +154,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } @@ -180,8 +179,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, mac_header_xmit = dev_is_mac_header_xmit(dev); dev = rcu_replace_pointer(m->tcfm_dev, dev, lockdep_is_held(&m->tcf_lock)); - if (dev) - dev_put(dev); + dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index d1486ea496a2..e4529b428cf4 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -152,11 +152,11 @@ static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = { static int tcf_mpls_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mpls_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MPLS_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tcf_mpls_params *p; @@ -255,7 +255,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 1ebd2a86d980..7dd6b586ba7f 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -34,11 +34,11 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { }; static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, - struct tc_action **a, int ovr, int bind, - bool rtnl_held, struct tcf_proto *tp, + struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_NAT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_nat *parm; @@ -70,7 +70,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, } else if (err > 0) { if (bind) return 0; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index b45304446e13..c6c862c459cc 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -136,11 +136,11 @@ nla_failure: static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_pedit_key *keys = NULL; @@ -198,7 +198,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } else if (err > 0) { if (bind) goto out_free; - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { ret = -EEXIST; goto out_release; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0fab8de176d2..832157a840fc 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -48,11 +48,11 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { int ret = 0, tcfp_result = TC_ACT_OK, err, size; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_police *parm; @@ -97,7 +97,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; spin_lock_init(&(to_police(*a)->tcfp_lock)); - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 6a0c16e4351d..230501eb9e06 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -34,11 +34,12 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { }; static int tcf_sample_init(struct net *net, struct nlattr *nla, - struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, struct tcf_proto *tp, + struct nlattr *est, struct tc_action **a, + struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; u32 psample_group_num, rate, index; @@ -75,7 +76,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, return ret; } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 726cc956d06f..cbbe1861d3a2 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -85,11 +85,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_DEF_MAX + 1]; struct tcf_chain *goto_ch = NULL; struct tc_defact *parm; @@ -147,7 +147,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, tcf_action_set_ctrlact(*a, parm->action, goto_ch); ret = ACT_P_CREATED; } else { - if (!ovr) { + if (!(flags & TCA_ACT_FLAGS_REPLACE)) { err = -EEXIST; goto release_idr; } diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e5f3fb8b00e3..605418538347 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -96,11 +96,11 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); + bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -186,7 +186,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(*a); - if (!ovr) { + if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 762ceec3e6f6..ecb9ee666095 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -100,11 +100,12 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); + bool ovr = flags & TCA_ACT_FLAGS_REPLACE; + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; struct tcf_chain *goto_ch = NULL; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 85c0d0d5b9da..d9cd174eecb7 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -355,11 +355,11 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; struct metadata_dst *metadata = NULL; @@ -504,7 +504,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(act_flags & TCA_ACT_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "TC IDR already exists"); ret = -EEXIST; goto release_tun_meta; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 71f2015c70ca..e4dc5a555bd8 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -114,11 +114,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - int ovr, int bind, bool rtnl_held, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); + bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_VLAN_MAX + 1]; struct tcf_chain *goto_ch = NULL; bool push_prio_exists = false; @@ -223,7 +223,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } ret = ACT_P_CREATED; - } else if (!ovr) { + } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) { tcf_idr_release(*a, bind); return -EEXIST; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7be5b9d2aead..4a7043a4e5d6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1949,6 +1949,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, int err; int tp_created; bool rtnl_held = false; + u32 flags; if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -1969,6 +1970,7 @@ replay: tp = NULL; cl = 0; block = NULL; + flags = 0; if (prio == 0) { /* If no priority is provided by the user, @@ -2112,9 +2114,12 @@ replay: goto errout; } + if (!(n->nlmsg_flags & NLM_F_CREATE)) + flags |= TCA_ACT_FLAGS_REPLACE; + if (!rtnl_held) + flags |= TCA_ACT_FLAGS_NO_RTNL; err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh, - n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE, - rtnl_held, extack); + flags, extack); if (err == 0) { tfilter_notify(net, skb, n, tp, block, q, parent, fh, RTM_NEWTFILTER, false, rtnl_held); @@ -3020,8 +3025,8 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr *rate_tlv, struct tcf_exts *exts, + u32 flags, struct netlink_ext_ack *extack) { #ifdef CONFIG_NET_CLS_ACT { @@ -3032,13 +3037,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { struct tc_action_ops *a_o; - a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack); + a_o = tc_action_load_ops(tb[exts->police], true, + !(flags & TCA_ACT_FLAGS_NO_RTNL), + extack); if (IS_ERR(a_o)) return PTR_ERR(a_o); + flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND; act = tcf_action_init_1(net, tp, tb[exts->police], - rate_tlv, "police", ovr, - TCA_ACT_BIND, a_o, init_res, - rtnl_held, extack); + rate_tlv, a_o, init_res, flags, + extack); module_put(a_o->owner); if (IS_ERR(act)) return PTR_ERR(act); @@ -3050,10 +3057,10 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } else if (exts->action && tb[exts->action]) { int err; + flags |= TCA_ACT_FLAGS_BIND; err = tcf_action_init(net, tp, tb[exts->action], - rate_tlv, NULL, ovr, TCA_ACT_BIND, - exts->actions, init_res, - &attr_size, rtnl_held, extack); + rate_tlv, exts->actions, init_res, + &attr_size, flags, extack); if (err < 0) return err; exts->nr_actions = err; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index f256a7c69093..8158fc9ee1ab 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -145,12 +145,12 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; @@ -169,8 +169,8 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, + u32 flags, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); @@ -216,7 +216,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, goto errout; } - err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr, + err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags, extack); if (err < 0) { if (!fold) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index fa739efa59f4..3b472bafdc9d 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -404,7 +404,7 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, struct cls_bpf_prog *prog, unsigned long base, - struct nlattr **tb, struct nlattr *est, bool ovr, + struct nlattr **tb, struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { bool is_bpf, is_ebpf, have_exts = false; @@ -416,7 +416,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true, + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags, extack); if (ret < 0) return ret; @@ -455,7 +455,7 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -500,7 +500,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; prog->handle = handle; - ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr, + ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags, extack); if (ret < 0) goto errout_idr; diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index fb881144fa01..ed00001b528a 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -76,7 +76,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work) static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; @@ -108,8 +108,8 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags, + extack); if (err < 0) goto errout; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 87398af2715a..972303aa8edd 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -387,7 +387,7 @@ static void flow_destroy_filter_work(struct work_struct *work) static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct flow_head *head = rtnl_dereference(tp->root); @@ -442,8 +442,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto err2; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, flags, + extack); if (err < 0) goto err2; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index d7869a984881..23b21253b3c3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -1915,23 +1915,22 @@ errout_cleanup: static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct cls_fl_filter *f, struct fl_flow_mask *mask, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, - struct fl_flow_tmplt *tmplt, bool rtnl_held, + struct nlattr *est, + struct fl_flow_tmplt *tmplt, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held, - extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); - if (!rtnl_held) + if (flags & TCA_ACT_FLAGS_NO_RTNL) rtnl_lock(); tcf_bind_filter(tp, &f->res, base); - if (!rtnl_held) + if (flags & TCA_ACT_FLAGS_NO_RTNL) rtnl_unlock(); } @@ -1975,10 +1974,11 @@ static int fl_ht_insert_unique(struct cls_fl_filter *fnew, static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_fl_head *head = fl_head_dereference(tp); + bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL); struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct fl_flow_mask *mask; @@ -2034,8 +2034,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, } } - err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr, - tp->chain->tmplt_priv, rtnl_held, extack); + err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], + tp->chain->tmplt_priv, flags, extack); if (err) goto errout; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index ec945294626a..8654b0ce997c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -198,15 +198,15 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_set_parms(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, - struct nlattr **tca, unsigned long base, bool ovr, + struct nlattr **tca, unsigned long base, u32 flags, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); u32 mask; int err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr, - true, extack); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, flags, + extack); if (err < 0) return err; @@ -237,8 +237,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp, static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, - bool ovr, bool rtnl_held, - struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = *arg; @@ -277,7 +276,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return err; } - err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr, extack); + err = fw_set_parms(net, tp, fnew, tb, tca, base, flags, extack); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -326,7 +325,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, f->id = handle; f->tp = tp; - err = fw_set_parms(net, tp, f, tb, tca, base, ovr, extack); + err = fw_set_parms(net, tp, f, tb, tca, base, flags, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index cafb84480bab..24f0046ce0b3 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -163,13 +163,12 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { static int mall_set_parms(struct net *net, struct tcf_proto *tp, struct cls_mall_head *head, unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true, - extack); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, flags, extack); if (err < 0) return err; @@ -183,13 +182,13 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct cls_mall_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_MATCHALL_MAX + 1]; struct cls_mall_head *new; - u32 flags = 0; + u32 userflags = 0; int err; if (!tca[TCA_OPTIONS]) @@ -204,8 +203,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, return err; if (tb[TCA_MATCHALL_FLAGS]) { - flags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]); - if (!tc_flags_valid(flags)) + userflags = nla_get_u32(tb[TCA_MATCHALL_FLAGS]); + if (!tc_flags_valid(userflags)) return -EINVAL; } @@ -220,14 +219,14 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (!handle) handle = 1; new->handle = handle; - new->flags = flags; + new->flags = userflags; new->pf = alloc_percpu(struct tc_matchall_pcnt); if (!new->pf) { err = -ENOMEM; goto err_alloc_percpu; } - err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr, + err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], flags, extack); if (err) goto err_set_parms; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 5efa3e7ace15..a35ab8c27866 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -382,7 +382,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct route4_filter *f, u32 handle, struct route4_head *head, struct nlattr **tb, struct nlattr *est, int new, - bool ovr, struct netlink_ext_ack *extack) + u32 flags, struct netlink_ext_ack *extack) { u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; @@ -390,7 +390,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_bucket *b; int err; - err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; @@ -464,8 +464,8 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, u32 flags, + struct netlink_ext_ack *extack) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -510,7 +510,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, } err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], new, ovr, extack); + tca[TCA_RATE], new, flags, extack); if (err < 0) goto errout; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 27a4b6dbcf57..5cd9d6b143c4 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -470,9 +470,8 @@ static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = { static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - void **arg, bool ovr, bool rtnl_held, + u32 handle, struct nlattr **tca, + void **arg, u32 flags, struct netlink_ext_ack *extack) { struct rsvp_head *data = rtnl_dereference(tp->root); @@ -499,7 +498,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true, + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags, extack); if (err < 0) goto errout2; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index e9a8a2c86bbd..742c7d49a958 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -330,7 +330,7 @@ static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, - struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_data *cp = NULL, *oldp; @@ -342,7 +342,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack); if (err < 0) goto errout; @@ -529,8 +529,8 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, - bool rtnl_held, struct netlink_ext_ack *extack) + struct nlattr **tca, void **arg, u32 flags, + struct netlink_ext_ack *extack) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; @@ -551,7 +551,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, return err; return tcindex_set_parms(net, tp, base, handle, p, r, tb, - tca[TCA_RATE], ovr, extack); + tca[TCA_RATE], flags, extack); } static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6e1abe805448..4272814487f0 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -709,12 +709,12 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { static int u32_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tc_u_knode *n, struct nlattr **tb, - struct nlattr *est, bool ovr, + struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; - err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, flags, extack); if (err < 0) return err; @@ -840,7 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, void **arg, bool ovr, bool rtnl_held, + struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct tc_u_common *tp_c = tp->data; @@ -849,7 +849,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct tc_u32_sel *s; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_U32_MAX + 1]; - u32 htid, flags = 0; + u32 htid, userflags = 0; size_t sel_size; int err; @@ -868,8 +868,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return err; if (tb[TCA_U32_FLAGS]) { - flags = nla_get_u32(tb[TCA_U32_FLAGS]); - if (!tc_flags_valid(flags)) { + userflags = nla_get_u32(tb[TCA_U32_FLAGS]); + if (!tc_flags_valid(userflags)) { NL_SET_ERR_MSG_MOD(extack, "Invalid filter flags"); return -EINVAL; } @@ -884,7 +884,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - if ((n->flags ^ flags) & + if ((n->flags ^ userflags) & ~(TCA_CLS_FLAGS_IN_HW | TCA_CLS_FLAGS_NOT_IN_HW)) { NL_SET_ERR_MSG_MOD(extack, "Key node flags do not match passed flags"); return -EINVAL; @@ -895,7 +895,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -ENOMEM; err = u32_set_parms(net, tp, base, new, tb, - tca[TCA_RATE], ovr, extack); + tca[TCA_RATE], flags, extack); if (err) { u32_destroy_key(new, false); @@ -955,9 +955,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->handle = handle; ht->prio = tp->prio; idr_init(&ht->handle_idr); - ht->flags = flags; + ht->flags = userflags; - err = u32_replace_hw_hnode(tp, ht, flags, extack); + err = u32_replace_hw_hnode(tp, ht, userflags, extack); if (err) { idr_remove(&tp_c->handle_idr, handle); kfree(ht); @@ -1038,7 +1038,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; - n->flags = flags; + n->flags = userflags; err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) @@ -1060,7 +1060,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], ovr, + err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE], flags, extack); if (err == 0) { struct tc_u_knode __rcu **ins; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d9ac60ffe927..a8dd06c74e31 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -913,7 +913,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, /* seqlock has the same scope of busylock, for NOLOCK qdisc */ spin_lock_init(&sch->seqlock); - lockdep_set_class(&sch->busylock, + lockdep_set_class(&sch->seqlock, dev->qdisc_tx_busylock ?: &qdisc_tx_busylock); seqcount_init(&sch->running); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 07b30d0601d7..9c79374457a0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1739,8 +1739,6 @@ static void taprio_attach(struct Qdisc *sch) if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; old = dev_graft_qdisc(qdisc->dev_queue, qdisc); - if (ntx < dev->real_num_tx_queues) - qdisc_hash_add(qdisc, false); } else { old = dev_graft_qdisc(qdisc->dev_queue, sch); qdisc_refcount_inc(sch); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index fe74c5f95630..db6b7373d16c 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -857,14 +857,18 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength); cur_key->key = key; - if (replace) { - list_del_init(&shkey->key_list); - sctp_auth_shkey_release(shkey); - if (asoc && asoc->active_key_id == auth_key->sca_keynumber) - sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + if (!replace) { + list_add(&cur_key->key_list, sh_keys); + return 0; } + + list_del_init(&shkey->key_list); + sctp_auth_shkey_release(shkey); list_add(&cur_key->key_list, sh_keys); + if (asoc && asoc->active_key_id == auth_key->sca_keynumber) + sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL); + return 0; } diff --git a/net/sctp/input.c b/net/sctp/input.c index eb3c2a34a31c..5ef86fdb1176 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -1203,7 +1203,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( if (unlikely(!af)) return NULL; - if (af->from_addr_param(&paddr, param, peer_port, 0)) + if (!af->from_addr_param(&paddr, param, peer_port, 0)) return NULL; return __sctp_lookup_association(net, laddr, &paddr, transportp); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e48dd909dee5..470dbdc27d58 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -100,8 +100,9 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, list_for_each_entry_safe(addr, temp, &net->sctp.local_addr_list, list) { if (addr->a.sa.sa_family == AF_INET6 && - ipv6_addr_equal(&addr->a.v6.sin6_addr, - &ifa->addr)) { + ipv6_addr_equal(&addr->a.v6.sin6_addr, + &ifa->addr) && + addr->a.v6.sin6_scope_id == ifa->idev->dev->ifindex) { sctp_addr_wq_mgmt(net, addr, SCTP_ADDR_DEL); found = 1; addr->valid = 0; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 09a8f23ec709..32df65f68c12 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1109,12 +1109,12 @@ enum sctp_disposition sctp_sf_send_probe(struct net *net, if (!sctp_transport_pl_enabled(transport)) return SCTP_DISPOSITION_CONSUME; - sctp_transport_pl_send(transport); - - reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); - if (!reply) - return SCTP_DISPOSITION_NOMEM; - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + if (sctp_transport_pl_send(transport)) { + reply = sctp_make_heartbeat(asoc, transport, transport->pl.probe_size); + if (!reply) + return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(reply)); + } sctp_add_cmd_sf(commands, SCTP_CMD_PROBE_TIMER_UPDATE, SCTP_TRANSPORT(transport)); @@ -1274,8 +1274,7 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, !sctp_transport_pl_enabled(link)) return SCTP_DISPOSITION_DISCARD; - sctp_transport_pl_recv(link); - if (link->pl.state == SCTP_PL_COMPLETE) + if (sctp_transport_pl_recv(link)) return SCTP_DISPOSITION_CONSUME; return sctp_sf_send_probe(net, ep, asoc, type, link, commands); diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 397a6244dd97..a3d3ca6dd63d 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -258,16 +258,13 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) sctp_transport_pl_update(transport); } -void sctp_transport_pl_send(struct sctp_transport *t) +bool sctp_transport_pl_send(struct sctp_transport *t) { - pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", - __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); - - if (t->pl.probe_count < SCTP_MAX_PROBES) { - t->pl.probe_count++; - return; - } + if (t->pl.probe_count < SCTP_MAX_PROBES) + goto out; + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; + t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { if (t->pl.probe_size == SCTP_BASE_PLPMTU) { /* BASE_PLPMTU Confirmation Failed */ t->pl.state = SCTP_PL_ERROR; /* Base -> Error */ @@ -299,14 +296,27 @@ void sctp_transport_pl_send(struct sctp_transport *t) sctp_assoc_sync_pmtu(t->asoc); } } - t->pl.probe_count = 1; + +out: + if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count < 30 && + !t->pl.probe_count && t->pl.last_rtx_chunks == t->asoc->rtx_data_chunks) { + t->pl.raise_count++; + return false; + } + + pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", + __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + + t->pl.probe_count++; + return true; } -void sctp_transport_pl_recv(struct sctp_transport *t) +bool sctp_transport_pl_recv(struct sctp_transport *t) { pr_debug("%s: PLPMTUD: transport: %p, state: %d, pmtu: %d, size: %d, high: %d\n", __func__, t, t->pl.state, t->pl.pmtu, t->pl.probe_size, t->pl.probe_high); + t->pl.last_rtx_chunks = t->asoc->rtx_data_chunks; t->pl.pmtu = t->pl.probe_size; t->pl.probe_count = 0; if (t->pl.state == SCTP_PL_BASE) { @@ -323,7 +333,7 @@ void sctp_transport_pl_recv(struct sctp_transport *t) if (!t->pl.probe_high) { t->pl.probe_size = min(t->pl.probe_size + SCTP_PL_BIG_STEP, SCTP_MAX_PLPMTU); - return; + return false; } t->pl.probe_size += SCTP_PL_MIN_STEP; if (t->pl.probe_size >= t->pl.probe_high) { @@ -335,14 +345,13 @@ void sctp_transport_pl_recv(struct sctp_transport *t) t->pathmtu = t->pl.pmtu + sctp_transport_pl_hlen(t); sctp_assoc_sync_pmtu(t->asoc); } - } else if (t->pl.state == SCTP_PL_COMPLETE) { - t->pl.raise_count++; - if (t->pl.raise_count == 30) { - /* Raise probe_size again after 30 * interval in Search Complete */ - t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ - t->pl.probe_size += SCTP_PL_MIN_STEP; - } + } else if (t->pl.state == SCTP_PL_COMPLETE && t->pl.raise_count == 30) { + /* Raise probe_size again after 30 * interval in Search Complete */ + t->pl.state = SCTP_PL_SEARCH; /* Search Complete -> Search */ + t->pl.probe_size += SCTP_PL_MIN_STEP; } + + return t->pl.state == SCTP_PL_COMPLETE; } static bool sctp_transport_pl_toobig(struct sctp_transport *t, u32 pmtu) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index cd0d7c908b2a..edc8962364f3 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1752,21 +1752,30 @@ out: return rc; } -/* convert the RMB size into the compressed notation - minimum 16K. +#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ +#define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */ + +/* convert the RMB size into the compressed notation (minimum 16K, see + * SMCD/R_DMBE_SIZES. * In contrast to plain ilog2, this rounds towards the next power of 2, * so the socket application gets at least its desired sndbuf / rcvbuf size. */ -static u8 smc_compress_bufsize(int size) +static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb) { + const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE; u8 compressed; if (size <= SMC_BUF_MIN_SIZE) return 0; - size = (size - 1) >> 14; - compressed = ilog2(size) + 1; - if (compressed >= SMC_RMBE_SIZES) - compressed = SMC_RMBE_SIZES - 1; + size = (size - 1) >> 14; /* convert to 16K multiple */ + compressed = min_t(u8, ilog2(size) + 1, + is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES); + + if (!is_smcd && is_rmb) + /* RMBs are backed by & limited to max size of scatterlists */ + compressed = min_t(u8, compressed, ilog2(max_scat >> 14)); + return compressed; } @@ -1982,17 +1991,12 @@ out: return rc; } -#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ - static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, bool is_dmb, int bufsize) { struct smc_buf_desc *buf_desc; int rc; - if (smc_compress_bufsize(bufsize) > SMCD_DMBE_SIZES) - return ERR_PTR(-EAGAIN); - /* try to alloc a new DMB */ buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); if (!buf_desc) @@ -2041,9 +2045,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) /* use socket send buffer size (w/o overhead) as start value */ sk_buf_size = smc->sk.sk_sndbuf / 2; - for (bufsize_short = smc_compress_bufsize(sk_buf_size); + for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); bufsize_short >= 0; bufsize_short--) { - if (is_rmb) { lock = &lgr->rmbs_lock; buf_list = &lgr->rmbs[bufsize_short]; @@ -2052,8 +2055,6 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) buf_list = &lgr->sndbufs[bufsize_short]; } bufsize = smc_uncompress_bufsize(bufsize_short); - if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) - continue; /* check for reusable slot in the link group */ buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list); diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 7d7ba0320d5a..a8845343d183 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -753,8 +753,7 @@ void smc_ib_ndev_change(struct net_device *ndev, unsigned long event) if (!libdev->ops.get_netdev) continue; lndev = libdev->ops.get_netdev(libdev, i + 1); - if (lndev) - dev_put(lndev); + dev_put(lndev); if (lndev != ndev) continue; if (event == NETDEV_REGISTER) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 6f6d33edb135..4a964e9190b0 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -394,8 +394,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, return 0; out_put: - if (ndev) - dev_put(ndev); + dev_put(ndev); return rc; } diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 0ae3478561f4..0b2c18efc079 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -809,3 +809,51 @@ int switchdev_handle_port_attr_set(struct net_device *dev, return err; } EXPORT_SYMBOL_GPL(switchdev_handle_port_attr_set); + +int switchdev_bridge_port_offload(struct net_device *brport_dev, + struct net_device *dev, const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb, + bool tx_fwd_offload, + struct netlink_ext_ack *extack) +{ + struct switchdev_notifier_brport_info brport_info = { + .brport = { + .dev = dev, + .ctx = ctx, + .atomic_nb = atomic_nb, + .blocking_nb = blocking_nb, + .tx_fwd_offload = tx_fwd_offload, + }, + }; + int err; + + ASSERT_RTNL(); + + err = call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_OFFLOADED, + brport_dev, &brport_info.info, + extack); + return notifier_to_errno(err); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_offload); + +void switchdev_bridge_port_unoffload(struct net_device *brport_dev, + const void *ctx, + struct notifier_block *atomic_nb, + struct notifier_block *blocking_nb) +{ + struct switchdev_notifier_brport_info brport_info = { + .brport = { + .ctx = ctx, + .atomic_nb = atomic_nb, + .blocking_nb = blocking_nb, + }, + }; + + ASSERT_RTNL(); + + call_switchdev_blocking_notifiers(SWITCHDEV_BRPORT_UNOFFLOADED, + brport_dev, &brport_info.info, + NULL); +} +EXPORT_SYMBOL_GPL(switchdev_bridge_port_unoffload); diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index e5c43d4d5a75..c9391d38de85 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -898,16 +898,10 @@ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, if (unlikely(!aead)) return -ENOKEY; - /* Cow skb data if needed */ - if (likely(!skb_cloned(skb) && - (!skb_is_nonlinear(skb) || !skb_has_frag_list(skb)))) { - nsg = 1 + skb_shinfo(skb)->nr_frags; - } else { - nsg = skb_cow_data(skb, 0, &unused); - if (unlikely(nsg < 0)) { - pr_err("RX: skb_cow_data() returned %d\n", nsg); - return nsg; - } + nsg = skb_cow_data(skb, 0, &unused); + if (unlikely(nsg < 0)) { + pr_err("RX: skb_cow_data() returned %d\n", nsg); + return nsg; } /* Allocate memory for the AEAD operation */ diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b0dd183a4dbc..b15b2b1b2f38 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -158,6 +158,7 @@ static void tipc_sk_remove(struct tipc_sock *tsk); static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz); static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz); static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack); +static int tipc_wait_for_connect(struct socket *sock, long *timeo_p); static const struct proto_ops packet_ops; static const struct proto_ops stream_ops; @@ -1515,8 +1516,13 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) rc = 0; } - if (unlikely(syn && !rc)) + if (unlikely(syn && !rc)) { tipc_set_sk_state(sk, TIPC_CONNECTING); + if (timeout) { + timeout = msecs_to_jiffies(timeout); + tipc_wait_for_connect(sock, &timeout); + } + } return rc ? rc : dlen; } @@ -1564,7 +1570,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) return -EMSGSIZE; /* Handle implicit connection setup */ - if (unlikely(dest)) { + if (unlikely(dest && sk->sk_state == TIPC_OPEN)) { rc = __tipc_sendmsg(sock, m, dlen); if (dlen && dlen == rc) { tsk->peer_caps = tipc_node_get_capabilities(net, dnode); @@ -2666,7 +2672,7 @@ static int tipc_listen(struct socket *sock, int len) static int tipc_wait_for_accept(struct socket *sock, long timeo) { struct sock *sk = sock->sk; - DEFINE_WAIT(wait); + DEFINE_WAIT_FUNC(wait, woken_wake_function); int err; /* True wake-one mechanism for incoming connections: only @@ -2675,12 +2681,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * anymore, the common case will execute the loop only once. */ for (;;) { - prepare_to_wait_exclusive(sk_sleep(sk), &wait, - TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { + add_wait_queue(sk_sleep(sk), &wait); release_sock(sk); - timeo = schedule_timeout(timeo); + timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock(sk); + remove_wait_queue(sk_sleep(sk), &wait); } err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) @@ -2692,7 +2698,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) if (signal_pending(current)) break; } - finish_wait(sk_sleep(sk), &wait); return err; } @@ -2709,9 +2714,10 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern) { struct sock *new_sk, *sk = sock->sk; - struct sk_buff *buf; struct tipc_sock *new_tsock; + struct msghdr m = {NULL,}; struct tipc_msg *msg; + struct sk_buff *buf; long timeo; int res; @@ -2757,19 +2763,17 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, } /* - * Respond to 'SYN-' by discarding it & returning 'ACK'-. - * Respond to 'SYN+' by queuing it on new socket. + * Respond to 'SYN-' by discarding it & returning 'ACK'. + * Respond to 'SYN+' by queuing it on new socket & returning 'ACK'. */ if (!msg_data_sz(msg)) { - struct msghdr m = {NULL,}; - tsk_advance_rx_queue(sk); - __tipc_sendstream(new_sock, &m, 0); } else { __skb_dequeue(&sk->sk_receive_queue); __skb_queue_head(&new_sk->sk_receive_queue, buf); skb_set_owner_r(buf, new_sk); } + __tipc_sendstream(new_sock, &m, 0); release_sock(new_sk); exit: release_sock(sk); diff --git a/net/unix/Kconfig b/net/unix/Kconfig index b6c4282899ec..b7f811216820 100644 --- a/net/unix/Kconfig +++ b/net/unix/Kconfig @@ -25,6 +25,11 @@ config UNIX_SCM depends on UNIX default y +config AF_UNIX_OOB + bool + depends on UNIX + default y + config UNIX_DIAG tristate "UNIX: socket monitoring interface" depends on UNIX diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 89927678c0dc..ec02e70a549b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -503,6 +503,12 @@ static void unix_sock_destructor(struct sock *sk) skb_queue_purge(&sk->sk_receive_queue); +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (u->oob_skb) { + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } +#endif WARN_ON(refcount_read(&sk->sk_wmem_alloc)); WARN_ON(!sk_unhashed(sk)); WARN_ON(sk->sk_socket); @@ -1543,6 +1549,53 @@ out: return err; } +static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + scm->fp = scm_fp_dup(UNIXCB(skb).fp); + + /* + * Garbage collection of unix sockets starts by selecting a set of + * candidate sockets which have reference only from being in flight + * (total_refs == inflight_refs). This condition is checked once during + * the candidate collection phase, and candidates are marked as such, so + * that non-candidates can later be ignored. While inflight_refs is + * protected by unix_gc_lock, total_refs (file count) is not, hence this + * is an instantaneous decision. + * + * Once a candidate, however, the socket must not be reinstalled into a + * file descriptor while the garbage collection is in progress. + * + * If the above conditions are met, then the directed graph of + * candidates (*) does not change while unix_gc_lock is held. + * + * Any operations that changes the file count through file descriptors + * (dup, close, sendmsg) does not change the graph since candidates are + * not installed in fds. + * + * Dequeing a candidate via recvmsg would install it into an fd, but + * that takes unix_gc_lock to decrement the inflight count, so it's + * serialized with garbage collection. + * + * MSG_PEEK is special in that it does not change the inflight count, + * yet does install the socket into an fd. The following lock/unlock + * pair is to ensure serialization with garbage collection. It must be + * done between incrementing the file count and installing the file into + * an fd. + * + * If garbage collection starts after the barrier provided by the + * lock/unlock, then it will see the elevated refcount and not mark this + * as a candidate. If a garbage collection is already in progress + * before the file count was incremented, then the lock/unlock pair will + * ensure that garbage collection is finished before progressing to + * installing the fd. + * + * (*) A -> B where B is on the queue of A or B is on the queue of C + * which is on the queue of listening socket A. + */ + spin_lock(&unix_gc_lock); + spin_unlock(&unix_gc_lock); +} + static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; @@ -1842,6 +1895,46 @@ out: */ #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768)) +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) +static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other) +{ + struct unix_sock *ousk = unix_sk(other); + struct sk_buff *skb; + int err = 0; + + skb = sock_alloc_send_skb(sock->sk, 1, msg->msg_flags & MSG_DONTWAIT, &err); + + if (!skb) + return err; + + skb_put(skb, 1); + skb->len = 1; + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, 1); + + if (err) { + kfree_skb(skb); + return err; + } + + unix_state_lock(other); + maybe_add_creds(skb, sock, other); + skb_get(skb); + + if (ousk->oob_skb) + kfree_skb(ousk->oob_skb); + + ousk->oob_skb = skb; + + scm_stat_add(other, skb); + skb_queue_tail(&other->sk_receive_queue, skb); + sk_send_sigurg(other); + unix_state_unlock(other); + other->sk_data_ready(other); + + return err; +} +#endif + static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1860,8 +1953,14 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, return err; err = -EOPNOTSUPP; - if (msg->msg_flags&MSG_OOB) - goto out_err; + if (msg->msg_flags & MSG_OOB) { +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) + if (len) + len--; + else +#endif + goto out_err; + } if (msg->msg_namelen) { err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; @@ -1926,6 +2025,15 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, sent += size; } +#if (IS_ENABLED(CONFIG_AF_UNIX_OOB)) + if (msg->msg_flags & MSG_OOB) { + err = queue_oob(sock, msg, other); + if (err) + goto out_err; + sent++; + } +#endif + scm_destroy(&scm); return sent; @@ -2192,7 +2300,7 @@ int __unix_dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t size, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); } err = (flags & MSG_TRUNC) ? skb->len - skip : size; @@ -2311,6 +2419,59 @@ struct unix_stream_read_state { unsigned int splice_flags; }; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) +static int unix_stream_recv_urg(struct unix_stream_read_state *state) +{ + struct socket *sock = state->socket; + struct sock *sk = sock->sk; + struct unix_sock *u = unix_sk(sk); + int chunk = 1; + + if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) + return -EINVAL; + + chunk = state->recv_actor(u->oob_skb, 0, chunk, state); + if (chunk < 0) + return -EFAULT; + + if (!(state->flags & MSG_PEEK)) { + UNIXCB(u->oob_skb).consumed += 1; + kfree_skb(u->oob_skb); + u->oob_skb = NULL; + } + state->msg->msg_flags |= MSG_OOB; + return 1; +} + +static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, + int flags, int copied) +{ + struct unix_sock *u = unix_sk(sk); + + if (!unix_skb_len(skb) && !(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + skb = NULL; + } else { + if (skb == u->oob_skb) { + if (copied) { + skb = NULL; + } else if (sock_flag(sk, SOCK_URGINLINE)) { + if (!(flags & MSG_PEEK)) { + u->oob_skb = NULL; + consume_skb(skb); + } + } else if (!(flags & MSG_PEEK)) { + skb_unlink(skb, &sk->sk_receive_queue); + consume_skb(skb); + skb = skb_peek(&sk->sk_receive_queue); + } + } + } + return skb; +} +#endif + static int unix_stream_read_generic(struct unix_stream_read_state *state, bool freezable) { @@ -2336,6 +2497,15 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, if (unlikely(flags & MSG_OOB)) { err = -EOPNOTSUPP; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + mutex_lock(&u->iolock); + unix_state_lock(sk); + + err = unix_stream_recv_urg(state); + + unix_state_unlock(sk); + mutex_unlock(&u->iolock); +#endif goto out; } @@ -2364,6 +2534,18 @@ redo: } last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; + +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + if (skb) { + skb = manage_oob(skb, sk, flags, copied); + if (!skb) { + unix_state_unlock(sk); + if (copied) + break; + goto redo; + } + } +#endif again: if (skb == NULL) { if (copied >= target) @@ -2482,7 +2664,7 @@ unlock: /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - scm.fp = scm_fp_dup(UNIXCB(skb).fp); + unix_peek_fds(&scm, skb); sk_peek_offset_fwd(sk, chunk); @@ -2699,6 +2881,20 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCUNIXFILE: err = unix_open_file(sk); break; +#if IS_ENABLED(CONFIG_AF_UNIX_OOB) + case SIOCATMARK: + { + struct sk_buff *skb; + struct unix_sock *u = unix_sk(sk); + int answ = 0; + + skb = skb_peek(&sk->sk_receive_queue); + if (skb && skb == u->oob_skb) + answ = 1; + err = put_user(answ, (int __user *)arg); + } + break; +#endif default: err = -ENOIOCTLCMD; break; diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index db0cda29fb2f..20f53575b5c9 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -44,7 +44,7 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg, { struct unix_sock *u = unix_sk(sk); struct sk_psock *psock; - int copied, ret; + int copied; psock = sk_psock_get(sk); if (unlikely(!psock)) @@ -53,8 +53,9 @@ static int unix_dgram_bpf_recvmsg(struct sock *sk, struct msghdr *msg, mutex_lock(&u->iolock); if (!skb_queue_empty(&sk->sk_receive_queue) && sk_psock_queue_empty(psock)) { - ret = __unix_dgram_recvmsg(sk, msg, len, flags); - goto out; + mutex_unlock(&u->iolock); + sk_psock_put(sk, psock); + return __unix_dgram_recvmsg(sk, msg, len, flags); } msg_bytes_ready: @@ -68,16 +69,15 @@ msg_bytes_ready: if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; - ret = __unix_dgram_recvmsg(sk, msg, len, flags); - goto out; + mutex_unlock(&u->iolock); + sk_psock_put(sk, psock); + return __unix_dgram_recvmsg(sk, msg, len, flags); } copied = -EAGAIN; } - ret = copied; -out: mutex_unlock(&u->iolock); sk_psock_put(sk, psock); - return ret; + return copied; } static struct proto *unix_prot_saved __read_mostly; @@ -105,6 +105,9 @@ static void unix_bpf_check_needs_rebuild(struct proto *ops) int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { + if (sk->sk_type != SOCK_DGRAM) + return -EOPNOTSUPP; + if (restore) { sk->sk_write_space = psock->saved_write_space; WRITE_ONCE(sk->sk_prot, psock->sk_proto); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 169ba8b72a63..081e7ae93cb1 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1079,6 +1079,9 @@ virtio_transport_recv_connected(struct sock *sk, virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; + case VIRTIO_VSOCK_OP_CREDIT_REQUEST: + virtio_transport_send_credit_update(vsk); + break; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: sk->sk_write_space(sk); break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 50eb405b0690..dceed5b5b226 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2351,7 +2351,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, goto nla_put_failure; for (band = state->band_start; - band < NUM_NL80211_BANDS; band++) { + band < (state->split ? + NUM_NL80211_BANDS : + NL80211_BAND_60GHZ + 1); + band++) { struct ieee80211_supported_band *sband; /* omit higher bands for ancient software */ @@ -6524,8 +6527,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) err = rdev_change_station(rdev, dev, mac_addr, ¶ms); out_put_vlan: - if (params.vlan) - dev_put(params.vlan); + dev_put(params.vlan); return err; } @@ -6760,8 +6762,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) err = rdev_add_station(rdev, dev, mac_addr, ¶ms); - if (params.vlan) - dev_put(params.vlan); + dev_put(params.vlan); return err; } @@ -8486,8 +8487,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out_free; nl80211_send_scan_start(rdev, wdev); - if (wdev->netdev) - dev_hold(wdev->netdev); + dev_hold(wdev->netdev); return 0; @@ -14857,9 +14857,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, return -ENETDOWN; } - if (dev) - dev_hold(dev); - + dev_hold(dev); info->user_ptr[0] = rdev; } @@ -14881,8 +14879,7 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, if (ops->internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; - if (wdev->netdev) - dev_put(wdev->netdev); + dev_put(wdev->netdev); } else { dev_put(info->user_ptr[1]); } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index f03c7ac8e184..11c68b159324 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -975,8 +975,7 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, } #endif - if (wdev->netdev) - dev_put(wdev->netdev); + dev_put(wdev->netdev); kfree(rdev->int_scan_req); rdev->int_scan_req = NULL; @@ -1754,16 +1753,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, * be grouped with this beacon for updates ... */ if (!cfg80211_combine_bsses(rdev, new)) { - kfree(new); + bss_ref_put(rdev, new); goto drop; } } if (rdev->bss_entries >= bss_entries_limit && !cfg80211_bss_expire_oldest(rdev)) { - if (!list_empty(&new->hidden_list)) - list_del(&new->hidden_list); - kfree(new); + bss_ref_put(rdev, new); goto drop; } diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index a20aec9d7393..2bf269390163 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -298,8 +298,16 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src) len = nlmsg_attrlen(nlh_src, xfrm_msg_min[type]); nla_for_each_attr(nla, attrs, len, remaining) { - int err = xfrm_xlate64_attr(dst, nla); + int err; + switch (type) { + case XFRM_MSG_NEWSPDINFO: + err = xfrm_nla_cpy(dst, nla, nla_len(nla)); + break; + default: + err = xfrm_xlate64_attr(dst, nla); + break; + } if (err) return err; } @@ -341,7 +349,8 @@ static int xfrm_alloc_compat(struct sk_buff *skb, const struct nlmsghdr *nlh_src /* Calculates len of translated 64-bit message. */ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, - struct nlattr *attrs[XFRMA_MAX+1]) + struct nlattr *attrs[XFRMA_MAX + 1], + int maxtype) { size_t len = nlmsg_len(src); @@ -358,10 +367,20 @@ static size_t xfrm_user_rcv_calculate_len64(const struct nlmsghdr *src, case XFRM_MSG_POLEXPIRE: len += 8; break; + case XFRM_MSG_NEWSPDINFO: + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + return len; default: break; } + /* Unexpected for anything, but XFRM_MSG_NEWSPDINFO, please + * correct both 64=>32-bit and 32=>64-bit translators to copy + * new attributes. + */ + if (WARN_ON_ONCE(maxtype)) + return len; + if (attrs[XFRMA_SA]) len += 4; if (attrs[XFRMA_POLICY]) @@ -440,7 +459,8 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, struct nlattr *attrs[XFRMA_MAX+1], - size_t size, u8 type, struct netlink_ext_ack *extack) + size_t size, u8 type, int maxtype, + struct netlink_ext_ack *extack) { size_t pos; int i; @@ -520,6 +540,25 @@ static int xfrm_xlate32(struct nlmsghdr *dst, const struct nlmsghdr *src, } pos = dst->nlmsg_len; + if (maxtype) { + /* attirbutes are xfrm_spdattr_type_t, not xfrm_attr_type_t */ + WARN_ON_ONCE(src->nlmsg_type != XFRM_MSG_NEWSPDINFO); + + for (i = 1; i <= maxtype; i++) { + int err; + + if (!attrs[i]) + continue; + + /* just copy - no need for translation */ + err = xfrm_attr_cpy32(dst, &pos, attrs[i], size, + nla_len(attrs[i]), nla_len(attrs[i])); + if (err) + return err; + } + return 0; + } + for (i = 1; i < XFRMA_MAX + 1; i++) { int err; @@ -564,7 +603,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (err < 0) return ERR_PTR(err); - len = xfrm_user_rcv_calculate_len64(h32, attrs); + len = xfrm_user_rcv_calculate_len64(h32, attrs, maxtype); /* The message doesn't need translation */ if (len == nlmsg_len(h32)) return NULL; @@ -574,7 +613,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, if (!h64) return ERR_PTR(-ENOMEM); - err = xfrm_xlate32(h64, h32, attrs, len, type, extack); + err = xfrm_xlate32(h64, h32, attrs, len, type, maxtype, extack); if (err < 0) { kvfree(h64); return ERR_PTR(err); diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 2e8afe078d61..cb40ff0ff28d 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -241,7 +241,7 @@ static void ipcomp_free_tfms(struct crypto_comp * __percpu *tfms) break; } - WARN_ON(!pos); + WARN_ON(list_entry_is_head(pos, &ipcomp_tfms_list, list)); if (--pos->users) return; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 827d84255021..7f881f5a5897 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -155,7 +155,6 @@ static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __ro_after_init; -static __read_mostly seqcount_mutex_t xfrm_policy_hash_generation; static struct rhashtable xfrm_policy_inexact_table; static const struct rhashtable_params xfrm_pol_inexact_params; @@ -585,7 +584,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); @@ -596,7 +595,7 @@ static void xfrm_bydst_resize(struct net *net, int dir) rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); @@ -1245,7 +1244,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); - write_seqcount_begin(&xfrm_policy_hash_generation); + write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. @@ -1354,7 +1353,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) out_unlock: __xfrm_policy_inexact_flush(net); - write_seqcount_end(&xfrm_policy_hash_generation); + write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); @@ -2091,15 +2090,12 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, if (unlikely(!daddr || !saddr)) return NULL; - retry: - sequence = read_seqcount_begin(&xfrm_policy_hash_generation); rcu_read_lock(); - - chain = policy_hash_direct(net, daddr, saddr, family, dir); - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); - goto retry; - } + retry: + do { + sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { @@ -2130,15 +2126,11 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } skip_inexact: - if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) { - rcu_read_unlock(); + if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)) goto retry; - } - if (ret && !xfrm_pol_hold_rcu(ret)) { - rcu_read_unlock(); + if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; - } fail: rcu_read_unlock(); @@ -4089,6 +4081,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock); + seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); rv = xfrm_statistics_init(net); @@ -4133,7 +4126,6 @@ void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); - seqcount_mutex_init(&xfrm_policy_hash_generation, &hash_resize_mutex); xfrm_input_init(); #ifdef CONFIG_XFRM_ESPINTCP diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b47d613409b7..7aff641c717d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2811,6 +2811,16 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, err = link->doit(skb, nlh, attrs); + /* We need to free skb allocated in xfrm_alloc_compat() before + * returning from this function, because consume_skb() won't take + * care of frag_list since netlink destructor sets + * sbk->head to NULL. (see netlink_skb_destructor()) + */ + if (skb_has_frag_list(skb)) { + kfree_skb(skb_shinfo(skb)->frag_list); + skb_shinfo(skb)->frag_list = NULL; + } + err: kvfree(nlh64); return err; diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore index 0b9548ea8477..fcba217f0ae2 100644 --- a/samples/bpf/.gitignore +++ b/samples/bpf/.gitignore @@ -45,11 +45,13 @@ xdp_monitor xdp_redirect xdp_redirect_cpu xdp_redirect_map +xdp_redirect_map_multi xdp_router_ipv4 xdp_rxq_info xdp_sample_pkts xdp_tx_iptunnel xdpsock +xdpsock_ctrl_proc xsk_fwd testfile.img hbm_out.log diff --git a/samples/bpf/test_override_return.sh b/samples/bpf/test_override_return.sh index e68b9ee6814b..35db26f736b9 100755 --- a/samples/bpf/test_override_return.sh +++ b/samples/bpf/test_override_return.sh @@ -1,5 +1,6 @@ #!/bin/bash +rm -r tmpmnt rm -f testfile.img dd if=/dev/zero of=testfile.img bs=1M seek=1000 count=1 DEVICE=$(losetup --show -f testfile.img) diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c index fdcd6580dd73..8be7ce18d3ba 100644 --- a/samples/bpf/tracex7_user.c +++ b/samples/bpf/tracex7_user.c @@ -14,6 +14,11 @@ int main(int argc, char **argv) int ret = 0; FILE *f; + if (!argv[1]) { + fprintf(stderr, "ERROR: Run with the btrfs device argument!\n"); + return 0; + } + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); obj = bpf_object__open_file(filename, NULL); if (libbpf_get_error(obj)) { diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c index 34b64394ed9c..f0c5d95084de 100644 --- a/samples/bpf/xdp1_kern.c +++ b/samples/bpf/xdp1_kern.c @@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx) h_proto = eth->h_proto; + /* Handle VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; @@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx) return rc; h_proto = vhdr->h_vlan_encapsulated_proto; } + /* Handle double VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c index c787f4b49646..d8a64ab077b0 100644 --- a/samples/bpf/xdp2_kern.c +++ b/samples/bpf/xdp2_kern.c @@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx) h_proto = eth->h_proto; + /* Handle VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; @@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx) return rc; h_proto = vhdr->h_vlan_encapsulated_proto; } + /* Handle double VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index d3ecdc18b9c1..9e225c96b77e 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -841,7 +841,7 @@ int main(int argc, char **argv) memset(cpu, 0, n_cpus * sizeof(int)); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:", + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n", long_options, &longindex)) != -1) { switch (opt) { case 'd': diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 33d0bdebbed8..49d7a6ad7e39 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017 - 2018 Intel Corporation. */ -#include <asm/barrier.h> #include <errno.h> #include <getopt.h> #include <libgen.h> #include <linux/bpf.h> -#include <linux/compiler.h> #include <linux/if_link.h> #include <linux/if_xdp.h> #include <linux/if_ether.h> @@ -653,17 +651,15 @@ out: return result; } -__sum16 ip_fast_csum(const void *iph, unsigned int ihl); - /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. * This function code has been taken from * Linux kernel lib/checksum.c */ -__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { - return (__force __sum16)~do_csum(iph, ihl * 4); + return (__sum16)~do_csum(iph, ihl * 4); } /* @@ -673,11 +669,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) */ static inline __sum16 csum_fold(__wsum csum) { - u32 sum = (__force u32)csum; + u32 sum = (u32)csum; sum = (sum & 0xffff) + (sum >> 16); sum = (sum & 0xffff) + (sum >> 16); - return (__force __sum16)~sum; + return (__sum16)~sum; } /* @@ -703,16 +699,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { - unsigned long long s = (__force u32)sum; + unsigned long long s = (u32)sum; - s += (__force u32)saddr; - s += (__force u32)daddr; + s += (u32)saddr; + s += (u32)daddr; #ifdef __BIG_ENDIAN__ s += proto + len; #else s += (proto + len) << 8; #endif - return (__force __wsum)from64to32(s); + return (__wsum)from64to32(s); } /* diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index c17e48020ec3..8f6b13ae46bf 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -173,39 +173,6 @@ my $mcount_regex; # Find the call site to mcount (return offset) my $mcount_adjust; # Address adjustment to mcount offset my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command -my $can_use_local = 0; # If we can use local function references - -# Shut up recordmcount if user has older objcopy -my $quiet_recordmcount = ".tmp_quiet_recordmcount"; -my $print_warning = 1; -$print_warning = 0 if ( -f $quiet_recordmcount); - -## -# check_objcopy - whether objcopy supports --globalize-symbols -# -# --globalize-symbols came out in 2.17, we must test the version -# of objcopy, and if it is less than 2.17, then we can not -# record local functions. -sub check_objcopy -{ - open (IN, "$objcopy --version |") or die "error running $objcopy"; - while (<IN>) { - if (/objcopy.*\s(\d+)\.(\d+)/) { - $can_use_local = 1 if ($1 > 2 || ($1 == 2 && $2 >= 17)); - last; - } - } - close (IN); - - if (!$can_use_local && $print_warning) { - print STDERR "WARNING: could not find objcopy version or version " . - "is less than 2.17.\n" . - "\tLocal function references are disabled.\n"; - open (QUIET, ">$quiet_recordmcount"); - printf QUIET "Disables the warning from recordmcount.pl\n"; - close QUIET; - } -} if ($arch =~ /(x86(_64)?)|(i386)/) { if ($bits == 64) { @@ -434,8 +401,6 @@ if ($filename =~ m,^(.*)(\.\S),) { my $mcount_s = $dirname . "/.tmp_mc_" . $prefix . ".s"; my $mcount_o = $dirname . "/.tmp_mc_" . $prefix . ".o"; -check_objcopy(); - # # Step 1: find all the local (static functions) and weak symbols. # 't' is local, 'w/W' is weak @@ -473,11 +438,6 @@ sub update_funcs # is this function static? If so, note this fact. if (defined $locals{$ref_func}) { - - # only use locals if objcopy supports globalize-symbols - if (!$can_use_local) { - return; - } $convert{$ref_func} = 1; } diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 74f8aadfd4cb..7011fbe003ff 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -17,7 +17,7 @@ Usage: $ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func Wait some times but not too much, the script is a bit slow. Break the pipe (Ctrl + Z) - $ scripts/draw_functrace.py < raw_trace_func > draw_functrace + $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace Then you have your drawn trace in draw_functrace """ @@ -103,10 +103,10 @@ def parseLine(line): line = line.strip() if line.startswith("#"): raise CommentLineException - m = re.match("[^]]+?\\] +([0-9.]+): (\\w+) <-(\\w+)", line) + m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line) if m is None: raise BrokenLineException - return (m.group(1), m.group(2), m.group(3)) + return (m.group(2), m.group(3), m.group(4)) def main(): diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index defc5ef35c66..0ae1b718194a 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -874,7 +874,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) rc = sidtab_init(s); if (rc) { pr_err("SELinux: out of memory on SID table init\n"); - goto out; + return rc; } head = p->ocontexts[OCON_ISID]; @@ -885,7 +885,7 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) if (sid == SECSID_NULL) { pr_err("SELinux: SID 0 was assigned a context.\n"); sidtab_destroy(s); - goto out; + return -EINVAL; } /* Ignore initial SIDs unused by this kernel. */ @@ -897,12 +897,10 @@ int policydb_load_isids(struct policydb *p, struct sidtab *s) pr_err("SELinux: unable to load initial SID %s.\n", name); sidtab_destroy(s); - goto out; + return rc; } } - rc = 0; -out: - return rc; + return 0; } int policydb_class_isvalid(struct policydb *p, unsigned int class) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 14e32825c339..6a2971a7e6a1 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -246,12 +246,18 @@ static bool hw_support_mmap(struct snd_pcm_substream *substream) if (!(substream->runtime->hw.info & SNDRV_PCM_INFO_MMAP)) return false; - if (substream->ops->mmap || - (substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV && - substream->dma_buffer.dev.type != SNDRV_DMA_TYPE_DEV_UC)) + if (substream->ops->mmap) return true; - return dma_can_mmap(substream->dma_buffer.dev.dev); + switch (substream->dma_buffer.dev.type) { + case SNDRV_DMA_TYPE_UNKNOWN: + return false; + case SNDRV_DMA_TYPE_CONTINUOUS: + case SNDRV_DMA_TYPE_VMALLOC: + return true; + default: + return dma_can_mmap(substream->dma_buffer.dev.dev); + } } static int constrain_mask_params(struct snd_pcm_substream *substream, @@ -3063,9 +3069,14 @@ static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream, boundary = 0x7fffffff; snd_pcm_stream_lock_irq(substream); /* FIXME: we should consider the boundary for the sync from app */ - if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) - control->appl_ptr = scontrol.appl_ptr; - else + if (!(sflags & SNDRV_PCM_SYNC_PTR_APPL)) { + err = pcm_lib_apply_appl_ptr(substream, + scontrol.appl_ptr); + if (err < 0) { + snd_pcm_stream_unlock_irq(substream); + return err; + } + } else scontrol.appl_ptr = control->appl_ptr % boundary; if (!(sflags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) control->avail_min = scontrol.avail_min; @@ -3664,6 +3675,8 @@ static vm_fault_t snd_pcm_mmap_data_fault(struct vm_fault *vmf) return VM_FAULT_SIGBUS; if (substream->ops->page) page = substream->ops->page(substream, offset); + else if (!snd_pcm_get_dma_buf(substream)) + page = virt_to_page(runtime->dma_area + offset); else page = snd_sgbuf_get_page(snd_pcm_get_dma_buf(substream), offset); if (!page) diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index d8be146793ee..c9d0ba353463 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -319,6 +319,10 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, .device = 0x4b55, }, + { + .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC, + .device = 0x4b58, + }, #endif /* Alder Lake */ diff --git a/sound/isa/sb/sb16_csp.c b/sound/isa/sb/sb16_csp.c index 5bbe6695689d..7ad8c5f7b664 100644 --- a/sound/isa/sb/sb16_csp.c +++ b/sound/isa/sb/sb16_csp.c @@ -816,6 +816,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7); + spin_unlock_irqrestore(&p->chip->mixer_lock, flags); spin_lock(&p->chip->reg_lock); set_mode_register(p->chip, 0xc0); /* c0 = STOP */ @@ -855,6 +856,7 @@ static int snd_sb_csp_start(struct snd_sb_csp * p, int sample_width, int channel spin_unlock(&p->chip->reg_lock); /* restore PCM volume */ + spin_lock_irqsave(&p->chip->mixer_lock, flags); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR); spin_unlock_irqrestore(&p->chip->mixer_lock, flags); @@ -880,6 +882,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p) mixR = snd_sbmixer_read(p->chip, SB_DSP4_PCM_DEV + 1); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL & 0x7); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR & 0x7); + spin_unlock_irqrestore(&p->chip->mixer_lock, flags); spin_lock(&p->chip->reg_lock); if (p->running & SNDRV_SB_CSP_ST_QSOUND) { @@ -894,6 +897,7 @@ static int snd_sb_csp_stop(struct snd_sb_csp * p) spin_unlock(&p->chip->reg_lock); /* restore PCM volume */ + spin_lock_irqsave(&p->chip->mixer_lock, flags); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV, mixL); snd_sbmixer_write(p->chip, SB_DSP4_PCM_DEV + 1, mixR); spin_unlock_irqrestore(&p->chip->mixer_lock, flags); diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 4b2cc8cb55c4..e143e69d8184 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1940,6 +1940,8 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) static const struct snd_pci_quirk force_connect_list[] = { SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), + SND_PCI_QUIRK(0x1462, 0xec94, "MS-7C94", 1), + SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", 1), {} }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 1389cfd5e0db..caaf0e8aac11 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -8626,6 +8626,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3151, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP), diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c index 84e3906abd4f..9449fb40a956 100644 --- a/sound/soc/amd/acp-da7219-max98357a.c +++ b/sound/soc/amd/acp-da7219-max98357a.c @@ -576,6 +576,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { | SND_SOC_DAIFMT_CBM_CFM, .init = cz_rt5682_init, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_play_ops, SND_SOC_DAILINK_REG(designware1, rt5682, platform), }, @@ -585,6 +586,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_cap_ops, SND_SOC_DAILINK_REG(designware2, rt5682, platform), }, @@ -594,6 +596,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_playback = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_max_play_ops, SND_SOC_DAILINK_REG(designware3, mx, platform), }, @@ -604,6 +607,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_dmic0_cap_ops, SND_SOC_DAILINK_REG(designware3, adau, platform), }, @@ -614,6 +618,7 @@ static struct snd_soc_dai_link cz_dai_5682_98357[] = { .dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBM_CFM, .dpcm_capture = 1, + .stop_dma_first = 1, .ops = &cz_rt5682_dmic1_cap_ops, SND_SOC_DAILINK_REG(designware2, adau, platform), }, diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig index 7ebae3f09435..a3b784ed4f70 100644 --- a/sound/soc/codecs/Kconfig +++ b/sound/soc/codecs/Kconfig @@ -1325,7 +1325,7 @@ config SND_SOC_SSM2305 high-efficiency mono Class-D audio power amplifiers. config SND_SOC_SSM2518 - tristate + tristate "Analog Devices SSM2518 Class-D Amplifier" depends on I2C config SND_SOC_SSM2602 @@ -1557,6 +1557,7 @@ config SND_SOC_WCD934X Qualcomm SoCs like SDM845. config SND_SOC_WCD938X + depends on SND_SOC_WCD938X_SDW tristate config SND_SOC_WCD938X_SDW @@ -1813,11 +1814,6 @@ config SND_SOC_ZL38060 which consists of a Digital Signal Processor (DSP), several Digital Audio Interfaces (DAIs), analog outputs, and a block of 14 GPIOs. -config SND_SOC_ZX_AUD96P22 - tristate "ZTE ZX AUD96P22 CODEC" - depends on I2C - select REGMAP_I2C - # Amp config SND_SOC_LM4857 tristate diff --git a/sound/soc/codecs/rt5631.c b/sound/soc/codecs/rt5631.c index 3000bc128b5b..38356ea2bd6e 100644 --- a/sound/soc/codecs/rt5631.c +++ b/sound/soc/codecs/rt5631.c @@ -1695,6 +1695,8 @@ static const struct regmap_config rt5631_regmap_config = { .reg_defaults = rt5631_reg, .num_reg_defaults = ARRAY_SIZE(rt5631_reg), .cache_type = REGCACHE_RBTREE, + .use_single_read = true, + .use_single_write = true, }; static int rt5631_i2c_probe(struct i2c_client *i2c, diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index e4c91571abae..abcd6f483788 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -973,10 +973,14 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, RT5682_TRIG_JD_MASK, RT5682_TRIG_JD_LOW); - if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS")) + if (!snd_soc_dapm_get_pin_status(dapm, "MICBIAS") && + !snd_soc_dapm_get_pin_status(dapm, "PLL1") && + !snd_soc_dapm_get_pin_status(dapm, "PLL2B")) snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_MB, 0); - if (!snd_soc_dapm_get_pin_status(dapm, "Vref2")) + if (!snd_soc_dapm_get_pin_status(dapm, "Vref2") && + !snd_soc_dapm_get_pin_status(dapm, "PLL1") && + !snd_soc_dapm_get_pin_status(dapm, "PLL2B")) snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2, 0); snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3, diff --git a/sound/soc/codecs/tlv320aic31xx.c b/sound/soc/codecs/tlv320aic31xx.c index 51870d50f419..b504d63385b3 100644 --- a/sound/soc/codecs/tlv320aic31xx.c +++ b/sound/soc/codecs/tlv320aic31xx.c @@ -1604,6 +1604,8 @@ static int aic31xx_i2c_probe(struct i2c_client *i2c, ret); return ret; } + regcache_cache_only(aic31xx->regmap, true); + aic31xx->dev = &i2c->dev; aic31xx->irq = i2c->irq; diff --git a/sound/soc/codecs/tlv320aic31xx.h b/sound/soc/codecs/tlv320aic31xx.h index 81952984613d..2513922a0292 100644 --- a/sound/soc/codecs/tlv320aic31xx.h +++ b/sound/soc/codecs/tlv320aic31xx.h @@ -151,8 +151,8 @@ struct aic31xx_pdata { #define AIC31XX_WORD_LEN_24BITS 0x02 #define AIC31XX_WORD_LEN_32BITS 0x03 #define AIC31XX_IFACE1_MASTER_MASK GENMASK(3, 2) -#define AIC31XX_BCLK_MASTER BIT(2) -#define AIC31XX_WCLK_MASTER BIT(3) +#define AIC31XX_BCLK_MASTER BIT(3) +#define AIC31XX_WCLK_MASTER BIT(2) /* AIC31XX_DATA_OFFSET */ #define AIC31XX_DATA_OFFSET_MASK GENMASK(7, 0) diff --git a/sound/soc/codecs/tlv320aic32x4.c b/sound/soc/codecs/tlv320aic32x4.c index c63b717040ed..dcd8aeb45cb3 100644 --- a/sound/soc/codecs/tlv320aic32x4.c +++ b/sound/soc/codecs/tlv320aic32x4.c @@ -250,8 +250,8 @@ static DECLARE_TLV_DB_SCALE(tlv_pcm, -6350, 50, 0); static DECLARE_TLV_DB_SCALE(tlv_driver_gain, -600, 100, 0); /* -12dB min, 0.5dB steps */ static DECLARE_TLV_DB_SCALE(tlv_adc_vol, -1200, 50, 0); - -static DECLARE_TLV_DB_LINEAR(tlv_spk_vol, TLV_DB_GAIN_MUTE, 0); +/* -6dB min, 1dB steps */ +static DECLARE_TLV_DB_SCALE(tlv_tas_driver_gain, -5850, 50, 0); static DECLARE_TLV_DB_SCALE(tlv_amp_vol, 0, 600, 1); static const char * const lo_cm_text[] = { @@ -1063,21 +1063,20 @@ static const struct snd_soc_component_driver soc_component_dev_aic32x4 = { }; static const struct snd_kcontrol_new aic32x4_tas2505_snd_controls[] = { - SOC_DOUBLE_R_S_TLV("PCM Playback Volume", AIC32X4_LDACVOL, - AIC32X4_LDACVOL, 0, -0x7f, 0x30, 7, 0, tlv_pcm), + SOC_SINGLE_S8_TLV("PCM Playback Volume", + AIC32X4_LDACVOL, -0x7f, 0x30, tlv_pcm), SOC_ENUM("DAC Playback PowerTune Switch", l_ptm_enum), - SOC_DOUBLE_R_S_TLV("HP Driver Playback Volume", AIC32X4_HPLGAIN, - AIC32X4_HPLGAIN, 0, -0x6, 0x1d, 5, 0, - tlv_driver_gain), - SOC_DOUBLE_R("HP DAC Playback Switch", AIC32X4_HPLGAIN, - AIC32X4_HPLGAIN, 6, 0x01, 1), - SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0), + SOC_SINGLE_TLV("HP Driver Gain Volume", + AIC32X4_HPLGAIN, 0, 0x74, 1, tlv_tas_driver_gain), + SOC_SINGLE("HP DAC Playback Switch", AIC32X4_HPLGAIN, 6, 1, 1), - SOC_SINGLE_RANGE_TLV("Speaker Driver Playback Volume", TAS2505_SPKVOL1, - 0, 0, 117, 1, tlv_spk_vol), - SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", TAS2505_SPKVOL2, - 4, 5, 0, tlv_amp_vol), + SOC_SINGLE_TLV("Speaker Driver Playback Volume", + TAS2505_SPKVOL1, 0, 0x74, 1, tlv_tas_driver_gain), + SOC_SINGLE_TLV("Speaker Amplifier Playback Volume", + TAS2505_SPKVOL2, 4, 5, 0, tlv_amp_vol), + + SOC_SINGLE("Auto-mute Switch", AIC32X4_DACMUTE, 4, 7, 0), }; static const struct snd_kcontrol_new hp_output_mixer_controls[] = { diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index 78b76eceff8f..2fcc97370be2 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -3317,13 +3317,6 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component) (WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0); } - ret = wcd938x_irq_init(wcd938x, component->dev); - if (ret) { - dev_err(component->dev, "%s: IRQ init failed: %d\n", - __func__, ret); - return ret; - } - wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip, WCD938X_IRQ_HPHR_PDM_WD_INT); wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip, @@ -3553,7 +3546,6 @@ static int wcd938x_bind(struct device *dev) } wcd938x->sdw_priv[AIF1_PB] = dev_get_drvdata(wcd938x->rxdev); wcd938x->sdw_priv[AIF1_PB]->wcd938x = wcd938x; - wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; wcd938x->txdev = wcd938x_sdw_device_get(wcd938x->txnode); if (!wcd938x->txdev) { @@ -3562,7 +3554,6 @@ static int wcd938x_bind(struct device *dev) } wcd938x->sdw_priv[AIF1_CAP] = dev_get_drvdata(wcd938x->txdev); wcd938x->sdw_priv[AIF1_CAP]->wcd938x = wcd938x; - wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq; wcd938x->tx_sdw_dev = dev_to_sdw_dev(wcd938x->txdev); if (!wcd938x->tx_sdw_dev) { dev_err(dev, "could not get txslave with matching of dev\n"); @@ -3595,6 +3586,15 @@ static int wcd938x_bind(struct device *dev) return PTR_ERR(wcd938x->regmap); } + ret = wcd938x_irq_init(wcd938x, dev); + if (ret) { + dev_err(dev, "%s: IRQ init failed: %d\n", __func__, ret); + return ret; + } + + wcd938x->sdw_priv[AIF1_PB]->slave_irq = wcd938x->virq; + wcd938x->sdw_priv[AIF1_CAP]->slave_irq = wcd938x->virq; + ret = wcd938x_set_micbias_data(wcd938x); if (ret < 0) { dev_err(dev, "%s: bad micbias pdata\n", __func__); diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 37aa020f23f6..549d98241dae 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -282,6 +282,7 @@ /* * HALO_CCM_CORE_CONTROL */ +#define HALO_CORE_RESET 0x00000200 #define HALO_CORE_EN 0x00000001 /* @@ -1213,7 +1214,7 @@ static int wm_coeff_tlv_get(struct snd_kcontrol *kctl, mutex_lock(&ctl->dsp->pwr_lock); - ret = wm_coeff_read_ctrl_raw(ctl, ctl->cache, size); + ret = wm_coeff_read_ctrl(ctl, ctl->cache, size); if (!ret && copy_to_user(bytes, ctl->cache, size)) ret = -EFAULT; @@ -3333,7 +3334,8 @@ static int wm_halo_start_core(struct wm_adsp *dsp) { return regmap_update_bits(dsp->regmap, dsp->base + HALO_CCM_CORE_CONTROL, - HALO_CORE_EN, HALO_CORE_EN); + HALO_CORE_RESET | HALO_CORE_EN, + HALO_CORE_RESET | HALO_CORE_EN); } static void wm_halo_stop_core(struct wm_adsp *dsp) diff --git a/sound/soc/intel/boards/sof_sdw_max98373.c b/sound/soc/intel/boards/sof_sdw_max98373.c index 0e7ed906b341..25daef910aee 100644 --- a/sound/soc/intel/boards/sof_sdw_max98373.c +++ b/sound/soc/intel/boards/sof_sdw_max98373.c @@ -55,43 +55,68 @@ static int spk_init(struct snd_soc_pcm_runtime *rtd) return ret; } -static int max98373_sdw_trigger(struct snd_pcm_substream *substream, int cmd) +static int mx8373_enable_spk_pin(struct snd_pcm_substream *substream, bool enable) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); + struct snd_soc_dai *codec_dai; + struct snd_soc_dai *cpu_dai; int ret; + int j; - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - /* enable max98373 first */ - ret = max_98373_trigger(substream, cmd); - if (ret < 0) - break; - - ret = sdw_trigger(substream, cmd); - break; - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ret = sdw_trigger(substream, cmd); - if (ret < 0) - break; - - ret = max_98373_trigger(substream, cmd); - break; - default: - ret = -EINVAL; - break; + /* set spk pin by playback only */ + if (substream->stream == SNDRV_PCM_STREAM_CAPTURE) + return 0; + + cpu_dai = asoc_rtd_to_cpu(rtd, 0); + for_each_rtd_codec_dais(rtd, j, codec_dai) { + struct snd_soc_dapm_context *dapm = + snd_soc_component_get_dapm(cpu_dai->component); + char pin_name[16]; + + snprintf(pin_name, ARRAY_SIZE(pin_name), "%s Spk", + codec_dai->component->name_prefix); + + if (enable) + ret = snd_soc_dapm_enable_pin(dapm, pin_name); + else + ret = snd_soc_dapm_disable_pin(dapm, pin_name); + + if (!ret) + snd_soc_dapm_sync(dapm); } - return ret; + return 0; +} + +static int mx8373_sdw_prepare(struct snd_pcm_substream *substream) +{ + int ret = 0; + + /* according to soc_pcm_prepare dai link prepare is called first */ + ret = sdw_prepare(substream); + if (ret < 0) + return ret; + + return mx8373_enable_spk_pin(substream, true); +} + +static int mx8373_sdw_hw_free(struct snd_pcm_substream *substream) +{ + int ret = 0; + + /* according to soc_pcm_hw_free dai link free is called first */ + ret = sdw_hw_free(substream); + if (ret < 0) + return ret; + + return mx8373_enable_spk_pin(substream, false); } static const struct snd_soc_ops max_98373_sdw_ops = { .startup = sdw_startup, - .prepare = sdw_prepare, - .trigger = max98373_sdw_trigger, - .hw_free = sdw_hw_free, + .prepare = mx8373_sdw_prepare, + .trigger = sdw_trigger, + .hw_free = mx8373_sdw_hw_free, .shutdown = sdw_shutdown, }; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 46513bb97904..d1c570ca21ea 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -1015,6 +1015,7 @@ out: static int soc_pcm_trigger(struct snd_pcm_substream *substream, int cmd) { + struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream); int ret = -EINVAL, _ret = 0; int rollback = 0; @@ -1055,14 +1056,23 @@ start_err: case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); - if (ret < 0) - break; + if (rtd->dai_link->stop_dma_first) { + ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); + if (ret < 0) + break; - ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); - if (ret < 0) - break; + ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); + if (ret < 0) + break; + } else { + ret = snd_soc_pcm_dai_trigger(substream, cmd, rollback); + if (ret < 0) + break; + ret = snd_soc_pcm_component_trigger(substream, cmd, rollback); + if (ret < 0) + break; + } ret = snd_soc_link_trigger(substream, cmd, rollback); break; } diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c index a00262184efa..d04ce84fe7cc 100644 --- a/sound/soc/sof/intel/pci-tgl.c +++ b/sound/soc/sof/intel/pci-tgl.c @@ -89,6 +89,7 @@ static const struct sof_dev_desc adls_desc = { static const struct sof_dev_desc adl_desc = { .machines = snd_soc_acpi_intel_adl_machines, .alt_machines = snd_soc_acpi_intel_adl_sdw_machines, + .use_acpi_target_states = true, .resindex_lpe_base = 0, .resindex_pcicfg_base = -1, .resindex_imr_base = -1, diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index 573374b89b10..d3276b4595af 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -213,19 +213,19 @@ snd_pcm_uframes_t tegra_pcm_pointer(struct snd_soc_component *component, } EXPORT_SYMBOL_GPL(tegra_pcm_pointer); -static int tegra_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream, +static int tegra_pcm_preallocate_dma_buffer(struct device *dev, struct snd_pcm *pcm, int stream, size_t size) { struct snd_pcm_substream *substream = pcm->streams[stream].substream; struct snd_dma_buffer *buf = &substream->dma_buffer; - buf->area = dma_alloc_wc(pcm->card->dev, size, &buf->addr, GFP_KERNEL); + buf->area = dma_alloc_wc(dev, size, &buf->addr, GFP_KERNEL); if (!buf->area) return -ENOMEM; buf->private_data = NULL; buf->dev.type = SNDRV_DMA_TYPE_DEV; - buf->dev.dev = pcm->card->dev; + buf->dev.dev = dev; buf->bytes = size; return 0; @@ -244,31 +244,28 @@ static void tegra_pcm_deallocate_dma_buffer(struct snd_pcm *pcm, int stream) if (!buf->area) return; - dma_free_wc(pcm->card->dev, buf->bytes, buf->area, buf->addr); + dma_free_wc(buf->dev.dev, buf->bytes, buf->area, buf->addr); buf->area = NULL; } -static int tegra_pcm_dma_allocate(struct snd_soc_pcm_runtime *rtd, +static int tegra_pcm_dma_allocate(struct device *dev, struct snd_soc_pcm_runtime *rtd, size_t size) { - struct snd_card *card = rtd->card->snd_card; struct snd_pcm *pcm = rtd->pcm; int ret; - ret = dma_set_mask_and_coherent(card->dev, DMA_BIT_MASK(32)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); if (ret < 0) return ret; if (pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream) { - ret = tegra_pcm_preallocate_dma_buffer(pcm, - SNDRV_PCM_STREAM_PLAYBACK, size); + ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_PLAYBACK, size); if (ret) goto err; } if (pcm->streams[SNDRV_PCM_STREAM_CAPTURE].substream) { - ret = tegra_pcm_preallocate_dma_buffer(pcm, - SNDRV_PCM_STREAM_CAPTURE, size); + ret = tegra_pcm_preallocate_dma_buffer(dev, pcm, SNDRV_PCM_STREAM_CAPTURE, size); if (ret) goto err_free_play; } @@ -284,7 +281,16 @@ err: int tegra_pcm_construct(struct snd_soc_component *component, struct snd_soc_pcm_runtime *rtd) { - return tegra_pcm_dma_allocate(rtd, tegra_pcm_hardware.buffer_bytes_max); + struct device *dev = component->dev; + + /* + * Fallback for backwards-compatibility with older device trees that + * have the iommus property in the virtual, top-level "sound" node. + */ + if (!of_get_property(dev->of_node, "iommus", NULL)) + dev = rtd->card->snd_card->dev; + + return tegra_pcm_dma_allocate(dev, rtd, tegra_pcm_hardware.buffer_bytes_max); } EXPORT_SYMBOL_GPL(tegra_pcm_construct); diff --git a/sound/soc/ti/j721e-evm.c b/sound/soc/ti/j721e-evm.c index a7c0484d44ec..265bbc5a2f96 100644 --- a/sound/soc/ti/j721e-evm.c +++ b/sound/soc/ti/j721e-evm.c @@ -197,7 +197,7 @@ static int j721e_configure_refclk(struct j721e_priv *priv, return ret; } - if (priv->hsdiv_rates[domain->parent_clk_id] != scki) { + if (domain->parent_clk_id == -1 || priv->hsdiv_rates[domain->parent_clk_id] != scki) { dev_dbg(priv->dev, "%s configuration for %u Hz: %s, %dxFS (SCKI: %u Hz)\n", audio_domain == J721E_AUDIO_DOMAIN_CPB ? "CPB" : "IVI", @@ -278,23 +278,29 @@ static int j721e_audio_startup(struct snd_pcm_substream *substream) j721e_rule_rate, &priv->rate_range, SNDRV_PCM_HW_PARAM_RATE, -1); - mutex_unlock(&priv->mutex); if (ret) - return ret; + goto out; /* Reset TDM slots to 32 */ ret = snd_soc_dai_set_tdm_slot(cpu_dai, 0x3, 0x3, 2, 32); if (ret && ret != -ENOTSUPP) - return ret; + goto out; for_each_rtd_codec_dais(rtd, i, codec_dai) { ret = snd_soc_dai_set_tdm_slot(codec_dai, 0x3, 0x3, 2, 32); if (ret && ret != -ENOTSUPP) - return ret; + goto out; } - return 0; + if (ret == -ENOTSUPP) + ret = 0; +out: + if (ret) + domain->active--; + mutex_unlock(&priv->mutex); + + return ret; } static int j721e_audio_hw_params(struct snd_pcm_substream *substream, diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 30b3e128e28d..f4cdaf1ba44a 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -3295,7 +3295,15 @@ static void snd_usb_mixer_dump_cval(struct snd_info_buffer *buffer, { struct usb_mixer_elem_info *cval = mixer_elem_list_to_info(list); static const char * const val_types[] = { - "BOOLEAN", "INV_BOOLEAN", "S8", "U8", "S16", "U16", "S32", "U32", + [USB_MIXER_BOOLEAN] = "BOOLEAN", + [USB_MIXER_INV_BOOLEAN] = "INV_BOOLEAN", + [USB_MIXER_S8] = "S8", + [USB_MIXER_U8] = "U8", + [USB_MIXER_S16] = "S16", + [USB_MIXER_U16] = "U16", + [USB_MIXER_S32] = "S32", + [USB_MIXER_U32] = "U32", + [USB_MIXER_BESPOKEN] = "BESPOKEN", }; snd_iprintf(buffer, " Info: id=%i, control=%i, cmask=0x%x, " "channels=%i, type=\"%s\"\n", cval->head.id, diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 8b8bee3c3dd6..e7accd87e063 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1897,6 +1897,9 @@ static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16d8, 2), /* Kingston HyperX AMP */ REG_QUIRK_ENTRY(0x0951, 0x16ed, 2), /* Kingston HyperX Cloud Alpha S */ REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ + REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */ + REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */ + REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */ { 0 } /* terminator */ }; diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index ff4d327a582e..88b28aa7431f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **btf** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | {**-d** | **--debug** } | + { **-B** | **--base-btf** } } *COMMANDS* := { **dump** | **help** } @@ -73,6 +74,20 @@ OPTIONS ======= .. include:: common_options.rst + -B, --base-btf *FILE* + Pass a base BTF object. Base BTF objects are typically used + with BTF objects for kernel modules. To avoid duplicating + all kernel symbols required by modules, BTF objects for + modules are "split", they are built incrementally on top of + the kernel (vmlinux) BTF object. So the base BTF reference + should usually point to the kernel BTF. + + When the main BTF object to process (for example, the + module BTF to dump) is passed as a *FILE*, bpftool attempts + to autodetect the path for the base object, and passing + this option is optional. When the main BTF object is passed + through other handles, this option becomes necessary. + EXAMPLES ======== **# bpftool btf dump id 1226** @@ -217,3 +232,34 @@ All the standard ways to specify map or program are supported: **# bpftool btf dump prog tag b88e0a09b1d9759d** **# bpftool btf dump prog pinned /sys/fs/bpf/prog_name** + +| +| **# bpftool btf dump file /sys/kernel/btf/i2c_smbus** +| (or) +| **# I2C_SMBUS_ID=$(bpftool btf show -p | jq '.[] | select(.name=="i2c_smbus").id')** +| **# bpftool btf dump id ${I2C_SMBUS_ID} -B /sys/kernel/btf/vmlinux** + +:: + + [104848] STRUCT 'i2c_smbus_alert' size=40 vlen=2 + 'alert' type_id=393 bits_offset=0 + 'ara' type_id=56050 bits_offset=256 + [104849] STRUCT 'alert_data' size=12 vlen=3 + 'addr' type_id=16 bits_offset=0 + 'type' type_id=56053 bits_offset=32 + 'data' type_id=7 bits_offset=64 + [104850] PTR '(anon)' type_id=104848 + [104851] PTR '(anon)' type_id=104849 + [104852] FUNC 'i2c_register_spd' type_id=84745 linkage=static + [104853] FUNC 'smbalert_driver_init' type_id=1213 linkage=static + [104854] FUNC_PROTO '(anon)' ret_type_id=18 vlen=1 + 'ara' type_id=56050 + [104855] FUNC 'i2c_handle_smbus_alert' type_id=104854 linkage=static + [104856] FUNC 'smbalert_remove' type_id=104854 linkage=static + [104857] FUNC_PROTO '(anon)' ret_type_id=18 vlen=2 + 'ara' type_id=56050 + 'id' type_id=56056 + [104858] FUNC 'smbalert_probe' type_id=104857 linkage=static + [104859] FUNC 'smbalert_work' type_id=9695 linkage=static + [104860] FUNC 'smbus_alert' type_id=71367 linkage=static + [104861] FUNC 'smbus_do_alert' type_id=84827 linkage=static diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst index baee8591ac76..3e4395eede4f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **cgroup** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } } *COMMANDS* := { **show** | **list** | **tree** | **attach** | **detach** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst index dd3771bdbc57..ab9f57ee4c3a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **feature** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **probe** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index 7cd6681137f3..2ef2f2df0279 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **gen** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-L** | **--use-loader** } } *COMMAND* := { **object** | **skeleton** | **help** } @@ -152,6 +153,12 @@ OPTIONS ======= .. include:: common_options.rst + -L, --use-loader + For skeletons, generate a "light" skeleton (also known as "loader" + skeleton). A light skeleton contains a loader eBPF program. It does + not use the majority of the libbpf infrastructure, and does not need + libelf. + EXAMPLES ======== **$ cat example1.bpf.c** diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 51f49bead619..471f363a725a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -12,6 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **iter** *COMMAND* + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } + *COMMANDS* := { **pin** | **help** } ITER COMMANDS diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 5f7db2a837cc..0de90f086238 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **link** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := { **show** | **list** | **pin** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 3d52256ba75f..d0c4abe08aba 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -12,7 +12,8 @@ SYNOPSIS **bpftool** [*OPTIONS*] **map** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } | { **-n** | **--nomount** } } *COMMANDS* := { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst index d8165d530937..1ae0375e8fea 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-net.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **net** *COMMAND* - *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst index e958ce91de72..ce52798a917d 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **perf** *COMMAND* - *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index fe1b38e7e887..91608cb7e44a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -12,7 +12,9 @@ SYNOPSIS **bpftool** [*OPTIONS*] **prog** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } | + { **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } | + { **-L** | **--use-loader** } } *COMMANDS* := { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** @@ -48,10 +50,11 @@ PROG COMMANDS | **struct_ops** | **fentry** | **fexit** | **freplace** | **sk_lookup** | } | *ATTACH_TYPE* := { -| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** +| **msg_verdict** | **skb_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector** | } | *METRICs* := { -| **cycles** | **instructions** | **l1d_loads** | **llc_misses** +| **cycles** | **instructions** | **l1d_loads** | **llc_misses** | +| **itlb_misses** | **dtlb_misses** | } @@ -223,6 +226,20 @@ OPTIONS Do not automatically attempt to mount any virtual file system (such as tracefs or BPF virtual file system) when necessary. + -L, --use-loader + Load program as a "loader" program. This is useful to debug + the generation of such programs. When this option is in + use, bpftool attempts to load the programs from the object + file into the kernel, but does not pin them (therefore, the + *PATH* must not be provided). + + When combined with the **-d**\ \|\ **--debug** option, + additional debug messages are generated, and the execution + of the loader program will use the **bpf_trace_printk**\ () + helper to log each step of loading BTF, creating the maps, + and loading the programs (see **bpftool prog tracelog** as + a way to dump those messages). + EXAMPLES ======== **# bpftool prog show** @@ -326,3 +343,16 @@ EXAMPLES 40176203 cycles (83.05%) 42518139 instructions # 1.06 insns per cycle (83.39%) 123 llc_misses # 2.89 LLC misses per million insns (83.15%) + +| +| Output below is for the trace logs. +| Run in separate terminals: +| **# bpftool prog tracelog** +| **# bpftool prog load -L -d file.o** + +:: + + bpftool-620059 [004] d... 2634685.517903: bpf_trace_printk: btf_load size 665 r=5 + bpftool-620059 [004] d... 2634685.517912: bpf_trace_printk: map_create sample_map idx 0 type 2 value_size 4 value_btf_id 0 r=6 + bpftool-620059 [004] d... 2634685.517997: bpf_trace_printk: prog_load sample insn_cnt 13 r=7 + bpftool-620059 [004] d... 2634685.517999: bpf_trace_printk: close(5) = 0 diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst index 506e70ee78e9..02afc0fc14cb 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst @@ -12,7 +12,7 @@ SYNOPSIS **bpftool** [*OPTIONS*] **struct_ops** *COMMAND* - *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *COMMANDS* := { **show** | **list** | **dump** | **register** | **unregister** | **help** } diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index e7d949334961..bb23f55bb05a 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -18,15 +18,15 @@ SYNOPSIS *OBJECT* := { **map** | **program** | **cgroup** | **perf** | **net** | **feature** } - *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** } - | { **-j** | **--json** } [{ **-p** | **--pretty** }] } + *OPTIONS* := { { **-V** | **--version** } | + { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-d** | **--debug** } } *MAP-COMMANDS* := - { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** - | **delete** | **pin** | **event_pipe** | **help** } + { **show** | **list** | **create** | **dump** | **update** | **lookup** | **getnext** | + **delete** | **pin** | **event_pipe** | **help** } - *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** - | **load** | **attach** | **detach** | **help** } + *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin** | + **load** | **attach** | **detach** | **help** } *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** } diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index cc33c5824a2f..88e2bcf16cca 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -260,7 +260,8 @@ _bpftool() # Deal with options if [[ ${words[cword]} == -* ]]; then - local c='--version --json --pretty --bpffs --mapcompat --debug' + local c='--version --json --pretty --bpffs --mapcompat --debug \ + --use-loader --base-btf' COMPREPLY=( $( compgen -W "$c" -- "$cur" ) ) return 0 fi @@ -278,7 +279,7 @@ _bpftool() _sysfs_get_netdevs return 0 ;; - file|pinned) + file|pinned|-B|--base-btf) _filedir return 0 ;; @@ -291,7 +292,8 @@ _bpftool() # Remove all options so completions don't have to deal with them. local i for (( i=1; i < ${#words[@]}; )); do - if [[ ${words[i]::1} == - ]]; then + if [[ ${words[i]::1} == - ]] && + [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then words=( "${words[@]:0:i}" "${words[@]:i+1}" ) [[ $i -le $cword ]] && cword=$(( cword - 1 )) else @@ -343,7 +345,8 @@ _bpftool() local PROG_TYPE='id pinned tag name' local MAP_TYPE='id pinned name' - local METRIC_TYPE='cycles instructions l1d_loads llc_misses' + local METRIC_TYPE='cycles instructions l1d_loads llc_misses \ + itlb_misses dtlb_misses' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -404,8 +407,10 @@ _bpftool() return 0 ;; 5) - COMPREPLY=( $( compgen -W 'msg_verdict stream_verdict \ - stream_parser flow_dissector' -- "$cur" ) ) + local BPFTOOL_PROG_ATTACH_TYPES='msg_verdict \ + skb_verdict stream_verdict stream_parser \ + flow_dissector' + COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_ATTACH_TYPES" -- "$cur" ) ) return 0 ;; 6) @@ -464,7 +469,7 @@ _bpftool() case $prev in type) - COMPREPLY=( $( compgen -W "socket kprobe \ + local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \ kretprobe classifier flow_dissector \ action tracepoint raw_tracepoint \ xdp perf_event cgroup/skb cgroup/sock \ @@ -479,8 +484,8 @@ _bpftool() cgroup/post_bind4 cgroup/post_bind6 \ cgroup/sysctl cgroup/getsockopt \ cgroup/setsockopt cgroup/sock_release struct_ops \ - fentry fexit freplace sk_lookup" -- \ - "$cur" ) ) + fentry fexit freplace sk_lookup' + COMPREPLY=( $( compgen -W "$BPFTOOL_PROG_LOAD_TYPES" -- "$cur" ) ) return 0 ;; id) @@ -698,15 +703,15 @@ _bpftool() return 0 ;; type) - COMPREPLY=( $( compgen -W 'hash array prog_array \ - perf_event_array percpu_hash percpu_array \ - stack_trace cgroup_array lru_hash \ + local BPFTOOL_MAP_CREATE_TYPES='hash array \ + prog_array perf_event_array percpu_hash \ + percpu_array stack_trace cgroup_array lru_hash \ lru_percpu_hash lpm_trie array_of_maps \ hash_of_maps devmap devmap_hash sockmap cpumap \ xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack sk_storage \ - struct_ops inode_storage task_storage' -- \ - "$cur" ) ) + struct_ops inode_storage task_storage ringbuf' + COMPREPLY=( $( compgen -W "$BPFTOOL_MAP_CREATE_TYPES" -- "$cur" ) ) return 0 ;; key|value|flags|entries) @@ -1017,34 +1022,37 @@ _bpftool() return 0 ;; attach|detach) - local ATTACH_TYPES='ingress egress sock_create sock_ops \ - device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ + local BPFTOOL_CGROUP_ATTACH_TYPES='ingress egress \ + sock_create sock_ops device \ + bind4 bind6 post_bind4 post_bind6 connect4 connect6 \ getpeername4 getpeername6 getsockname4 getsockname6 \ sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \ setsockopt sock_release' local ATTACH_FLAGS='multi override' local PROG_TYPE='id pinned tag name' - case $prev in - $command) - _filedir - return 0 - ;; - ingress|egress|sock_create|sock_ops|device|bind4|bind6|\ - post_bind4|post_bind6|connect4|connect6|getpeername4|\ - getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\ - recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt|sock_release) + # Check for $prev = $command first + if [ $prev = $command ]; then + _filedir + return 0 + # Then check for attach type. This is done outside of the + # "case $prev in" to avoid writing the whole list of attach + # types again as pattern to match (where we cannot reuse + # our variable). + elif [[ $BPFTOOL_CGROUP_ATTACH_TYPES =~ $prev ]]; then COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ "$cur" ) ) return 0 - ;; + fi + # case/esac for the other cases + case $prev in id) _bpftool_get_prog_ids return 0 ;; *) - if ! _bpftool_search_list "$ATTACH_TYPES"; then - COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \ - "$cur" ) ) + if ! _bpftool_search_list "$BPFTOOL_CGROUP_ATTACH_TYPES"; then + COMPREPLY=( $( compgen -W \ + "$BPFTOOL_CGROUP_ATTACH_TYPES" -- "$cur" ) ) elif [[ "$command" == "attach" ]]; then # We have an attach type on the command line, # but it is not the previous word, or diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 385d5c955cf3..f7e5ff3586c9 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -580,16 +580,12 @@ static int do_dump(int argc, char **argv) } if (!btf) { - err = btf__get_from_id(btf_id, &btf); + btf = btf__load_from_kernel_by_id_split(btf_id, base_btf); + err = libbpf_get_error(btf); if (err) { p_err("get btf by id (%u): %s", btf_id, strerror(err)); goto done; } - if (!btf) { - err = -ENOENT; - p_err("can't find btf with ID (%u)", btf_id); - goto done; - } } if (dump_c) { @@ -985,7 +981,8 @@ static int do_help(int argc, char **argv) " FORMAT := { raw | c }\n" " " HELP_SPEC_MAP "\n" " " HELP_SPEC_PROGRAM "\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-B|--base-btf} }\n" "", bin_name, "btf"); diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c index 7ca54d046362..9c25286a5c73 100644 --- a/tools/bpf/bpftool/btf_dumper.c +++ b/tools/bpf/bpftool/btf_dumper.c @@ -64,8 +64,10 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d, } info = &prog_info->info; - if (!info->btf_id || !info->nr_func_info || - btf__get_from_id(info->btf_id, &prog_btf)) + if (!info->btf_id || !info->nr_func_info) + goto print; + prog_btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(prog_btf)) goto print; finfo = u64_to_ptr(info->func_info); func_type = btf__type_by_id(prog_btf, finfo->type_id); diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 6e53b1d393f4..3571a281c43f 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -501,7 +501,8 @@ static int do_help(int argc, char **argv) HELP_SPEC_ATTACH_TYPES "\n" " " HELP_SPEC_ATTACH_FLAGS "\n" " " HELP_SPEC_PROGRAM "\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index dc6daa193557..d42d930a3ec4 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -67,6 +67,12 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { [BPF_MODIFY_RETURN] = "mod_ret", [BPF_LSM_MAC] = "lsm_mac", [BPF_SK_LOOKUP] = "sk_lookup", + [BPF_TRACE_ITER] = "trace_iter", + [BPF_XDP_DEVMAP] = "xdp_devmap", + [BPF_XDP_CPUMAP] = "xdp_cpumap", + [BPF_XDP] = "xdp", + [BPF_SK_REUSEPORT_SELECT] = "sk_skb_reuseport_select", + [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE] = "sk_skb_reuseport_select_or_migrate", }; void p_err(const char *fmt, ...) diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 40a88df275f9..7f36385aa9e2 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -1005,6 +1005,7 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " COMPONENT := { kernel | dev NAME }\n" + " " HELP_SPEC_OPTIONS " }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 1d71ff8c52fa..d40d92bbf0e4 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1026,7 +1026,8 @@ static int do_help(int argc, char **argv) " %1$s %2$s skeleton FILE [name OBJECT_NAME]\n" " %1$s %2$s help\n" "\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-L|--use-loader} }\n" "", bin_name, "gen"); diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 3b1aad7535dd..84a9b01d956d 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -97,7 +97,9 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n" " %1$s %2$s help\n" + "\n" " " HELP_SPEC_MAP "\n" + " " HELP_SPEC_OPTIONS " }\n" "", bin_name, "iter"); diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index e77e1525d20a..8cc3e36f8cc6 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -401,7 +401,8 @@ static int do_help(int argc, char **argv) " %1$s %2$s help\n" "\n" " " HELP_SPEC_LINK "\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} | {-n|--nomount} }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 3ddfd4843738..02eaaf065f65 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -64,7 +64,8 @@ static int do_help(int argc, char **argv) " %s version\n" "\n" " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-V|--version} }\n" "", bin_name, bin_name, bin_name); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index c1cf29798b99..90caa42aac4c 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -57,8 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr) #define HELP_SPEC_PROGRAM \ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ - "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \ - "\t {-m|--mapcompat} | {-n|--nomount} }" + "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}" #define HELP_SPEC_MAP \ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" #define HELP_SPEC_LINK \ diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 09ae0381205b..407071d54ab1 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -807,10 +807,11 @@ static struct btf *get_map_kv_btf(const struct bpf_map_info *info) } else if (info->btf_value_type_id) { int err; - err = btf__get_from_id(info->btf_id, &btf); - if (err || !btf) { + btf = btf__load_from_kernel_by_id(info->btf_id); + err = libbpf_get_error(btf); + if (err) { p_err("failed to get btf"); - btf = err ? ERR_PTR(err) : ERR_PTR(-ESRCH); + btf = ERR_PTR(err); } } @@ -1039,11 +1040,10 @@ static void print_key_value(struct bpf_map_info *info, void *key, void *value) { json_writer_t *btf_wtr; - struct btf *btf = NULL; - int err; + struct btf *btf; - err = btf__get_from_id(info->btf_id, &btf); - if (err) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { p_err("failed to get btf"); return; } @@ -1466,8 +1466,9 @@ static int do_help(int argc, char **argv) " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n" " queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n" - " task_storage }\n" - " " HELP_SPEC_OPTIONS "\n" + " task_storage }\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} | {-n|--nomount} }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index f836d115d7d6..649053704bd7 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -729,6 +729,7 @@ static int do_help(int argc, char **argv) "\n" " " HELP_SPEC_PROGRAM "\n" " ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n" + " " HELP_SPEC_OPTIONS " }\n" "\n" "Note: Only xdp and tc attachments are supported now.\n" " For progs attached to cgroups, use \"bpftool cgroup\"\n" diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c index ad23934819c7..50de087b0db7 100644 --- a/tools/bpf/bpftool/perf.c +++ b/tools/bpf/bpftool/perf.c @@ -231,7 +231,10 @@ static int do_show(int argc, char **argv) static int do_help(int argc, char **argv) { fprintf(stderr, - "Usage: %1$s %2$s { show | list | help }\n" + "Usage: %1$s %2$s { show | list }\n" + " %1$s %2$s help }\n" + "\n" + " " HELP_SPEC_OPTIONS " }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cc48726740ad..9c3e343b7d87 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -249,10 +249,10 @@ static void show_prog_metadata(int fd, __u32 num_maps) struct bpf_map_info map_info; struct btf_var_secinfo *vsi; bool printed_header = false; - struct btf *btf = NULL; unsigned int i, vlen; void *value = NULL; const char *name; + struct btf *btf; int err; if (!num_maps) @@ -263,8 +263,8 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (!value) return; - err = btf__get_from_id(map_info.btf_id, &btf); - if (err || !btf) + btf = btf__load_from_kernel_by_id(map_info.btf_id); + if (libbpf_get_error(btf)) goto out_free; t_datasec = btf__type_by_id(btf, map_info.btf_value_type_id); @@ -646,9 +646,12 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, member_len = info->xlated_prog_len; } - if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { - p_err("failed to get btf"); - return -1; + if (info->btf_id) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { + p_err("failed to get btf"); + return -1; + } } func_info = u64_to_ptr(info->func_info); @@ -781,6 +784,8 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode, kernel_syms_destroy(&dd); } + btf__free(btf); + return 0; } @@ -2002,8 +2007,8 @@ static char *profile_target_name(int tgt_fd) struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; const struct btf_type *t; + struct btf *btf = NULL; char *name = NULL; - struct btf *btf; info_linear = bpf_program__get_prog_info_linear( tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); @@ -2012,12 +2017,17 @@ static char *profile_target_name(int tgt_fd) return NULL; } - if (info_linear->info.btf_id == 0 || - btf__get_from_id(info_linear->info.btf_id, &btf)) { + if (info_linear->info.btf_id == 0) { p_err("prog FD %d doesn't have valid btf", tgt_fd); goto out; } + btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); + if (libbpf_get_error(btf)) { + p_err("failed to load btf for prog FD %d", tgt_fd); + goto out; + } + func_info = u64_to_ptr(info_linear->info.func_info); t = btf__type_by_id(btf, func_info[0].type_id); if (!t) { @@ -2027,6 +2037,7 @@ static char *profile_target_name(int tgt_fd) } name = strdup(btf__name_by_offset(btf, t->name_off)); out: + btf__free(btf); free(info_linear); return name; } @@ -2245,10 +2256,12 @@ static int do_help(int argc, char **argv) " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n" " cgroup/getsockopt | cgroup/setsockopt | cgroup/sock_release |\n" " struct_ops | fentry | fexit | freplace | sk_lookup }\n" - " ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n" - " flow_dissector }\n" + " ATTACH_TYPE := { msg_verdict | skb_verdict | stream_verdict |\n" + " stream_parser | flow_dissector }\n" " METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n" - " " HELP_SPEC_OPTIONS "\n" + " " HELP_SPEC_OPTIONS " |\n" + " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n" + " {-L|--use-loader} }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c index b58b91f62ffb..ab2d2290569a 100644 --- a/tools/bpf/bpftool/struct_ops.c +++ b/tools/bpf/bpftool/struct_ops.c @@ -572,8 +572,8 @@ static int do_help(int argc, char **argv) " %1$s %2$s unregister STRUCT_OPS_MAP\n" " %1$s %2$s help\n" "\n" - " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n" " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n" + " " HELP_SPEC_OPTIONS " }\n" "", bin_name, argv[-2]); diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 3ad9301b0f00..de6365b53c9c 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -291,7 +291,7 @@ static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh) sh->sh_addralign = expected; if (gelf_update_shdr(scn, sh) == 0) { - printf("FAILED cannot update section header: %s\n", + pr_err("FAILED cannot update section header: %s\n", elf_errmsg(-1)); return -1; } @@ -317,6 +317,7 @@ static int elf_collect(struct object *obj) elf = elf_begin(fd, ELF_C_RDWR_MMAP, NULL); if (!elf) { + close(fd); pr_err("FAILED cannot create ELF descriptor: %s\n", elf_errmsg(-1)); return -1; @@ -484,7 +485,7 @@ static int symbols_resolve(struct object *obj) err = libbpf_get_error(btf); if (err) { pr_err("FAILED: load BTF from %s: %s\n", - obj->path, strerror(-err)); + obj->btf ?: obj->path, strerror(-err)); return -1; } @@ -555,8 +556,7 @@ static int id_patch(struct object *obj, struct btf_id *id) int i; if (!id->id) { - pr_err("FAILED unresolved symbol %s\n", id->name); - return -EINVAL; + pr_err("WARN: resolve_btfids: unresolved symbol %s\n", id->name); } for (i = 0; i < id->addr_cnt; i++) { @@ -734,8 +734,9 @@ int main(int argc, const char **argv) err = 0; out: - if (obj.efile.elf) + if (obj.efile.elf) { elf_end(obj.efile.elf); - close(obj.efile.fd); + close(obj.efile.fd); + } return err; } diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index d208b2af697f..eb15f319aa57 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -653,6 +653,7 @@ enum { IFLA_BOND_AD_ACTOR_SYSTEM, IFLA_BOND_TLB_DYNAMIC_LB, IFLA_BOND_PEER_NOTIF_DELAY, + IFLA_BOND_AD_LACP_ACTIVE, __IFLA_BOND_MAX, }; diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 430f6874fa41..94f0a146bb7b 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,3 +1,3 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \ - btf_dump.o ringbuf.o strset.o linker.o gen_loader.o + btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index b46760b93bb4..85de4fd50699 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1180,7 +1180,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf__load(struct btf *btf) +int btf__load_into_kernel(struct btf *btf) { __u32 log_buf_size = 0, raw_size; char *log_buf = NULL; @@ -1228,6 +1228,7 @@ done: free(log_buf); return libbpf_err(err); } +int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel"))); int btf__fd(const struct btf *btf) { @@ -1382,21 +1383,35 @@ exit_free: return btf; } -int btf__get_from_id(__u32 id, struct btf **btf) +struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf) { - struct btf *res; - int err, btf_fd; + struct btf *btf; + int btf_fd; - *btf = NULL; btf_fd = bpf_btf_get_fd_by_id(id); if (btf_fd < 0) - return libbpf_err(-errno); - - res = btf_get_from_fd(btf_fd, NULL); - err = libbpf_get_error(res); + return libbpf_err_ptr(-errno); + btf = btf_get_from_fd(btf_fd, base_btf); close(btf_fd); + return libbpf_ptr(btf); +} + +struct btf *btf__load_from_kernel_by_id(__u32 id) +{ + return btf__load_from_kernel_by_id_split(id, NULL); +} + +int btf__get_from_id(__u32 id, struct btf **btf) +{ + struct btf *res; + int err; + + *btf = NULL; + res = btf__load_from_kernel_by_id(id); + err = libbpf_get_error(res); + if (err) return libbpf_err(err); @@ -4021,7 +4036,7 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d) */ if (d->hypot_adjust_canon) continue; - + if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) d->map[t_id] = c_id; @@ -4394,7 +4409,7 @@ static int btf_dedup_remap_types(struct btf_dedup *d) * Probe few well-known locations for vmlinux kernel image and try to load BTF * data out of it to use for target BTF. */ -struct btf *libbpf_find_kernel_btf(void) +struct btf *btf__load_vmlinux_btf(void) { struct { const char *path_fmt; @@ -4440,6 +4455,16 @@ struct btf *libbpf_find_kernel_btf(void) return libbpf_err_ptr(-ESRCH); } +struct btf *libbpf_find_kernel_btf(void) __attribute__((alias("btf__load_vmlinux_btf"))); + +struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf) +{ + char path[80]; + + snprintf(path, sizeof(path), "/sys/kernel/btf/%s", module_name); + return btf__parse_split(path, vmlinux_btf); +} + int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx) { int i, n, err; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b54f1c3ebd57..4a711f990904 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -44,8 +44,17 @@ LIBBPF_API struct btf *btf__parse_elf_split(const char *path, struct btf *base_b LIBBPF_API struct btf *btf__parse_raw(const char *path); LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf); +LIBBPF_API struct btf *btf__load_vmlinux_btf(void); +LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf); +LIBBPF_API struct btf *libbpf_find_kernel_btf(void); + +LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id); +LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf); +LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); + LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf); LIBBPF_API int btf__load(struct btf *btf); +LIBBPF_API int btf__load_into_kernel(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, @@ -66,7 +75,6 @@ LIBBPF_API void btf__set_fd(struct btf *btf, int fd); LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); -LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, __u32 expected_key_size, __u32 expected_value_size, @@ -89,8 +97,6 @@ int btf_ext__reloc_line_info(const struct btf *btf, LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); -LIBBPF_API struct btf *libbpf_find_kernel_btf(void); - LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf, @@ -184,6 +190,25 @@ LIBBPF_API int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, const struct btf_dump_emit_type_decl_opts *opts); + +struct btf_dump_type_data_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + const char *indent_str; + int indent_level; + /* below match "show" flags for bpf_show_snprintf() */ + bool compact; /* no newlines/indentation */ + bool skip_names; /* skip member/type names */ + bool emit_zeroes; /* show 0-valued fields */ + size_t :0; +}; +#define btf_dump_type_data_opts__last_field emit_zeroes + +LIBBPF_API int +btf_dump__dump_type_data(struct btf_dump *d, __u32 id, + const void *data, size_t data_sz, + const struct btf_dump_type_data_opts *opts); + /* * A set of helpers for easier BTF types handling */ diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 5dc6b5172bb3..e4b483f15fb9 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -10,6 +10,8 @@ #include <stddef.h> #include <stdlib.h> #include <string.h> +#include <ctype.h> +#include <endian.h> #include <errno.h> #include <linux/err.h> #include <linux/btf.h> @@ -53,6 +55,26 @@ struct btf_dump_type_aux_state { __u8 referenced: 1; }; +/* indent string length; one indent string is added for each indent level */ +#define BTF_DATA_INDENT_STR_LEN 32 + +/* + * Common internal data for BTF type data dump operations. + */ +struct btf_dump_data { + const void *data_end; /* end of valid data to show */ + bool compact; + bool skip_names; + bool emit_zeroes; + __u8 indent_lvl; /* base indent level */ + char indent_str[BTF_DATA_INDENT_STR_LEN]; + /* below are used during iteration */ + int depth; + bool is_array_member; + bool is_array_terminated; + bool is_array_char; +}; + struct btf_dump { const struct btf *btf; const struct btf_ext *btf_ext; @@ -60,6 +82,7 @@ struct btf_dump { struct btf_dump_opts opts; int ptr_sz; bool strip_mods; + bool skip_anon_defs; int last_id; /* per-type auxiliary state */ @@ -89,6 +112,10 @@ struct btf_dump { * name occurrences */ struct hashmap *ident_names; + /* + * data for typed display; allocated if needed. + */ + struct btf_dump_data *typed_dump; }; static size_t str_hash_fn(const void *key, void *ctx) @@ -765,11 +792,11 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) break; case BTF_KIND_FUNC_PROTO: { const struct btf_param *p = btf_params(t); - __u16 vlen = btf_vlen(t); + __u16 n = btf_vlen(t); int i; btf_dump_emit_type(d, t->type, cont_id); - for (i = 0; i < vlen; i++, p++) + for (i = 0; i < n; i++, p++) btf_dump_emit_type(d, p->type, cont_id); break; @@ -852,8 +879,9 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d, static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id, const struct btf_type *t) { - btf_dump_printf(d, "%s %s", + btf_dump_printf(d, "%s%s%s", btf_is_struct(t) ? "struct" : "union", + t->name_off ? " " : "", btf_dump_type_name(d, id)); } @@ -1259,7 +1287,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_UNION: btf_dump_emit_mods(d, decls); /* inline anonymous struct/union */ - if (t->name_off == 0) + if (t->name_off == 0 && !d->skip_anon_defs) btf_dump_emit_struct_def(d, id, t, lvl); else btf_dump_emit_struct_fwd(d, id, t); @@ -1267,7 +1295,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, case BTF_KIND_ENUM: btf_dump_emit_mods(d, decls); /* inline anonymous enum */ - if (t->name_off == 0) + if (t->name_off == 0 && !d->skip_anon_defs) btf_dump_emit_enum_def(d, id, t, lvl); else btf_dump_emit_enum_fwd(d, id, t); @@ -1392,6 +1420,39 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, btf_dump_emit_name(d, fname, last_was_ptr); } +/* show type name as (type_name) */ +static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id, + bool top_level) +{ + const struct btf_type *t; + + /* for array members, we don't bother emitting type name for each + * member to avoid the redundancy of + * .name = (char[4])[(char)'f',(char)'o',(char)'o',] + */ + if (d->typed_dump->is_array_member) + return; + + /* avoid type name specification for variable/section; it will be done + * for the associated variable value(s). + */ + t = btf__type_by_id(d->btf, id); + if (btf_is_var(t) || btf_is_datasec(t)) + return; + + if (top_level) + btf_dump_printf(d, "("); + + d->skip_anon_defs = true; + d->strip_mods = true; + btf_dump_emit_type_decl(d, id, "", 0); + d->strip_mods = false; + d->skip_anon_defs = false; + + if (top_level) + btf_dump_printf(d, ")"); +} + /* return number of duplicates (occurrences) of a given name */ static size_t btf_dump_name_dups(struct btf_dump *d, struct hashmap *name_map, const char *orig_name) @@ -1442,3 +1503,803 @@ static const char *btf_dump_ident_name(struct btf_dump *d, __u32 id) { return btf_dump_resolve_name(d, id, d->ident_names); } + +static int btf_dump_dump_type_data(struct btf_dump *d, + const char *fname, + const struct btf_type *t, + __u32 id, + const void *data, + __u8 bits_offset, + __u8 bit_sz); + +static const char *btf_dump_data_newline(struct btf_dump *d) +{ + return d->typed_dump->compact || d->typed_dump->depth == 0 ? "" : "\n"; +} + +static const char *btf_dump_data_delim(struct btf_dump *d) +{ + return d->typed_dump->depth == 0 ? "" : ","; +} + +static void btf_dump_data_pfx(struct btf_dump *d) +{ + int i, lvl = d->typed_dump->indent_lvl + d->typed_dump->depth; + + if (d->typed_dump->compact) + return; + + for (i = 0; i < lvl; i++) + btf_dump_printf(d, "%s", d->typed_dump->indent_str); +} + +/* A macro is used here as btf_type_value[s]() appends format specifiers + * to the format specifier passed in; these do the work of appending + * delimiters etc while the caller simply has to specify the type values + * in the format specifier + value(s). + */ +#define btf_dump_type_values(d, fmt, ...) \ + btf_dump_printf(d, fmt "%s%s", \ + ##__VA_ARGS__, \ + btf_dump_data_delim(d), \ + btf_dump_data_newline(d)) + +static int btf_dump_unsupported_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id) +{ + btf_dump_printf(d, "<unsupported kind:%u>", btf_kind(t)); + return -ENOTSUP; +} + +static int btf_dump_get_bitfield_value(struct btf_dump *d, + const struct btf_type *t, + const void *data, + __u8 bits_offset, + __u8 bit_sz, + __u64 *value) +{ + __u16 left_shift_bits, right_shift_bits; + __u8 nr_copy_bits, nr_copy_bytes; + const __u8 *bytes = data; + int sz = t->size; + __u64 num = 0; + int i; + + /* Maximum supported bitfield size is 64 bits */ + if (sz > 8) { + pr_warn("unexpected bitfield size %d\n", sz); + return -EINVAL; + } + + /* Bitfield value retrieval is done in two steps; first relevant bytes are + * stored in num, then we left/right shift num to eliminate irrelevant bits. + */ + nr_copy_bits = bit_sz + bits_offset; + nr_copy_bytes = t->size; +#if __BYTE_ORDER == __LITTLE_ENDIAN + for (i = nr_copy_bytes - 1; i >= 0; i--) + num = num * 256 + bytes[i]; +#elif __BYTE_ORDER == __BIG_ENDIAN + for (i = 0; i < nr_copy_bytes; i++) + num = num * 256 + bytes[i]; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + left_shift_bits = 64 - nr_copy_bits; + right_shift_bits = 64 - bit_sz; + + *value = (num << left_shift_bits) >> right_shift_bits; + + return 0; +} + +static int btf_dump_bitfield_check_zero(struct btf_dump *d, + const struct btf_type *t, + const void *data, + __u8 bits_offset, + __u8 bit_sz) +{ + __u64 check_num; + int err; + + err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &check_num); + if (err) + return err; + if (check_num == 0) + return -ENODATA; + return 0; +} + +static int btf_dump_bitfield_data(struct btf_dump *d, + const struct btf_type *t, + const void *data, + __u8 bits_offset, + __u8 bit_sz) +{ + __u64 print_num; + int err; + + err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, &print_num); + if (err) + return err; + + btf_dump_type_values(d, "0x%llx", (unsigned long long)print_num); + + return 0; +} + +/* ints, floats and ptrs */ +static int btf_dump_base_type_check_zero(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + static __u8 bytecmp[16] = {}; + int nr_bytes; + + /* For pointer types, pointer size is not defined on a per-type basis. + * On dump creation however, we store the pointer size. + */ + if (btf_kind(t) == BTF_KIND_PTR) + nr_bytes = d->ptr_sz; + else + nr_bytes = t->size; + + if (nr_bytes < 1 || nr_bytes > 16) { + pr_warn("unexpected size %d for id [%u]\n", nr_bytes, id); + return -EINVAL; + } + + if (memcmp(data, bytecmp, nr_bytes) == 0) + return -ENODATA; + return 0; +} + +static bool ptr_is_aligned(const void *data, int data_sz) +{ + return ((uintptr_t)data) % data_sz == 0; +} + +static int btf_dump_int_data(struct btf_dump *d, + const struct btf_type *t, + __u32 type_id, + const void *data, + __u8 bits_offset) +{ + __u8 encoding = btf_int_encoding(t); + bool sign = encoding & BTF_INT_SIGNED; + int sz = t->size; + + if (sz == 0) { + pr_warn("unexpected size %d for id [%u]\n", sz, type_id); + return -EINVAL; + } + + /* handle packed int data - accesses of integers not aligned on + * int boundaries can cause problems on some platforms. + */ + if (!ptr_is_aligned(data, sz)) + return btf_dump_bitfield_data(d, t, data, 0, 0); + + switch (sz) { + case 16: { + const __u64 *ints = data; + __u64 lsi, msi; + + /* avoid use of __int128 as some 32-bit platforms do not + * support it. + */ +#if __BYTE_ORDER == __LITTLE_ENDIAN + lsi = ints[0]; + msi = ints[1]; +#elif __BYTE_ORDER == __BIG_ENDIAN + lsi = ints[1]; + msi = ints[0]; +#else +# error "Unrecognized __BYTE_ORDER__" +#endif + if (msi == 0) + btf_dump_type_values(d, "0x%llx", (unsigned long long)lsi); + else + btf_dump_type_values(d, "0x%llx%016llx", (unsigned long long)msi, + (unsigned long long)lsi); + break; + } + case 8: + if (sign) + btf_dump_type_values(d, "%lld", *(long long *)data); + else + btf_dump_type_values(d, "%llu", *(unsigned long long *)data); + break; + case 4: + if (sign) + btf_dump_type_values(d, "%d", *(__s32 *)data); + else + btf_dump_type_values(d, "%u", *(__u32 *)data); + break; + case 2: + if (sign) + btf_dump_type_values(d, "%d", *(__s16 *)data); + else + btf_dump_type_values(d, "%u", *(__u16 *)data); + break; + case 1: + if (d->typed_dump->is_array_char) { + /* check for null terminator */ + if (d->typed_dump->is_array_terminated) + break; + if (*(char *)data == '\0') { + d->typed_dump->is_array_terminated = true; + break; + } + if (isprint(*(char *)data)) { + btf_dump_type_values(d, "'%c'", *(char *)data); + break; + } + } + if (sign) + btf_dump_type_values(d, "%d", *(__s8 *)data); + else + btf_dump_type_values(d, "%u", *(__u8 *)data); + break; + default: + pr_warn("unexpected sz %d for id [%u]\n", sz, type_id); + return -EINVAL; + } + return 0; +} + +union float_data { + long double ld; + double d; + float f; +}; + +static int btf_dump_float_data(struct btf_dump *d, + const struct btf_type *t, + __u32 type_id, + const void *data) +{ + const union float_data *flp = data; + union float_data fl; + int sz = t->size; + + /* handle unaligned data; copy to local union */ + if (!ptr_is_aligned(data, sz)) { + memcpy(&fl, data, sz); + flp = &fl; + } + + switch (sz) { + case 16: + btf_dump_type_values(d, "%Lf", flp->ld); + break; + case 8: + btf_dump_type_values(d, "%lf", flp->d); + break; + case 4: + btf_dump_type_values(d, "%f", flp->f); + break; + default: + pr_warn("unexpected size %d for id [%u]\n", sz, type_id); + return -EINVAL; + } + return 0; +} + +static int btf_dump_var_data(struct btf_dump *d, + const struct btf_type *v, + __u32 id, + const void *data) +{ + enum btf_func_linkage linkage = btf_var(v)->linkage; + const struct btf_type *t; + const char *l; + __u32 type_id; + + switch (linkage) { + case BTF_FUNC_STATIC: + l = "static "; + break; + case BTF_FUNC_EXTERN: + l = "extern "; + break; + case BTF_FUNC_GLOBAL: + default: + l = ""; + break; + } + + /* format of output here is [linkage] [type] [varname] = (type)value, + * for example "static int cpu_profile_flip = (int)1" + */ + btf_dump_printf(d, "%s", l); + type_id = v->type; + t = btf__type_by_id(d->btf, type_id); + btf_dump_emit_type_cast(d, type_id, false); + btf_dump_printf(d, " %s = ", btf_name_of(d, v->name_off)); + return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); +} + +static int btf_dump_array_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_array *array = btf_array(t); + const struct btf_type *elem_type; + __u32 i, elem_size = 0, elem_type_id; + bool is_array_member; + + elem_type_id = array->type; + elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); + elem_size = btf__resolve_size(d->btf, elem_type_id); + if (elem_size <= 0) { + pr_warn("unexpected elem size %d for array type [%u]\n", elem_size, id); + return -EINVAL; + } + + if (btf_is_int(elem_type)) { + /* + * BTF_INT_CHAR encoding never seems to be set for + * char arrays, so if size is 1 and element is + * printable as a char, we'll do that. + */ + if (elem_size == 1) + d->typed_dump->is_array_char = true; + } + + /* note that we increment depth before calling btf_dump_print() below; + * this is intentional. btf_dump_data_newline() will not print a + * newline for depth 0 (since this leaves us with trailing newlines + * at the end of typed display), so depth is incremented first. + * For similar reasons, we decrement depth before showing the closing + * parenthesis. + */ + d->typed_dump->depth++; + btf_dump_printf(d, "[%s", btf_dump_data_newline(d)); + + /* may be a multidimensional array, so store current "is array member" + * status so we can restore it correctly later. + */ + is_array_member = d->typed_dump->is_array_member; + d->typed_dump->is_array_member = true; + for (i = 0; i < array->nelems; i++, data += elem_size) { + if (d->typed_dump->is_array_terminated) + break; + btf_dump_dump_type_data(d, NULL, elem_type, elem_type_id, data, 0, 0); + } + d->typed_dump->is_array_member = is_array_member; + d->typed_dump->depth--; + btf_dump_data_pfx(d); + btf_dump_type_values(d, "]"); + + return 0; +} + +static int btf_dump_struct_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_member *m = btf_members(t); + __u16 n = btf_vlen(t); + int i, err; + + /* note that we increment depth before calling btf_dump_print() below; + * this is intentional. btf_dump_data_newline() will not print a + * newline for depth 0 (since this leaves us with trailing newlines + * at the end of typed display), so depth is incremented first. + * For similar reasons, we decrement depth before showing the closing + * parenthesis. + */ + d->typed_dump->depth++; + btf_dump_printf(d, "{%s", btf_dump_data_newline(d)); + + for (i = 0; i < n; i++, m++) { + const struct btf_type *mtype; + const char *mname; + __u32 moffset; + __u8 bit_sz; + + mtype = btf__type_by_id(d->btf, m->type); + mname = btf_name_of(d, m->name_off); + moffset = btf_member_bit_offset(t, i); + + bit_sz = btf_member_bitfield_size(t, i); + err = btf_dump_dump_type_data(d, mname, mtype, m->type, data + moffset / 8, + moffset % 8, bit_sz); + if (err < 0) + return err; + } + d->typed_dump->depth--; + btf_dump_data_pfx(d); + btf_dump_type_values(d, "}"); + return err; +} + +union ptr_data { + unsigned int p; + unsigned long long lp; +}; + +static int btf_dump_ptr_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + if (ptr_is_aligned(data, d->ptr_sz) && d->ptr_sz == sizeof(void *)) { + btf_dump_type_values(d, "%p", *(void **)data); + } else { + union ptr_data pt; + + memcpy(&pt, data, d->ptr_sz); + if (d->ptr_sz == 4) + btf_dump_type_values(d, "0x%x", pt.p); + else + btf_dump_type_values(d, "0x%llx", pt.lp); + } + return 0; +} + +static int btf_dump_get_enum_value(struct btf_dump *d, + const struct btf_type *t, + const void *data, + __u32 id, + __s64 *value) +{ + int sz = t->size; + + /* handle unaligned enum value */ + if (!ptr_is_aligned(data, sz)) { + __u64 val; + int err; + + err = btf_dump_get_bitfield_value(d, t, data, 0, 0, &val); + if (err) + return err; + *value = (__s64)val; + return 0; + } + + switch (t->size) { + case 8: + *value = *(__s64 *)data; + return 0; + case 4: + *value = *(__s32 *)data; + return 0; + case 2: + *value = *(__s16 *)data; + return 0; + case 1: + *value = *(__s8 *)data; + return 0; + default: + pr_warn("unexpected size %d for enum, id:[%u]\n", t->size, id); + return -EINVAL; + } +} + +static int btf_dump_enum_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_enum *e; + __s64 value; + int i, err; + + err = btf_dump_get_enum_value(d, t, data, id, &value); + if (err) + return err; + + for (i = 0, e = btf_enum(t); i < btf_vlen(t); i++, e++) { + if (value != e->val) + continue; + btf_dump_type_values(d, "%s", btf_name_of(d, e->name_off)); + return 0; + } + + btf_dump_type_values(d, "%d", value); + return 0; +} + +static int btf_dump_datasec_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_var_secinfo *vsi; + const struct btf_type *var; + __u32 i; + int err; + + btf_dump_type_values(d, "SEC(\"%s\") ", btf_name_of(d, t->name_off)); + + for (i = 0, vsi = btf_var_secinfos(t); i < btf_vlen(t); i++, vsi++) { + var = btf__type_by_id(d->btf, vsi->type); + err = btf_dump_dump_type_data(d, NULL, var, vsi->type, data + vsi->offset, 0, 0); + if (err < 0) + return err; + btf_dump_printf(d, ";"); + } + return 0; +} + +/* return size of type, or if base type overflows, return -E2BIG. */ +static int btf_dump_type_data_check_overflow(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data, + __u8 bits_offset) +{ + __s64 size = btf__resolve_size(d->btf, id); + + if (size < 0 || size >= INT_MAX) { + pr_warn("unexpected size [%zu] for id [%u]\n", + (size_t)size, id); + return -EINVAL; + } + + /* Only do overflow checking for base types; we do not want to + * avoid showing part of a struct, union or array, even if we + * do not have enough data to show the full object. By + * restricting overflow checking to base types we can ensure + * that partial display succeeds, while avoiding overflowing + * and using bogus data for display. + */ + t = skip_mods_and_typedefs(d->btf, id, NULL); + if (!t) { + pr_warn("unexpected error skipping mods/typedefs for id [%u]\n", + id); + return -EINVAL; + } + + switch (btf_kind(t)) { + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_PTR: + case BTF_KIND_ENUM: + if (data + bits_offset / 8 + size > d->typed_dump->data_end) + return -E2BIG; + break; + default: + break; + } + return (int)size; +} + +static int btf_dump_type_data_check_zero(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data, + __u8 bits_offset, + __u8 bit_sz) +{ + __s64 value; + int i, err; + + /* toplevel exceptions; we show zero values if + * - we ask for them (emit_zeros) + * - if we are at top-level so we see "struct empty { }" + * - or if we are an array member and the array is non-empty and + * not a char array; we don't want to be in a situation where we + * have an integer array 0, 1, 0, 1 and only show non-zero values. + * If the array contains zeroes only, or is a char array starting + * with a '\0', the array-level check_zero() will prevent showing it; + * we are concerned with determining zero value at the array member + * level here. + */ + if (d->typed_dump->emit_zeroes || d->typed_dump->depth == 0 || + (d->typed_dump->is_array_member && + !d->typed_dump->is_array_char)) + return 0; + + t = skip_mods_and_typedefs(d->btf, id, NULL); + + switch (btf_kind(t)) { + case BTF_KIND_INT: + if (bit_sz) + return btf_dump_bitfield_check_zero(d, t, data, bits_offset, bit_sz); + return btf_dump_base_type_check_zero(d, t, id, data); + case BTF_KIND_FLOAT: + case BTF_KIND_PTR: + return btf_dump_base_type_check_zero(d, t, id, data); + case BTF_KIND_ARRAY: { + const struct btf_array *array = btf_array(t); + const struct btf_type *elem_type; + __u32 elem_type_id, elem_size; + bool ischar; + + elem_type_id = array->type; + elem_size = btf__resolve_size(d->btf, elem_type_id); + elem_type = skip_mods_and_typedefs(d->btf, elem_type_id, NULL); + + ischar = btf_is_int(elem_type) && elem_size == 1; + + /* check all elements; if _any_ element is nonzero, all + * of array is displayed. We make an exception however + * for char arrays where the first element is 0; these + * are considered zeroed also, even if later elements are + * non-zero because the string is terminated. + */ + for (i = 0; i < array->nelems; i++) { + if (i == 0 && ischar && *(char *)data == 0) + return -ENODATA; + err = btf_dump_type_data_check_zero(d, elem_type, + elem_type_id, + data + + (i * elem_size), + bits_offset, 0); + if (err != -ENODATA) + return err; + } + return -ENODATA; + } + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + __u16 n = btf_vlen(t); + + /* if any struct/union member is non-zero, the struct/union + * is considered non-zero and dumped. + */ + for (i = 0; i < n; i++, m++) { + const struct btf_type *mtype; + __u32 moffset; + + mtype = btf__type_by_id(d->btf, m->type); + moffset = btf_member_bit_offset(t, i); + + /* btf_int_bits() does not store member bitfield size; + * bitfield size needs to be stored here so int display + * of member can retrieve it. + */ + bit_sz = btf_member_bitfield_size(t, i); + err = btf_dump_type_data_check_zero(d, mtype, m->type, data + moffset / 8, + moffset % 8, bit_sz); + if (err != ENODATA) + return err; + } + return -ENODATA; + } + case BTF_KIND_ENUM: + err = btf_dump_get_enum_value(d, t, data, id, &value); + if (err) + return err; + if (value == 0) + return -ENODATA; + return 0; + default: + return 0; + } +} + +/* returns size of data dumped, or error. */ +static int btf_dump_dump_type_data(struct btf_dump *d, + const char *fname, + const struct btf_type *t, + __u32 id, + const void *data, + __u8 bits_offset, + __u8 bit_sz) +{ + int size, err; + + size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset); + if (size < 0) + return size; + err = btf_dump_type_data_check_zero(d, t, id, data, bits_offset, bit_sz); + if (err) { + /* zeroed data is expected and not an error, so simply skip + * dumping such data. Record other errors however. + */ + if (err == -ENODATA) + return size; + return err; + } + btf_dump_data_pfx(d); + + if (!d->typed_dump->skip_names) { + if (fname && strlen(fname) > 0) + btf_dump_printf(d, ".%s = ", fname); + btf_dump_emit_type_cast(d, id, true); + } + + t = skip_mods_and_typedefs(d->btf, id, NULL); + + switch (btf_kind(t)) { + case BTF_KIND_UNKN: + case BTF_KIND_FWD: + case BTF_KIND_FUNC: + case BTF_KIND_FUNC_PROTO: + err = btf_dump_unsupported_data(d, t, id); + break; + case BTF_KIND_INT: + if (bit_sz) + err = btf_dump_bitfield_data(d, t, data, bits_offset, bit_sz); + else + err = btf_dump_int_data(d, t, id, data, bits_offset); + break; + case BTF_KIND_FLOAT: + err = btf_dump_float_data(d, t, id, data); + break; + case BTF_KIND_PTR: + err = btf_dump_ptr_data(d, t, id, data); + break; + case BTF_KIND_ARRAY: + err = btf_dump_array_data(d, t, id, data); + break; + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + err = btf_dump_struct_data(d, t, id, data); + break; + case BTF_KIND_ENUM: + /* handle bitfield and int enum values */ + if (bit_sz) { + __u64 print_num; + __s64 enum_val; + + err = btf_dump_get_bitfield_value(d, t, data, bits_offset, bit_sz, + &print_num); + if (err) + break; + enum_val = (__s64)print_num; + err = btf_dump_enum_data(d, t, id, &enum_val); + } else + err = btf_dump_enum_data(d, t, id, data); + break; + case BTF_KIND_VAR: + err = btf_dump_var_data(d, t, id, data); + break; + case BTF_KIND_DATASEC: + err = btf_dump_datasec_data(d, t, id, data); + break; + default: + pr_warn("unexpected kind [%u] for id [%u]\n", + BTF_INFO_KIND(t->info), id); + return -EINVAL; + } + if (err < 0) + return err; + return size; +} + +int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, + const void *data, size_t data_sz, + const struct btf_dump_type_data_opts *opts) +{ + struct btf_dump_data typed_dump = {}; + const struct btf_type *t; + int ret; + + if (!OPTS_VALID(opts, btf_dump_type_data_opts)) + return libbpf_err(-EINVAL); + + t = btf__type_by_id(d->btf, id); + if (!t) + return libbpf_err(-ENOENT); + + d->typed_dump = &typed_dump; + d->typed_dump->data_end = data + data_sz; + d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0); + + /* default indent string is a tab */ + if (!opts->indent_str) + d->typed_dump->indent_str[0] = '\t'; + else + strncat(d->typed_dump->indent_str, opts->indent_str, + sizeof(d->typed_dump->indent_str) - 1); + + d->typed_dump->compact = OPTS_GET(opts, compact, false); + d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); + d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); + + ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); + + d->typed_dump = NULL; + + return libbpf_err(ret); +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4ccfae30e681..cb106e8c42cb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -498,6 +498,10 @@ struct bpf_object { * it at load time. */ struct btf *btf_vmlinux; + /* Path to the custom BTF to be used for BPF CO-RE relocations as an + * override for vmlinux BTF. + */ + char *btf_custom_path; /* vmlinux BTF override for CO-RE relocations */ struct btf *btf_vmlinux_override; /* Lazily initialized kernel module BTFs */ @@ -591,11 +595,6 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn) insn->off == 0; } -static bool is_ldimm64_insn(struct bpf_insn *insn) -{ - return insn->code == (BPF_LD | BPF_IMM | BPF_DW); -} - static bool is_call_insn(const struct bpf_insn *insn) { return insn->code == (BPF_JMP | BPF_CALL); @@ -2645,8 +2644,10 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) struct bpf_program *prog; int i; - /* CO-RE relocations need kernel BTF */ - if (obj->btf_ext && obj->btf_ext->core_relo_info.len) + /* CO-RE relocations need kernel BTF, only when btf_custom_path + * is not specified + */ + if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path) return true; /* Support for typed ksyms needs kernel BTF */ @@ -2679,7 +2680,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) if (!force && !obj_needs_vmlinux_btf(obj)) return 0; - obj->btf_vmlinux = libbpf_find_kernel_btf(); + obj->btf_vmlinux = btf__load_vmlinux_btf(); err = libbpf_get_error(obj->btf_vmlinux); if (err) { pr_warn("Error loading vmlinux BTF: %d\n", err); @@ -2768,7 +2769,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) */ btf__set_fd(kern_btf, 0); } else { - err = btf__load(kern_btf); + err = btf__load_into_kernel(kern_btf); } if (sanitize) { if (!err) { @@ -4521,6 +4522,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b { struct bpf_create_map_attr create_attr; struct bpf_map_def *def = &map->def; + int err = 0; memset(&create_attr, 0, sizeof(create_attr)); @@ -4563,8 +4565,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { - int err; - err = bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", @@ -4589,8 +4589,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (map->fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { char *cp, errmsg[STRERR_BUFSIZE]; - int err = -errno; + err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", map->name, cp, err); @@ -4602,8 +4602,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b map->fd = bpf_create_map_xattr(&create_attr); } - if (map->fd < 0) - return -errno; + err = map->fd < 0 ? -errno : 0; if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) { if (obj->gen_loader) @@ -4612,7 +4611,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b zfree(&map->inner_map); } - return 0; + return err; } static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) @@ -4658,10 +4657,13 @@ bpf_object__create_maps(struct bpf_object *obj) char *cp, errmsg[STRERR_BUFSIZE]; unsigned int i, j; int err; + bool retried; for (i = 0; i < obj->nr_maps; i++) { map = &obj->maps[i]; + retried = false; +retry: if (map->pin_path) { err = bpf_object__reuse_map(map); if (err) { @@ -4669,6 +4671,12 @@ bpf_object__create_maps(struct bpf_object *obj) map->name); goto err_out; } + if (retried && map->fd < 0) { + pr_warn("map '%s': cannot find pinned map\n", + map->name); + err = -ENOENT; + goto err_out; + } } if (map->fd >= 0) { @@ -4702,9 +4710,13 @@ bpf_object__create_maps(struct bpf_object *obj) if (map->pin_path && !map->pinned) { err = bpf_map__pin(map, NULL); if (err) { + zclose(map->fd); + if (!retried && err == -EEXIST) { + retried = true; + goto retry; + } pr_warn("map '%s': failed to auto-pin at '%s': %d\n", map->name, map->pin_path, err); - zclose(map->fd); goto err_out; } } @@ -4721,279 +4733,6 @@ err_out: return err; } -#define BPF_CORE_SPEC_MAX_LEN 64 - -/* represents BPF CO-RE field or array element accessor */ -struct bpf_core_accessor { - __u32 type_id; /* struct/union type or array element type */ - __u32 idx; /* field index or array index */ - const char *name; /* field name or NULL for array accessor */ -}; - -struct bpf_core_spec { - const struct btf *btf; - /* high-level spec: named fields and array indices only */ - struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; - /* original unresolved (no skip_mods_or_typedefs) root type ID */ - __u32 root_type_id; - /* CO-RE relocation kind */ - enum bpf_core_relo_kind relo_kind; - /* high-level spec length */ - int len; - /* raw, low-level spec: 1-to-1 with accessor spec string */ - int raw_spec[BPF_CORE_SPEC_MAX_LEN]; - /* raw spec length */ - int raw_len; - /* field bit offset represented by spec */ - __u32 bit_offset; -}; - -static bool str_is_empty(const char *s) -{ - return !s || !s[0]; -} - -static bool is_flex_arr(const struct btf *btf, - const struct bpf_core_accessor *acc, - const struct btf_array *arr) -{ - const struct btf_type *t; - - /* not a flexible array, if not inside a struct or has non-zero size */ - if (!acc->name || arr->nelems > 0) - return false; - - /* has to be the last member of enclosing struct */ - t = btf__type_by_id(btf, acc->type_id); - return acc->idx == btf_vlen(t) - 1; -} - -static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) -{ - switch (kind) { - case BPF_FIELD_BYTE_OFFSET: return "byte_off"; - case BPF_FIELD_BYTE_SIZE: return "byte_sz"; - case BPF_FIELD_EXISTS: return "field_exists"; - case BPF_FIELD_SIGNED: return "signed"; - case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; - case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; - case BPF_TYPE_ID_LOCAL: return "local_type_id"; - case BPF_TYPE_ID_TARGET: return "target_type_id"; - case BPF_TYPE_EXISTS: return "type_exists"; - case BPF_TYPE_SIZE: return "type_size"; - case BPF_ENUMVAL_EXISTS: return "enumval_exists"; - case BPF_ENUMVAL_VALUE: return "enumval_value"; - default: return "unknown"; - } -} - -static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) -{ - switch (kind) { - case BPF_FIELD_BYTE_OFFSET: - case BPF_FIELD_BYTE_SIZE: - case BPF_FIELD_EXISTS: - case BPF_FIELD_SIGNED: - case BPF_FIELD_LSHIFT_U64: - case BPF_FIELD_RSHIFT_U64: - return true; - default: - return false; - } -} - -static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) -{ - switch (kind) { - case BPF_TYPE_ID_LOCAL: - case BPF_TYPE_ID_TARGET: - case BPF_TYPE_EXISTS: - case BPF_TYPE_SIZE: - return true; - default: - return false; - } -} - -static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) -{ - switch (kind) { - case BPF_ENUMVAL_EXISTS: - case BPF_ENUMVAL_VALUE: - return true; - default: - return false; - } -} - -/* - * Turn bpf_core_relo into a low- and high-level spec representation, - * validating correctness along the way, as well as calculating resulting - * field bit offset, specified by accessor string. Low-level spec captures - * every single level of nestedness, including traversing anonymous - * struct/union members. High-level one only captures semantically meaningful - * "turning points": named fields and array indicies. - * E.g., for this case: - * - * struct sample { - * int __unimportant; - * struct { - * int __1; - * int __2; - * int a[7]; - * }; - * }; - * - * struct sample *s = ...; - * - * int x = &s->a[3]; // access string = '0:1:2:3' - * - * Low-level spec has 1:1 mapping with each element of access string (it's - * just a parsed access string representation): [0, 1, 2, 3]. - * - * High-level spec will capture only 3 points: - * - intial zero-index access by pointer (&s->... is the same as &s[0]...); - * - field 'a' access (corresponds to '2' in low-level spec); - * - array element #3 access (corresponds to '3' in low-level spec). - * - * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, - * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their - * spec and raw_spec are kept empty. - * - * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access - * string to specify enumerator's value index that need to be relocated. - */ -static int bpf_core_parse_spec(const struct btf *btf, - __u32 type_id, - const char *spec_str, - enum bpf_core_relo_kind relo_kind, - struct bpf_core_spec *spec) -{ - int access_idx, parsed_len, i; - struct bpf_core_accessor *acc; - const struct btf_type *t; - const char *name; - __u32 id; - __s64 sz; - - if (str_is_empty(spec_str) || *spec_str == ':') - return -EINVAL; - - memset(spec, 0, sizeof(*spec)); - spec->btf = btf; - spec->root_type_id = type_id; - spec->relo_kind = relo_kind; - - /* type-based relocations don't have a field access string */ - if (core_relo_is_type_based(relo_kind)) { - if (strcmp(spec_str, "0")) - return -EINVAL; - return 0; - } - - /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ - while (*spec_str) { - if (*spec_str == ':') - ++spec_str; - if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) - return -EINVAL; - if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - spec_str += parsed_len; - spec->raw_spec[spec->raw_len++] = access_idx; - } - - if (spec->raw_len == 0) - return -EINVAL; - - t = skip_mods_and_typedefs(btf, type_id, &id); - if (!t) - return -EINVAL; - - access_idx = spec->raw_spec[0]; - acc = &spec->spec[0]; - acc->type_id = id; - acc->idx = access_idx; - spec->len++; - - if (core_relo_is_enumval_based(relo_kind)) { - if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) - return -EINVAL; - - /* record enumerator name in a first accessor */ - acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); - return 0; - } - - if (!core_relo_is_field_based(relo_kind)) - return -EINVAL; - - sz = btf__resolve_size(btf, id); - if (sz < 0) - return sz; - spec->bit_offset = access_idx * sz * 8; - - for (i = 1; i < spec->raw_len; i++) { - t = skip_mods_and_typedefs(btf, id, &id); - if (!t) - return -EINVAL; - - access_idx = spec->raw_spec[i]; - acc = &spec->spec[spec->len]; - - if (btf_is_composite(t)) { - const struct btf_member *m; - __u32 bit_offset; - - if (access_idx >= btf_vlen(t)) - return -EINVAL; - - bit_offset = btf_member_bit_offset(t, access_idx); - spec->bit_offset += bit_offset; - - m = btf_members(t) + access_idx; - if (m->name_off) { - name = btf__name_by_offset(btf, m->name_off); - if (str_is_empty(name)) - return -EINVAL; - - acc->type_id = id; - acc->idx = access_idx; - acc->name = name; - spec->len++; - } - - id = m->type; - } else if (btf_is_array(t)) { - const struct btf_array *a = btf_array(t); - bool flex; - - t = skip_mods_and_typedefs(btf, a->type, &id); - if (!t) - return -EINVAL; - - flex = is_flex_arr(btf, acc - 1, a); - if (!flex && access_idx >= a->nelems) - return -EINVAL; - - spec->spec[spec->len].type_id = id; - spec->spec[spec->len].idx = access_idx; - spec->len++; - - sz = btf__resolve_size(btf, id); - if (sz < 0) - return sz; - spec->bit_offset += access_idx * sz * 8; - } else { - pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", - type_id, spec_str, i, id, btf_kind_str(t)); - return -EINVAL; - } - } - - return 0; -} - static bool bpf_core_is_flavor_sep(const char *s) { /* check X___Y name pattern, where X and Y are not underscores */ @@ -5006,7 +4745,7 @@ static bool bpf_core_is_flavor_sep(const char *s) * before last triple underscore. Struct name part after last triple * underscore is ignored by BPF CO-RE relocation during relocation matching. */ -static size_t bpf_core_essential_name_len(const char *name) +size_t bpf_core_essential_name_len(const char *name) { size_t n = strlen(name); int i; @@ -5018,34 +4757,20 @@ static size_t bpf_core_essential_name_len(const char *name) return n; } -struct core_cand -{ - const struct btf *btf; - const struct btf_type *t; - const char *name; - __u32 id; -}; - -/* dynamically sized list of type IDs and its associated struct btf */ -struct core_cand_list { - struct core_cand *cands; - int len; -}; - -static void bpf_core_free_cands(struct core_cand_list *cands) +static void bpf_core_free_cands(struct bpf_core_cand_list *cands) { free(cands->cands); free(cands); } -static int bpf_core_add_cands(struct core_cand *local_cand, +static int bpf_core_add_cands(struct bpf_core_cand *local_cand, size_t local_essent_len, const struct btf *targ_btf, const char *targ_btf_name, int targ_start_id, - struct core_cand_list *cands) + struct bpf_core_cand_list *cands) { - struct core_cand *new_cands, *cand; + struct bpf_core_cand *new_cands, *cand; const struct btf_type *t; const char *targ_name; size_t targ_essent_len; @@ -5181,11 +4906,11 @@ err_out: return 0; } -static struct core_cand_list * +static struct bpf_core_cand_list * bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id) { - struct core_cand local_cand = {}; - struct core_cand_list *cands; + struct bpf_core_cand local_cand = {}; + struct bpf_core_cand_list *cands; const struct btf *main_btf; size_t local_essent_len; int err, i; @@ -5239,165 +4964,6 @@ err_out: return ERR_PTR(err); } -/* Check two types for compatibility for the purpose of field access - * relocation. const/volatile/restrict and typedefs are skipped to ensure we - * are relocating semantically compatible entities: - * - any two STRUCTs/UNIONs are compatible and can be mixed; - * - any two FWDs are compatible, if their names match (modulo flavor suffix); - * - any two PTRs are always compatible; - * - for ENUMs, names should be the same (ignoring flavor suffix) or at - * least one of enums should be anonymous; - * - for ENUMs, check sizes, names are ignored; - * - for INT, size and signedness are ignored; - * - any two FLOATs are always compatible; - * - for ARRAY, dimensionality is ignored, element types are checked for - * compatibility recursively; - * - everything else shouldn't be ever a target of relocation. - * These rules are not set in stone and probably will be adjusted as we get - * more experience with using BPF CO-RE relocations. - */ -static int bpf_core_fields_are_compat(const struct btf *local_btf, - __u32 local_id, - const struct btf *targ_btf, - __u32 targ_id) -{ - const struct btf_type *local_type, *targ_type; - -recur: - local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!local_type || !targ_type) - return -EINVAL; - - if (btf_is_composite(local_type) && btf_is_composite(targ_type)) - return 1; - if (btf_kind(local_type) != btf_kind(targ_type)) - return 0; - - switch (btf_kind(local_type)) { - case BTF_KIND_PTR: - case BTF_KIND_FLOAT: - return 1; - case BTF_KIND_FWD: - case BTF_KIND_ENUM: { - const char *local_name, *targ_name; - size_t local_len, targ_len; - - local_name = btf__name_by_offset(local_btf, - local_type->name_off); - targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); - local_len = bpf_core_essential_name_len(local_name); - targ_len = bpf_core_essential_name_len(targ_name); - /* one of them is anonymous or both w/ same flavor-less names */ - return local_len == 0 || targ_len == 0 || - (local_len == targ_len && - strncmp(local_name, targ_name, local_len) == 0); - } - case BTF_KIND_INT: - /* just reject deprecated bitfield-like integers; all other - * integers are by default compatible between each other - */ - return btf_int_offset(local_type) == 0 && - btf_int_offset(targ_type) == 0; - case BTF_KIND_ARRAY: - local_id = btf_array(local_type)->type; - targ_id = btf_array(targ_type)->type; - goto recur; - default: - pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", - btf_kind(local_type), local_id, targ_id); - return 0; - } -} - -/* - * Given single high-level named field accessor in local type, find - * corresponding high-level accessor for a target type. Along the way, - * maintain low-level spec for target as well. Also keep updating target - * bit offset. - * - * Searching is performed through recursive exhaustive enumeration of all - * fields of a struct/union. If there are any anonymous (embedded) - * structs/unions, they are recursively searched as well. If field with - * desired name is found, check compatibility between local and target types, - * before returning result. - * - * 1 is returned, if field is found. - * 0 is returned if no compatible field is found. - * <0 is returned on error. - */ -static int bpf_core_match_member(const struct btf *local_btf, - const struct bpf_core_accessor *local_acc, - const struct btf *targ_btf, - __u32 targ_id, - struct bpf_core_spec *spec, - __u32 *next_targ_id) -{ - const struct btf_type *local_type, *targ_type; - const struct btf_member *local_member, *m; - const char *local_name, *targ_name; - __u32 local_id; - int i, n, found; - - targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); - if (!targ_type) - return -EINVAL; - if (!btf_is_composite(targ_type)) - return 0; - - local_id = local_acc->type_id; - local_type = btf__type_by_id(local_btf, local_id); - local_member = btf_members(local_type) + local_acc->idx; - local_name = btf__name_by_offset(local_btf, local_member->name_off); - - n = btf_vlen(targ_type); - m = btf_members(targ_type); - for (i = 0; i < n; i++, m++) { - __u32 bit_offset; - - bit_offset = btf_member_bit_offset(targ_type, i); - - /* too deep struct/union/array nesting */ - if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - - /* speculate this member will be the good one */ - spec->bit_offset += bit_offset; - spec->raw_spec[spec->raw_len++] = i; - - targ_name = btf__name_by_offset(targ_btf, m->name_off); - if (str_is_empty(targ_name)) { - /* embedded struct/union, we need to go deeper */ - found = bpf_core_match_member(local_btf, local_acc, - targ_btf, m->type, - spec, next_targ_id); - if (found) /* either found or error */ - return found; - } else if (strcmp(local_name, targ_name) == 0) { - /* matching named field */ - struct bpf_core_accessor *targ_acc; - - targ_acc = &spec->spec[spec->len++]; - targ_acc->type_id = targ_id; - targ_acc->idx = i; - targ_acc->name = targ_name; - - *next_targ_id = m->type; - found = bpf_core_fields_are_compat(local_btf, - local_member->type, - targ_btf, m->type); - if (!found) - spec->len--; /* pop accessor */ - return found; - } - /* member turned out not to be what we looked for */ - spec->bit_offset -= bit_offset; - spec->raw_len--; - } - - return 0; -} - /* Check local and target types for compatibility. This check is used for * type-based CO-RE relocations and follow slightly different rules than * field-based relocations. This function assumes that root types were already @@ -5417,8 +4983,8 @@ static int bpf_core_match_member(const struct btf *local_btf, * These rules are not set in stone and probably will be adjusted as we get * more experience with using BPF CO-RE relocations. */ -static int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, - const struct btf *targ_btf, __u32 targ_id) +int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id) { const struct btf_type *local_type, *targ_type; int depth = 32; /* max recursion depth */ @@ -5492,671 +5058,6 @@ recur: } } -/* - * Try to match local spec to a target type and, if successful, produce full - * target spec (high-level, low-level + bit offset). - */ -static int bpf_core_spec_match(struct bpf_core_spec *local_spec, - const struct btf *targ_btf, __u32 targ_id, - struct bpf_core_spec *targ_spec) -{ - const struct btf_type *targ_type; - const struct bpf_core_accessor *local_acc; - struct bpf_core_accessor *targ_acc; - int i, sz, matched; - - memset(targ_spec, 0, sizeof(*targ_spec)); - targ_spec->btf = targ_btf; - targ_spec->root_type_id = targ_id; - targ_spec->relo_kind = local_spec->relo_kind; - - if (core_relo_is_type_based(local_spec->relo_kind)) { - return bpf_core_types_are_compat(local_spec->btf, - local_spec->root_type_id, - targ_btf, targ_id); - } - - local_acc = &local_spec->spec[0]; - targ_acc = &targ_spec->spec[0]; - - if (core_relo_is_enumval_based(local_spec->relo_kind)) { - size_t local_essent_len, targ_essent_len; - const struct btf_enum *e; - const char *targ_name; - - /* has to resolve to an enum */ - targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); - if (!btf_is_enum(targ_type)) - return 0; - - local_essent_len = bpf_core_essential_name_len(local_acc->name); - - for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { - targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); - targ_essent_len = bpf_core_essential_name_len(targ_name); - if (targ_essent_len != local_essent_len) - continue; - if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { - targ_acc->type_id = targ_id; - targ_acc->idx = i; - targ_acc->name = targ_name; - targ_spec->len++; - targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; - targ_spec->raw_len++; - return 1; - } - } - return 0; - } - - if (!core_relo_is_field_based(local_spec->relo_kind)) - return -EINVAL; - - for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { - targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, - &targ_id); - if (!targ_type) - return -EINVAL; - - if (local_acc->name) { - matched = bpf_core_match_member(local_spec->btf, - local_acc, - targ_btf, targ_id, - targ_spec, &targ_id); - if (matched <= 0) - return matched; - } else { - /* for i=0, targ_id is already treated as array element - * type (because it's the original struct), for others - * we should find array element type first - */ - if (i > 0) { - const struct btf_array *a; - bool flex; - - if (!btf_is_array(targ_type)) - return 0; - - a = btf_array(targ_type); - flex = is_flex_arr(targ_btf, targ_acc - 1, a); - if (!flex && local_acc->idx >= a->nelems) - return 0; - if (!skip_mods_and_typedefs(targ_btf, a->type, - &targ_id)) - return -EINVAL; - } - - /* too deep struct/union/array nesting */ - if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) - return -E2BIG; - - targ_acc->type_id = targ_id; - targ_acc->idx = local_acc->idx; - targ_acc->name = NULL; - targ_spec->len++; - targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; - targ_spec->raw_len++; - - sz = btf__resolve_size(targ_btf, targ_id); - if (sz < 0) - return sz; - targ_spec->bit_offset += local_acc->idx * sz * 8; - } - } - - return 1; -} - -static int bpf_core_calc_field_relo(const struct bpf_program *prog, - const struct bpf_core_relo *relo, - const struct bpf_core_spec *spec, - __u32 *val, __u32 *field_sz, __u32 *type_id, - bool *validate) -{ - const struct bpf_core_accessor *acc; - const struct btf_type *t; - __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; - const struct btf_member *m; - const struct btf_type *mt; - bool bitfield; - __s64 sz; - - *field_sz = 0; - - if (relo->kind == BPF_FIELD_EXISTS) { - *val = spec ? 1 : 0; - return 0; - } - - if (!spec) - return -EUCLEAN; /* request instruction poisoning */ - - acc = &spec->spec[spec->len - 1]; - t = btf__type_by_id(spec->btf, acc->type_id); - - /* a[n] accessor needs special handling */ - if (!acc->name) { - if (relo->kind == BPF_FIELD_BYTE_OFFSET) { - *val = spec->bit_offset / 8; - /* remember field size for load/store mem size */ - sz = btf__resolve_size(spec->btf, acc->type_id); - if (sz < 0) - return -EINVAL; - *field_sz = sz; - *type_id = acc->type_id; - } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { - sz = btf__resolve_size(spec->btf, acc->type_id); - if (sz < 0) - return -EINVAL; - *val = sz; - } else { - pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", - prog->name, relo->kind, relo->insn_off / 8); - return -EINVAL; - } - if (validate) - *validate = true; - return 0; - } - - m = btf_members(t) + acc->idx; - mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); - bit_off = spec->bit_offset; - bit_sz = btf_member_bitfield_size(t, acc->idx); - - bitfield = bit_sz > 0; - if (bitfield) { - byte_sz = mt->size; - byte_off = bit_off / 8 / byte_sz * byte_sz; - /* figure out smallest int size necessary for bitfield load */ - while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { - if (byte_sz >= 8) { - /* bitfield can't be read with 64-bit read */ - pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", - prog->name, relo->kind, relo->insn_off / 8); - return -E2BIG; - } - byte_sz *= 2; - byte_off = bit_off / 8 / byte_sz * byte_sz; - } - } else { - sz = btf__resolve_size(spec->btf, field_type_id); - if (sz < 0) - return -EINVAL; - byte_sz = sz; - byte_off = spec->bit_offset / 8; - bit_sz = byte_sz * 8; - } - - /* for bitfields, all the relocatable aspects are ambiguous and we - * might disagree with compiler, so turn off validation of expected - * value, except for signedness - */ - if (validate) - *validate = !bitfield; - - switch (relo->kind) { - case BPF_FIELD_BYTE_OFFSET: - *val = byte_off; - if (!bitfield) { - *field_sz = byte_sz; - *type_id = field_type_id; - } - break; - case BPF_FIELD_BYTE_SIZE: - *val = byte_sz; - break; - case BPF_FIELD_SIGNED: - /* enums will be assumed unsigned */ - *val = btf_is_enum(mt) || - (btf_int_encoding(mt) & BTF_INT_SIGNED); - if (validate) - *validate = true; /* signedness is never ambiguous */ - break; - case BPF_FIELD_LSHIFT_U64: -#if __BYTE_ORDER == __LITTLE_ENDIAN - *val = 64 - (bit_off + bit_sz - byte_off * 8); -#else - *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); -#endif - break; - case BPF_FIELD_RSHIFT_U64: - *val = 64 - bit_sz; - if (validate) - *validate = true; /* right shift is never ambiguous */ - break; - case BPF_FIELD_EXISTS: - default: - return -EOPNOTSUPP; - } - - return 0; -} - -static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, - const struct bpf_core_spec *spec, - __u32 *val) -{ - __s64 sz; - - /* type-based relos return zero when target type is not found */ - if (!spec) { - *val = 0; - return 0; - } - - switch (relo->kind) { - case BPF_TYPE_ID_TARGET: - *val = spec->root_type_id; - break; - case BPF_TYPE_EXISTS: - *val = 1; - break; - case BPF_TYPE_SIZE: - sz = btf__resolve_size(spec->btf, spec->root_type_id); - if (sz < 0) - return -EINVAL; - *val = sz; - break; - case BPF_TYPE_ID_LOCAL: - /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ - default: - return -EOPNOTSUPP; - } - - return 0; -} - -static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, - const struct bpf_core_spec *spec, - __u32 *val) -{ - const struct btf_type *t; - const struct btf_enum *e; - - switch (relo->kind) { - case BPF_ENUMVAL_EXISTS: - *val = spec ? 1 : 0; - break; - case BPF_ENUMVAL_VALUE: - if (!spec) - return -EUCLEAN; /* request instruction poisoning */ - t = btf__type_by_id(spec->btf, spec->spec[0].type_id); - e = btf_enum(t) + spec->spec[0].idx; - *val = e->val; - break; - default: - return -EOPNOTSUPP; - } - - return 0; -} - -struct bpf_core_relo_res -{ - /* expected value in the instruction, unless validate == false */ - __u32 orig_val; - /* new value that needs to be patched up to */ - __u32 new_val; - /* relocation unsuccessful, poison instruction, but don't fail load */ - bool poison; - /* some relocations can't be validated against orig_val */ - bool validate; - /* for field byte offset relocations or the forms: - * *(T *)(rX + <off>) = rY - * rX = *(T *)(rY + <off>), - * we remember original and resolved field size to adjust direct - * memory loads of pointers and integers; this is necessary for 32-bit - * host kernel architectures, but also allows to automatically - * relocate fields that were resized from, e.g., u32 to u64, etc. - */ - bool fail_memsz_adjust; - __u32 orig_sz; - __u32 orig_type_id; - __u32 new_sz; - __u32 new_type_id; -}; - -/* Calculate original and target relocation values, given local and target - * specs and relocation kind. These values are calculated for each candidate. - * If there are multiple candidates, resulting values should all be consistent - * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. - * If instruction has to be poisoned, *poison will be set to true. - */ -static int bpf_core_calc_relo(const struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct bpf_core_spec *local_spec, - const struct bpf_core_spec *targ_spec, - struct bpf_core_relo_res *res) -{ - int err = -EOPNOTSUPP; - - res->orig_val = 0; - res->new_val = 0; - res->poison = false; - res->validate = true; - res->fail_memsz_adjust = false; - res->orig_sz = res->new_sz = 0; - res->orig_type_id = res->new_type_id = 0; - - if (core_relo_is_field_based(relo->kind)) { - err = bpf_core_calc_field_relo(prog, relo, local_spec, - &res->orig_val, &res->orig_sz, - &res->orig_type_id, &res->validate); - err = err ?: bpf_core_calc_field_relo(prog, relo, targ_spec, - &res->new_val, &res->new_sz, - &res->new_type_id, NULL); - if (err) - goto done; - /* Validate if it's safe to adjust load/store memory size. - * Adjustments are performed only if original and new memory - * sizes differ. - */ - res->fail_memsz_adjust = false; - if (res->orig_sz != res->new_sz) { - const struct btf_type *orig_t, *new_t; - - orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); - new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); - - /* There are two use cases in which it's safe to - * adjust load/store's mem size: - * - reading a 32-bit kernel pointer, while on BPF - * size pointers are always 64-bit; in this case - * it's safe to "downsize" instruction size due to - * pointer being treated as unsigned integer with - * zero-extended upper 32-bits; - * - reading unsigned integers, again due to - * zero-extension is preserving the value correctly. - * - * In all other cases it's incorrect to attempt to - * load/store field because read value will be - * incorrect, so we poison relocated instruction. - */ - if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) - goto done; - if (btf_is_int(orig_t) && btf_is_int(new_t) && - btf_int_encoding(orig_t) != BTF_INT_SIGNED && - btf_int_encoding(new_t) != BTF_INT_SIGNED) - goto done; - - /* mark as invalid mem size adjustment, but this will - * only be checked for LDX/STX/ST insns - */ - res->fail_memsz_adjust = true; - } - } else if (core_relo_is_type_based(relo->kind)) { - err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); - err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); - } else if (core_relo_is_enumval_based(relo->kind)) { - err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); - err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); - } - -done: - if (err == -EUCLEAN) { - /* EUCLEAN is used to signal instruction poisoning request */ - res->poison = true; - err = 0; - } else if (err == -EOPNOTSUPP) { - /* EOPNOTSUPP means unknown/unsupported relocation */ - pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", - prog->name, relo_idx, core_relo_kind_str(relo->kind), - relo->kind, relo->insn_off / 8); - } - - return err; -} - -/* - * Turn instruction for which CO_RE relocation failed into invalid one with - * distinct signature. - */ -static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx, - int insn_idx, struct bpf_insn *insn) -{ - pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", - prog->name, relo_idx, insn_idx); - insn->code = BPF_JMP | BPF_CALL; - insn->dst_reg = 0; - insn->src_reg = 0; - insn->off = 0; - /* if this instruction is reachable (not a dead code), - * verifier will complain with the following message: - * invalid func unknown#195896080 - */ - insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ -} - -static int insn_bpf_size_to_bytes(struct bpf_insn *insn) -{ - switch (BPF_SIZE(insn->code)) { - case BPF_DW: return 8; - case BPF_W: return 4; - case BPF_H: return 2; - case BPF_B: return 1; - default: return -1; - } -} - -static int insn_bytes_to_bpf_size(__u32 sz) -{ - switch (sz) { - case 8: return BPF_DW; - case 4: return BPF_W; - case 2: return BPF_H; - case 1: return BPF_B; - default: return -1; - } -} - -/* - * Patch relocatable BPF instruction. - * - * Patched value is determined by relocation kind and target specification. - * For existence relocations target spec will be NULL if field/type is not found. - * Expected insn->imm value is determined using relocation kind and local - * spec, and is checked before patching instruction. If actual insn->imm value - * is wrong, bail out with error. - * - * Currently supported classes of BPF instruction are: - * 1. rX = <imm> (assignment with immediate operand); - * 2. rX += <imm> (arithmetic operations with immediate operand); - * 3. rX = <imm64> (load with 64-bit immediate value); - * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; - * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; - * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. - */ -static int bpf_core_patch_insn(struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct bpf_core_relo_res *res) -{ - __u32 orig_val, new_val; - struct bpf_insn *insn; - int insn_idx; - __u8 class; - - if (relo->insn_off % BPF_INSN_SZ) - return -EINVAL; - insn_idx = relo->insn_off / BPF_INSN_SZ; - /* adjust insn_idx from section frame of reference to the local - * program's frame of reference; (sub-)program code is not yet - * relocated, so it's enough to just subtract in-section offset - */ - insn_idx = insn_idx - prog->sec_insn_off; - insn = &prog->insns[insn_idx]; - class = BPF_CLASS(insn->code); - - if (res->poison) { -poison: - /* poison second part of ldimm64 to avoid confusing error from - * verifier about "unknown opcode 00" - */ - if (is_ldimm64_insn(insn)) - bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1); - bpf_core_poison_insn(prog, relo_idx, insn_idx, insn); - return 0; - } - - orig_val = res->orig_val; - new_val = res->new_val; - - switch (class) { - case BPF_ALU: - case BPF_ALU64: - if (BPF_SRC(insn->code) != BPF_K) - return -EINVAL; - if (res->validate && insn->imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", - prog->name, relo_idx, - insn_idx, insn->imm, orig_val, new_val); - return -EINVAL; - } - orig_val = insn->imm; - insn->imm = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", - prog->name, relo_idx, insn_idx, - orig_val, new_val); - break; - case BPF_LDX: - case BPF_ST: - case BPF_STX: - if (res->validate && insn->off != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", - prog->name, relo_idx, insn_idx, insn->off, orig_val, new_val); - return -EINVAL; - } - if (new_val > SHRT_MAX) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", - prog->name, relo_idx, insn_idx, new_val); - return -ERANGE; - } - if (res->fail_memsz_adjust) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. " - "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", - prog->name, relo_idx, insn_idx); - goto poison; - } - - orig_val = insn->off; - insn->off = new_val; - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", - prog->name, relo_idx, insn_idx, orig_val, new_val); - - if (res->new_sz != res->orig_sz) { - int insn_bytes_sz, insn_bpf_sz; - - insn_bytes_sz = insn_bpf_size_to_bytes(insn); - if (insn_bytes_sz != res->orig_sz) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", - prog->name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); - return -EINVAL; - } - - insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); - if (insn_bpf_sz < 0) { - pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", - prog->name, relo_idx, insn_idx, res->new_sz); - return -EINVAL; - } - - insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); - pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", - prog->name, relo_idx, insn_idx, res->orig_sz, res->new_sz); - } - break; - case BPF_LD: { - __u64 imm; - - if (!is_ldimm64_insn(insn) || - insn[0].src_reg != 0 || insn[0].off != 0 || - insn_idx + 1 >= prog->insns_cnt || - insn[1].code != 0 || insn[1].dst_reg != 0 || - insn[1].src_reg != 0 || insn[1].off != 0) { - pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", - prog->name, relo_idx, insn_idx); - return -EINVAL; - } - - imm = insn[0].imm + ((__u64)insn[1].imm << 32); - if (res->validate && imm != orig_val) { - pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", - prog->name, relo_idx, - insn_idx, (unsigned long long)imm, - orig_val, new_val); - return -EINVAL; - } - - insn[0].imm = new_val; - insn[1].imm = 0; /* currently only 32-bit values are supported */ - pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", - prog->name, relo_idx, insn_idx, - (unsigned long long)imm, new_val); - break; - } - default: - pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", - prog->name, relo_idx, insn_idx, insn->code, - insn->src_reg, insn->dst_reg, insn->off, insn->imm); - return -EINVAL; - } - - return 0; -} - -/* Output spec definition in the format: - * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, - * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b - */ -static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) -{ - const struct btf_type *t; - const struct btf_enum *e; - const char *s; - __u32 type_id; - int i; - - type_id = spec->root_type_id; - t = btf__type_by_id(spec->btf, type_id); - s = btf__name_by_offset(spec->btf, t->name_off); - - libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); - - if (core_relo_is_type_based(spec->relo_kind)) - return; - - if (core_relo_is_enumval_based(spec->relo_kind)) { - t = skip_mods_and_typedefs(spec->btf, type_id, NULL); - e = btf_enum(t) + spec->raw_spec[0]; - s = btf__name_by_offset(spec->btf, e->name_off); - - libbpf_print(level, "::%s = %u", s, e->val); - return; - } - - if (core_relo_is_field_based(spec->relo_kind)) { - for (i = 0; i < spec->len; i++) { - if (spec->spec[i].name) - libbpf_print(level, ".%s", spec->spec[i].name); - else if (i > 0 || spec->spec[i].idx > 0) - libbpf_print(level, "[%u]", spec->spec[i].idx); - } - - libbpf_print(level, " ("); - for (i = 0; i < spec->raw_len; i++) - libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); - - if (spec->bit_offset % 8) - libbpf_print(level, " @ offset %u.%u)", - spec->bit_offset / 8, spec->bit_offset % 8); - else - libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); - return; - } -} - static size_t bpf_core_hash_fn(const void *key, void *ctx) { return (size_t)key; @@ -6172,73 +5073,33 @@ static void *u32_as_hash_key(__u32 x) return (void *)(uintptr_t)x; } -/* - * CO-RE relocate single instruction. - * - * The outline and important points of the algorithm: - * 1. For given local type, find corresponding candidate target types. - * Candidate type is a type with the same "essential" name, ignoring - * everything after last triple underscore (___). E.g., `sample`, - * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates - * for each other. Names with triple underscore are referred to as - * "flavors" and are useful, among other things, to allow to - * specify/support incompatible variations of the same kernel struct, which - * might differ between different kernel versions and/or build - * configurations. - * - * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C - * converter, when deduplicated BTF of a kernel still contains more than - * one different types with the same name. In that case, ___2, ___3, etc - * are appended starting from second name conflict. But start flavors are - * also useful to be defined "locally", in BPF program, to extract same - * data from incompatible changes between different kernel - * versions/configurations. For instance, to handle field renames between - * kernel versions, one can use two flavors of the struct name with the - * same common name and use conditional relocations to extract that field, - * depending on target kernel version. - * 2. For each candidate type, try to match local specification to this - * candidate target type. Matching involves finding corresponding - * high-level spec accessors, meaning that all named fields should match, - * as well as all array accesses should be within the actual bounds. Also, - * types should be compatible (see bpf_core_fields_are_compat for details). - * 3. It is supported and expected that there might be multiple flavors - * matching the spec. As long as all the specs resolve to the same set of - * offsets across all candidates, there is no error. If there is any - * ambiguity, CO-RE relocation will fail. This is necessary to accomodate - * imprefection of BTF deduplication, which can cause slight duplication of - * the same BTF type, if some directly or indirectly referenced (by - * pointer) type gets resolved to different actual types in different - * object files. If such situation occurs, deduplicated BTF will end up - * with two (or more) structurally identical types, which differ only in - * types they refer to through pointer. This should be OK in most cases and - * is not an error. - * 4. Candidate types search is performed by linearly scanning through all - * types in target BTF. It is anticipated that this is overall more - * efficient memory-wise and not significantly worse (if not better) - * CPU-wise compared to prebuilding a map from all local type names to - * a list of candidate type names. It's also sped up by caching resolved - * list of matching candidates per each local "root" type ID, that has at - * least one bpf_core_relo associated with it. This list is shared - * between multiple relocations for the same type ID and is updated as some - * of the candidates are pruned due to structural incompatibility. - */ static int bpf_core_apply_relo(struct bpf_program *prog, const struct bpf_core_relo *relo, int relo_idx, const struct btf *local_btf, struct hashmap *cand_cache) { - struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; const void *type_key = u32_as_hash_key(relo->type_id); - struct bpf_core_relo_res cand_res, targ_res; + struct bpf_core_cand_list *cands = NULL; + const char *prog_name = prog->name; const struct btf_type *local_type; const char *local_name; - struct core_cand_list *cands = NULL; - __u32 local_id; - const char *spec_str; - int i, j, err; + __u32 local_id = relo->type_id; + struct bpf_insn *insn; + int insn_idx, err; + + if (relo->insn_off % BPF_INSN_SZ) + return -EINVAL; + insn_idx = relo->insn_off / BPF_INSN_SZ; + /* adjust insn_idx from section frame of reference to the local + * program's frame of reference; (sub-)program code is not yet + * relocated, so it's enough to just subtract in-section offset + */ + insn_idx = insn_idx - prog->sec_insn_off; + if (insn_idx > prog->insns_cnt) + return -EINVAL; + insn = &prog->insns[insn_idx]; - local_id = relo->type_id; local_type = btf__type_by_id(local_btf, local_id); if (!local_type) return -EINVAL; @@ -6247,51 +5108,19 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (!local_name) return -EINVAL; - spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - if (str_is_empty(spec_str)) - return -EINVAL; - if (prog->obj->gen_loader) { - pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n", + pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n", prog - prog->obj->programs, relo->insn_off / 8, - local_name, spec_str, relo->kind); + local_name, relo->kind); return -ENOTSUP; } - err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); - if (err) { - pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", - prog->name, relo_idx, local_id, btf_kind_str(local_type), - str_is_empty(local_name) ? "<anon>" : local_name, - spec_str, err); - return -EINVAL; - } - - pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog->name, - relo_idx, core_relo_kind_str(relo->kind), relo->kind); - bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); - - /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ - if (relo->kind == BPF_TYPE_ID_LOCAL) { - targ_res.validate = true; - targ_res.poison = false; - targ_res.orig_val = local_spec.root_type_id; - targ_res.new_val = local_spec.root_type_id; - goto patch_insn; - } - /* libbpf doesn't support candidate search for anonymous types */ - if (str_is_empty(spec_str)) { - pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", - prog->name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); - return -EOPNOTSUPP; - } - - if (!hashmap__find(cand_cache, type_key, (void **)&cands)) { + if (relo->kind != BPF_TYPE_ID_LOCAL && + !hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); if (IS_ERR(cands)) { pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n", - prog->name, relo_idx, local_id, btf_kind_str(local_type), + prog_name, relo_idx, local_id, btf_kind_str(local_type), local_name, PTR_ERR(cands)); return PTR_ERR(cands); } @@ -6302,97 +5131,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, } } - for (i = 0, j = 0; i < cands->len; i++) { - err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, - cands->cands[i].id, &cand_spec); - if (err < 0) { - pr_warn("prog '%s': relo #%d: error matching candidate #%d ", - prog->name, relo_idx, i); - bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); - libbpf_print(LIBBPF_WARN, ": %d\n", err); - return err; - } - - pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog->name, - relo_idx, err == 0 ? "non-matching" : "matching", i); - bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); - libbpf_print(LIBBPF_DEBUG, "\n"); - - if (err == 0) - continue; - - err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, &cand_spec, &cand_res); - if (err) - return err; - - if (j == 0) { - targ_res = cand_res; - targ_spec = cand_spec; - } else if (cand_spec.bit_offset != targ_spec.bit_offset) { - /* if there are many field relo candidates, they - * should all resolve to the same bit offset - */ - pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", - prog->name, relo_idx, cand_spec.bit_offset, - targ_spec.bit_offset); - return -EINVAL; - } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { - /* all candidates should result in the same relocation - * decision and value, otherwise it's dangerous to - * proceed due to ambiguity - */ - pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", - prog->name, relo_idx, - cand_res.poison ? "failure" : "success", cand_res.new_val, - targ_res.poison ? "failure" : "success", targ_res.new_val); - return -EINVAL; - } - - cands->cands[j++] = cands->cands[i]; - } - - /* - * For BPF_FIELD_EXISTS relo or when used BPF program has field - * existence checks or kernel version/config checks, it's expected - * that we might not find any candidates. In this case, if field - * wasn't found in any candidate, the list of candidates shouldn't - * change at all, we'll just handle relocating appropriately, - * depending on relo's kind. - */ - if (j > 0) - cands->len = j; - - /* - * If no candidates were found, it might be both a programmer error, - * as well as expected case, depending whether instruction w/ - * relocation is guarded in some way that makes it unreachable (dead - * code) if relocation can't be resolved. This is handled in - * bpf_core_patch_insn() uniformly by replacing that instruction with - * BPF helper call insn (using invalid helper ID). If that instruction - * is indeed unreachable, then it will be ignored and eliminated by - * verifier. If it was an error, then verifier will complain and point - * to a specific instruction number in its log. - */ - if (j == 0) { - pr_debug("prog '%s': relo #%d: no matching targets found\n", - prog->name, relo_idx); - - /* calculate single target relo result explicitly */ - err = bpf_core_calc_relo(prog, relo, relo_idx, &local_spec, NULL, &targ_res); - if (err) - return err; - } - -patch_insn: - /* bpf_core_patch_insn() should know how to handle missing targ_spec */ - err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res); - if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n", - prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err); - return -EINVAL; - } - - return 0; + return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands); } static int @@ -7232,7 +5971,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj) for (i = 0; i < obj->nr_programs; i++) { struct bpf_program *p = &obj->programs[i]; - + if (!p->nr_reloc) continue; @@ -7596,7 +6335,7 @@ static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig; + const char *obj_name, *kconfig, *btf_tmp_path; struct bpf_program *prog; struct bpf_object *obj; char tmp_name[64]; @@ -7627,11 +6366,26 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, if (IS_ERR(obj)) return obj; + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); + if (btf_tmp_path) { + if (strlen(btf_tmp_path) >= PATH_MAX) { + err = -ENAMETOOLONG; + goto out; + } + obj->btf_custom_path = strdup(btf_tmp_path); + if (!obj->btf_custom_path) { + err = -ENOMEM; + goto out; + } + } + kconfig = OPTS_GET(opts, kconfig, NULL); if (kconfig) { obj->kconfig = strdup(kconfig); - if (!obj->kconfig) - return ERR_PTR(-ENOMEM); + if (!obj->kconfig) { + err = -ENOMEM; + goto out; + } } err = bpf_object__elf_init(obj); @@ -8097,7 +6851,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) err = err ? : bpf_object__sanitize_maps(obj); err = err ? : bpf_object__init_kern_struct_ops_maps(obj); err = err ? : bpf_object__create_maps(obj); - err = err ? : bpf_object__relocate(obj, attr->target_btf_path); + err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); err = err ? : bpf_object__load_progs(obj, attr->log_level); if (obj->gen_loader) { @@ -8492,6 +7246,11 @@ const char *bpf_map__get_pin_path(const struct bpf_map *map) return map->pin_path; } +const char *bpf_map__pin_path(const struct bpf_map *map) +{ + return map->pin_path; +} + bool bpf_map__is_pinned(const struct bpf_map *map) { return map->pinned; @@ -8744,6 +7503,7 @@ void bpf_object__close(struct bpf_object *obj) for (i = 0; i < obj->nr_maps; i++) bpf_map__destroy(&obj->maps[i]); + zfree(&obj->btf_custom_path); zfree(&obj->kconfig); zfree(&obj->externs); obj->nr_extern = 0; @@ -9513,7 +8273,7 @@ static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, ret = snprintf(btf_type_name, sizeof(btf_type_name), "%s%s", prefix, name); /* snprintf returns the number of characters written excluding the - * the terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it + * terminating null. So, if >= BTF_MAX_NAME_SIZE are written, it * indicates truncation. */ if (ret < 0 || ret >= sizeof(btf_type_name)) @@ -9537,7 +8297,7 @@ int libbpf_find_vmlinux_btf_id(const char *name, struct btf *btf; int err; - btf = libbpf_find_kernel_btf(); + btf = btf__load_vmlinux_btf(); err = libbpf_get_error(btf); if (err) { pr_warn("vmlinux BTF is not found\n"); @@ -9556,8 +8316,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) { struct bpf_prog_info_linear *info_linear; struct bpf_prog_info *info; - struct btf *btf = NULL; - int err = -EINVAL; + struct btf *btf; + int err; info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); err = libbpf_get_error(info_linear); @@ -9566,12 +8326,15 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) attach_prog_fd); return err; } + + err = -EINVAL; info = &info_linear->info; if (!info->btf_id) { pr_warn("The target program doesn't have BTF\n"); goto out; } - if (btf__get_from_id(info->btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { pr_warn("Failed to get BTF of the program\n"); goto out; } @@ -10055,7 +8818,7 @@ struct bpf_link { int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog) { int ret; - + ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL); return libbpf_err_errno(ret); } @@ -10346,25 +9109,28 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, return pfd; } -struct bpf_program_attach_kprobe_opts { - bool retprobe; - unsigned long offset; -}; - -static struct bpf_link* +struct bpf_link * bpf_program__attach_kprobe_opts(struct bpf_program *prog, const char *func_name, - struct bpf_program_attach_kprobe_opts *opts) + struct bpf_kprobe_opts *opts) { char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; + unsigned long offset; + bool retprobe; int pfd, err; - pfd = perf_event_open_probe(false /* uprobe */, opts->retprobe, func_name, - opts->offset, -1 /* pid */); + if (!OPTS_VALID(opts, bpf_kprobe_opts)) + return libbpf_err_ptr(-EINVAL); + + retprobe = OPTS_GET(opts, retprobe, false); + offset = OPTS_GET(opts, offset, 0); + + pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, + offset, -1 /* pid */); if (pfd < 0) { pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n", - prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name, + prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return libbpf_err_ptr(pfd); } @@ -10373,7 +9139,7 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog, if (err) { close(pfd); pr_warn("prog '%s': failed to attach to %s '%s': %s\n", - prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name, + prog->name, retprobe ? "kretprobe" : "kprobe", func_name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return libbpf_err_ptr(err); } @@ -10384,9 +9150,9 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, const char *func_name) { - struct bpf_program_attach_kprobe_opts opts = { + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, .retprobe = retprobe, - }; + ); return bpf_program__attach_kprobe_opts(prog, func_name, &opts); } @@ -10394,7 +9160,7 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, struct bpf_program *prog) { - struct bpf_program_attach_kprobe_opts opts; + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts); unsigned long offset = 0; struct bpf_link *link; const char *func_name; @@ -10404,13 +9170,14 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, func_name = prog->sec_name + sec->len; opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0; - n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%lx", &func, &offset); + n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset); if (n < 1) { err = -EINVAL; pr_warn("kprobe name is invalid: %s\n", func_name); return libbpf_err_ptr(err); } if (opts.retprobe && offset != 0) { + free(func); err = -EINVAL; pr_warn("kretprobes do not support offset specification\n"); return libbpf_err_ptr(err); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6e61342ba56c..1271d99bb7aa 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -94,8 +94,26 @@ struct bpf_object_open_opts { * system Kconfig for CONFIG_xxx externs. */ const char *kconfig; + /* Path to the custom BTF to be used for BPF CO-RE relocations. + * This custom BTF completely replaces the use of vmlinux BTF + * for the purpose of CO-RE relocations. + * NOTE: any other BPF feature (e.g., fentry/fexit programs, + * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux. + */ + const char *btf_custom_path; }; -#define bpf_object_open_opts__last_field kconfig +#define bpf_object_open_opts__last_field btf_custom_path + +struct bpf_kprobe_opts { + /* size of this struct, for forward/backward compatiblity */ + size_t sz; + /* function's offset to install kprobe to */ + unsigned long offset; + /* kprobe is return probe */ + bool retprobe; + size_t :0; +}; +#define bpf_kprobe_opts__last_field retprobe LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * @@ -243,6 +261,10 @@ LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, const char *func_name); LIBBPF_API struct bpf_link * +bpf_program__attach_kprobe_opts(struct bpf_program *prog, + const char *func_name, + struct bpf_kprobe_opts *opts); +LIBBPF_API struct bpf_link * bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, size_t func_offset); @@ -477,6 +499,7 @@ LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 944c99d1ded3..58e0fb2c482f 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -371,7 +371,15 @@ LIBBPF_0.4.0 { LIBBPF_0.5.0 { global: bpf_map__initial_value; + bpf_map__pin_path; bpf_map_lookup_and_delete_elem_flags; + bpf_program__attach_kprobe_opts; bpf_object__gen_loader; + btf__load_from_kernel_by_id; + btf__load_from_kernel_by_id_split; + btf__load_into_kernel; + btf__load_module_btf; + btf__load_vmlinux_btf; + btf_dump__dump_type_data; libbpf_set_strict_mode; } LIBBPF_0.4.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 016ca7cb4f8a..f7b691d5f9eb 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -14,6 +14,7 @@ #include <errno.h> #include <linux/err.h> #include "libbpf_legacy.h" +#include "relo_core.h" /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -366,76 +367,6 @@ struct bpf_line_info_min { __u32 line_col; }; -/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value - * has to be adjusted by relocations. - */ -enum bpf_core_relo_kind { - BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ - BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ - BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ - BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ - BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ - BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ - BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ - BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ - BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ - BPF_TYPE_SIZE = 9, /* type size in bytes */ - BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ - BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ -}; - -/* The minimum bpf_core_relo checked by the loader - * - * CO-RE relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * type or field; - * - access_str_off - offset into corresponding .BTF string section. String - * interpretation depends on specific relocation kind: - * - for field-based relocations, string encodes an accessed field using - * a sequence of field and array indices, separated by colon (:). It's - * conceptually very close to LLVM's getelementptr ([0]) instruction's - * arguments for identifying offset to a field. - * - for type-based relocations, strings is expected to be just "0"; - * - for enum value-based relocations, string contains an index of enum - * value within its enum type; - * - * Example to provide a better feel. - * - * struct sample { - * int a; - * struct { - * int b[10]; - * }; - * }; - * - * struct sample *s = ...; - * int x = &s->a; // encoded as "0:0" (a is field #0) - * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, - * // b is field #0 inside anon struct, accessing elem #5) - * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example will capture BTF type id of - * `struct sample`. - * - * Such relocation is emitted when using __builtin_preserve_access_index() - * Clang built-in, passing expression that captures field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - * __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit field relocation recording necessary data to - * be able to find offset of embedded `a.b.c` field within `src` struct. - * - * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_core_relo { - __u32 insn_off; - __u32 type_id; - __u32 access_str_off; - enum bpf_core_relo_kind kind; -}; typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx); typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx); @@ -494,4 +425,14 @@ static inline void *libbpf_ptr(void *ret) return ret; } +static inline bool str_is_empty(const char *s) +{ + return !s || !s[0]; +} + +static inline bool is_ldimm64_insn(struct bpf_insn *insn) +{ + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c new file mode 100644 index 000000000000..4016ed492d0c --- /dev/null +++ b/tools/lib/bpf/relo_core.c @@ -0,0 +1,1295 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2019 Facebook */ + +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> +#include <linux/err.h> + +#include "libbpf.h" +#include "bpf.h" +#include "btf.h" +#include "str_error.h" +#include "libbpf_internal.h" + +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { + __u32 type_id; /* struct/union type or array element type */ + __u32 idx; /* field index or array index */ + const char *name; /* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { + const struct btf *btf; + /* high-level spec: named fields and array indices only */ + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* original unresolved (no skip_mods_or_typedefs) root type ID */ + __u32 root_type_id; + /* CO-RE relocation kind */ + enum bpf_core_relo_kind relo_kind; + /* high-level spec length */ + int len; + /* raw, low-level spec: 1-to-1 with accessor spec string */ + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; + /* raw spec length */ + int raw_len; + /* field bit offset represented by spec */ + __u32 bit_offset; +}; + +static bool is_flex_arr(const struct btf *btf, + const struct bpf_core_accessor *acc, + const struct btf_array *arr) +{ + const struct btf_type *t; + + /* not a flexible array, if not inside a struct or has non-zero size */ + if (!acc->name || arr->nelems > 0) + return false; + + /* has to be the last member of enclosing struct */ + t = btf__type_by_id(btf, acc->type_id); + return acc->idx == btf_vlen(t) - 1; +} + +static const char *core_relo_kind_str(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_FIELD_BYTE_OFFSET: return "byte_off"; + case BPF_FIELD_BYTE_SIZE: return "byte_sz"; + case BPF_FIELD_EXISTS: return "field_exists"; + case BPF_FIELD_SIGNED: return "signed"; + case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; + case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; + case BPF_TYPE_ID_LOCAL: return "local_type_id"; + case BPF_TYPE_ID_TARGET: return "target_type_id"; + case BPF_TYPE_EXISTS: return "type_exists"; + case BPF_TYPE_SIZE: return "type_size"; + case BPF_ENUMVAL_EXISTS: return "enumval_exists"; + case BPF_ENUMVAL_VALUE: return "enumval_value"; + default: return "unknown"; + } +} + +static bool core_relo_is_field_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_FIELD_BYTE_OFFSET: + case BPF_FIELD_BYTE_SIZE: + case BPF_FIELD_EXISTS: + case BPF_FIELD_SIGNED: + case BPF_FIELD_LSHIFT_U64: + case BPF_FIELD_RSHIFT_U64: + return true; + default: + return false; + } +} + +static bool core_relo_is_type_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_TYPE_ID_LOCAL: + case BPF_TYPE_ID_TARGET: + case BPF_TYPE_EXISTS: + case BPF_TYPE_SIZE: + return true; + default: + return false; + } +} + +static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind) +{ + switch (kind) { + case BPF_ENUMVAL_EXISTS: + case BPF_ENUMVAL_VALUE: + return true; + default: + return false; + } +} + +/* + * Turn bpf_core_relo into a low- and high-level spec representation, + * validating correctness along the way, as well as calculating resulting + * field bit offset, specified by accessor string. Low-level spec captures + * every single level of nestedness, including traversing anonymous + * struct/union members. High-level one only captures semantically meaningful + * "turning points": named fields and array indicies. + * E.g., for this case: + * + * struct sample { + * int __unimportant; + * struct { + * int __1; + * int __2; + * int a[7]; + * }; + * }; + * + * struct sample *s = ...; + * + * int x = &s->a[3]; // access string = '0:1:2:3' + * + * Low-level spec has 1:1 mapping with each element of access string (it's + * just a parsed access string representation): [0, 1, 2, 3]. + * + * High-level spec will capture only 3 points: + * - intial zero-index access by pointer (&s->... is the same as &s[0]...); + * - field 'a' access (corresponds to '2' in low-level spec); + * - array element #3 access (corresponds to '3' in low-level spec). + * + * Type-based relocations (TYPE_EXISTS/TYPE_SIZE, + * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their + * spec and raw_spec are kept empty. + * + * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access + * string to specify enumerator's value index that need to be relocated. + */ +static int bpf_core_parse_spec(const struct btf *btf, + __u32 type_id, + const char *spec_str, + enum bpf_core_relo_kind relo_kind, + struct bpf_core_spec *spec) +{ + int access_idx, parsed_len, i; + struct bpf_core_accessor *acc; + const struct btf_type *t; + const char *name; + __u32 id; + __s64 sz; + + if (str_is_empty(spec_str) || *spec_str == ':') + return -EINVAL; + + memset(spec, 0, sizeof(*spec)); + spec->btf = btf; + spec->root_type_id = type_id; + spec->relo_kind = relo_kind; + + /* type-based relocations don't have a field access string */ + if (core_relo_is_type_based(relo_kind)) { + if (strcmp(spec_str, "0")) + return -EINVAL; + return 0; + } + + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ + while (*spec_str) { + if (*spec_str == ':') + ++spec_str; + if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) + return -EINVAL; + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + spec_str += parsed_len; + spec->raw_spec[spec->raw_len++] = access_idx; + } + + if (spec->raw_len == 0) + return -EINVAL; + + t = skip_mods_and_typedefs(btf, type_id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[0]; + acc = &spec->spec[0]; + acc->type_id = id; + acc->idx = access_idx; + spec->len++; + + if (core_relo_is_enumval_based(relo_kind)) { + if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t)) + return -EINVAL; + + /* record enumerator name in a first accessor */ + acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off); + return 0; + } + + if (!core_relo_is_field_based(relo_kind)) + return -EINVAL; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->bit_offset = access_idx * sz * 8; + + for (i = 1; i < spec->raw_len; i++) { + t = skip_mods_and_typedefs(btf, id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[i]; + acc = &spec->spec[spec->len]; + + if (btf_is_composite(t)) { + const struct btf_member *m; + __u32 bit_offset; + + if (access_idx >= btf_vlen(t)) + return -EINVAL; + + bit_offset = btf_member_bit_offset(t, access_idx); + spec->bit_offset += bit_offset; + + m = btf_members(t) + access_idx; + if (m->name_off) { + name = btf__name_by_offset(btf, m->name_off); + if (str_is_empty(name)) + return -EINVAL; + + acc->type_id = id; + acc->idx = access_idx; + acc->name = name; + spec->len++; + } + + id = m->type; + } else if (btf_is_array(t)) { + const struct btf_array *a = btf_array(t); + bool flex; + + t = skip_mods_and_typedefs(btf, a->type, &id); + if (!t) + return -EINVAL; + + flex = is_flex_arr(btf, acc - 1, a); + if (!flex && access_idx >= a->nelems) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->bit_offset += access_idx * sz * 8; + } else { + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", + type_id, spec_str, i, id, btf_kind_str(t)); + return -EINVAL; + } + } + + return 0; +} + +/* Check two types for compatibility for the purpose of field access + * relocation. const/volatile/restrict and typedefs are skipped to ensure we + * are relocating semantically compatible entities: + * - any two STRUCTs/UNIONs are compatible and can be mixed; + * - any two FWDs are compatible, if their names match (modulo flavor suffix); + * - any two PTRs are always compatible; + * - for ENUMs, names should be the same (ignoring flavor suffix) or at + * least one of enums should be anonymous; + * - for ENUMs, check sizes, names are ignored; + * - for INT, size and signedness are ignored; + * - any two FLOATs are always compatible; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - everything else shouldn't be ever a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ +static int bpf_core_fields_are_compat(const struct btf *local_btf, + __u32 local_id, + const struct btf *targ_btf, + __u32 targ_id) +{ + const struct btf_type *local_type, *targ_type; + +recur: + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_is_composite(local_type) && btf_is_composite(targ_type)) + return 1; + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_PTR: + case BTF_KIND_FLOAT: + return 1; + case BTF_KIND_FWD: + case BTF_KIND_ENUM: { + const char *local_name, *targ_name; + size_t local_len, targ_len; + + local_name = btf__name_by_offset(local_btf, + local_type->name_off); + targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); + local_len = bpf_core_essential_name_len(local_name); + targ_len = bpf_core_essential_name_len(targ_name); + /* one of them is anonymous or both w/ same flavor-less names */ + return local_len == 0 || targ_len == 0 || + (local_len == targ_len && + strncmp(local_name, targ_name, local_len) == 0); + } + case BTF_KIND_INT: + /* just reject deprecated bitfield-like integers; all other + * integers are by default compatible between each other + */ + return btf_int_offset(local_type) == 0 && + btf_int_offset(targ_type) == 0; + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + default: + pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); + return 0; + } +} + +/* + * Given single high-level named field accessor in local type, find + * corresponding high-level accessor for a target type. Along the way, + * maintain low-level spec for target as well. Also keep updating target + * bit offset. + * + * Searching is performed through recursive exhaustive enumeration of all + * fields of a struct/union. If there are any anonymous (embedded) + * structs/unions, they are recursively searched as well. If field with + * desired name is found, check compatibility between local and target types, + * before returning result. + * + * 1 is returned, if field is found. + * 0 is returned if no compatible field is found. + * <0 is returned on error. + */ +static int bpf_core_match_member(const struct btf *local_btf, + const struct bpf_core_accessor *local_acc, + const struct btf *targ_btf, + __u32 targ_id, + struct bpf_core_spec *spec, + __u32 *next_targ_id) +{ + const struct btf_type *local_type, *targ_type; + const struct btf_member *local_member, *m; + const char *local_name, *targ_name; + __u32 local_id; + int i, n, found; + + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!targ_type) + return -EINVAL; + if (!btf_is_composite(targ_type)) + return 0; + + local_id = local_acc->type_id; + local_type = btf__type_by_id(local_btf, local_id); + local_member = btf_members(local_type) + local_acc->idx; + local_name = btf__name_by_offset(local_btf, local_member->name_off); + + n = btf_vlen(targ_type); + m = btf_members(targ_type); + for (i = 0; i < n; i++, m++) { + __u32 bit_offset; + + bit_offset = btf_member_bit_offset(targ_type, i); + + /* too deep struct/union/array nesting */ + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + /* speculate this member will be the good one */ + spec->bit_offset += bit_offset; + spec->raw_spec[spec->raw_len++] = i; + + targ_name = btf__name_by_offset(targ_btf, m->name_off); + if (str_is_empty(targ_name)) { + /* embedded struct/union, we need to go deeper */ + found = bpf_core_match_member(local_btf, local_acc, + targ_btf, m->type, + spec, next_targ_id); + if (found) /* either found or error */ + return found; + } else if (strcmp(local_name, targ_name) == 0) { + /* matching named field */ + struct bpf_core_accessor *targ_acc; + + targ_acc = &spec->spec[spec->len++]; + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + + *next_targ_id = m->type; + found = bpf_core_fields_are_compat(local_btf, + local_member->type, + targ_btf, m->type); + if (!found) + spec->len--; /* pop accessor */ + return found; + } + /* member turned out not to be what we looked for */ + spec->bit_offset -= bit_offset; + spec->raw_len--; + } + + return 0; +} + +/* + * Try to match local spec to a target type and, if successful, produce full + * target spec (high-level, low-level + bit offset). + */ +static int bpf_core_spec_match(struct bpf_core_spec *local_spec, + const struct btf *targ_btf, __u32 targ_id, + struct bpf_core_spec *targ_spec) +{ + const struct btf_type *targ_type; + const struct bpf_core_accessor *local_acc; + struct bpf_core_accessor *targ_acc; + int i, sz, matched; + + memset(targ_spec, 0, sizeof(*targ_spec)); + targ_spec->btf = targ_btf; + targ_spec->root_type_id = targ_id; + targ_spec->relo_kind = local_spec->relo_kind; + + if (core_relo_is_type_based(local_spec->relo_kind)) { + return bpf_core_types_are_compat(local_spec->btf, + local_spec->root_type_id, + targ_btf, targ_id); + } + + local_acc = &local_spec->spec[0]; + targ_acc = &targ_spec->spec[0]; + + if (core_relo_is_enumval_based(local_spec->relo_kind)) { + size_t local_essent_len, targ_essent_len; + const struct btf_enum *e; + const char *targ_name; + + /* has to resolve to an enum */ + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id); + if (!btf_is_enum(targ_type)) + return 0; + + local_essent_len = bpf_core_essential_name_len(local_acc->name); + + for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) { + targ_name = btf__name_by_offset(targ_spec->btf, e->name_off); + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) { + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + return 1; + } + } + return 0; + } + + if (!core_relo_is_field_based(local_spec->relo_kind)) + return -EINVAL; + + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, + &targ_id); + if (!targ_type) + return -EINVAL; + + if (local_acc->name) { + matched = bpf_core_match_member(local_spec->btf, + local_acc, + targ_btf, targ_id, + targ_spec, &targ_id); + if (matched <= 0) + return matched; + } else { + /* for i=0, targ_id is already treated as array element + * type (because it's the original struct), for others + * we should find array element type first + */ + if (i > 0) { + const struct btf_array *a; + bool flex; + + if (!btf_is_array(targ_type)) + return 0; + + a = btf_array(targ_type); + flex = is_flex_arr(targ_btf, targ_acc - 1, a); + if (!flex && local_acc->idx >= a->nelems) + return 0; + if (!skip_mods_and_typedefs(targ_btf, a->type, + &targ_id)) + return -EINVAL; + } + + /* too deep struct/union/array nesting */ + if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + targ_acc->type_id = targ_id; + targ_acc->idx = local_acc->idx; + targ_acc->name = NULL; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + + sz = btf__resolve_size(targ_btf, targ_id); + if (sz < 0) + return sz; + targ_spec->bit_offset += local_acc->idx * sz * 8; + } + } + + return 1; +} + +static int bpf_core_calc_field_relo(const char *prog_name, + const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val, __u32 *field_sz, __u32 *type_id, + bool *validate) +{ + const struct bpf_core_accessor *acc; + const struct btf_type *t; + __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id; + const struct btf_member *m; + const struct btf_type *mt; + bool bitfield; + __s64 sz; + + *field_sz = 0; + + if (relo->kind == BPF_FIELD_EXISTS) { + *val = spec ? 1 : 0; + return 0; + } + + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + + acc = &spec->spec[spec->len - 1]; + t = btf__type_by_id(spec->btf, acc->type_id); + + /* a[n] accessor needs special handling */ + if (!acc->name) { + if (relo->kind == BPF_FIELD_BYTE_OFFSET) { + *val = spec->bit_offset / 8; + /* remember field size for load/store mem size */ + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; + *field_sz = sz; + *type_id = acc->type_id; + } else if (relo->kind == BPF_FIELD_BYTE_SIZE) { + sz = btf__resolve_size(spec->btf, acc->type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + } else { + pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", + prog_name, relo->kind, relo->insn_off / 8); + return -EINVAL; + } + if (validate) + *validate = true; + return 0; + } + + m = btf_members(t) + acc->idx; + mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id); + bit_off = spec->bit_offset; + bit_sz = btf_member_bitfield_size(t, acc->idx); + + bitfield = bit_sz > 0; + if (bitfield) { + byte_sz = mt->size; + byte_off = bit_off / 8 / byte_sz * byte_sz; + /* figure out smallest int size necessary for bitfield load */ + while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { + if (byte_sz >= 8) { + /* bitfield can't be read with 64-bit read */ + pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", + prog_name, relo->kind, relo->insn_off / 8); + return -E2BIG; + } + byte_sz *= 2; + byte_off = bit_off / 8 / byte_sz * byte_sz; + } + } else { + sz = btf__resolve_size(spec->btf, field_type_id); + if (sz < 0) + return -EINVAL; + byte_sz = sz; + byte_off = spec->bit_offset / 8; + bit_sz = byte_sz * 8; + } + + /* for bitfields, all the relocatable aspects are ambiguous and we + * might disagree with compiler, so turn off validation of expected + * value, except for signedness + */ + if (validate) + *validate = !bitfield; + + switch (relo->kind) { + case BPF_FIELD_BYTE_OFFSET: + *val = byte_off; + if (!bitfield) { + *field_sz = byte_sz; + *type_id = field_type_id; + } + break; + case BPF_FIELD_BYTE_SIZE: + *val = byte_sz; + break; + case BPF_FIELD_SIGNED: + /* enums will be assumed unsigned */ + *val = btf_is_enum(mt) || + (btf_int_encoding(mt) & BTF_INT_SIGNED); + if (validate) + *validate = true; /* signedness is never ambiguous */ + break; + case BPF_FIELD_LSHIFT_U64: +#if __BYTE_ORDER == __LITTLE_ENDIAN + *val = 64 - (bit_off + bit_sz - byte_off * 8); +#else + *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); +#endif + break; + case BPF_FIELD_RSHIFT_U64: + *val = 64 - bit_sz; + if (validate) + *validate = true; /* right shift is never ambiguous */ + break; + case BPF_FIELD_EXISTS: + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val) +{ + __s64 sz; + + /* type-based relos return zero when target type is not found */ + if (!spec) { + *val = 0; + return 0; + } + + switch (relo->kind) { + case BPF_TYPE_ID_TARGET: + *val = spec->root_type_id; + break; + case BPF_TYPE_EXISTS: + *val = 1; + break; + case BPF_TYPE_SIZE: + sz = btf__resolve_size(spec->btf, spec->root_type_id); + if (sz < 0) + return -EINVAL; + *val = sz; + break; + case BPF_TYPE_ID_LOCAL: + /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, + const struct bpf_core_spec *spec, + __u32 *val) +{ + const struct btf_type *t; + const struct btf_enum *e; + + switch (relo->kind) { + case BPF_ENUMVAL_EXISTS: + *val = spec ? 1 : 0; + break; + case BPF_ENUMVAL_VALUE: + if (!spec) + return -EUCLEAN; /* request instruction poisoning */ + t = btf__type_by_id(spec->btf, spec->spec[0].type_id); + e = btf_enum(t) + spec->spec[0].idx; + *val = e->val; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +struct bpf_core_relo_res +{ + /* expected value in the instruction, unless validate == false */ + __u32 orig_val; + /* new value that needs to be patched up to */ + __u32 new_val; + /* relocation unsuccessful, poison instruction, but don't fail load */ + bool poison; + /* some relocations can't be validated against orig_val */ + bool validate; + /* for field byte offset relocations or the forms: + * *(T *)(rX + <off>) = rY + * rX = *(T *)(rY + <off>), + * we remember original and resolved field size to adjust direct + * memory loads of pointers and integers; this is necessary for 32-bit + * host kernel architectures, but also allows to automatically + * relocate fields that were resized from, e.g., u32 to u64, etc. + */ + bool fail_memsz_adjust; + __u32 orig_sz; + __u32 orig_type_id; + __u32 new_sz; + __u32 new_type_id; +}; + +/* Calculate original and target relocation values, given local and target + * specs and relocation kind. These values are calculated for each candidate. + * If there are multiple candidates, resulting values should all be consistent + * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity. + * If instruction has to be poisoned, *poison will be set to true. + */ +static int bpf_core_calc_relo(const char *prog_name, + const struct bpf_core_relo *relo, + int relo_idx, + const struct bpf_core_spec *local_spec, + const struct bpf_core_spec *targ_spec, + struct bpf_core_relo_res *res) +{ + int err = -EOPNOTSUPP; + + res->orig_val = 0; + res->new_val = 0; + res->poison = false; + res->validate = true; + res->fail_memsz_adjust = false; + res->orig_sz = res->new_sz = 0; + res->orig_type_id = res->new_type_id = 0; + + if (core_relo_is_field_based(relo->kind)) { + err = bpf_core_calc_field_relo(prog_name, relo, local_spec, + &res->orig_val, &res->orig_sz, + &res->orig_type_id, &res->validate); + err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec, + &res->new_val, &res->new_sz, + &res->new_type_id, NULL); + if (err) + goto done; + /* Validate if it's safe to adjust load/store memory size. + * Adjustments are performed only if original and new memory + * sizes differ. + */ + res->fail_memsz_adjust = false; + if (res->orig_sz != res->new_sz) { + const struct btf_type *orig_t, *new_t; + + orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); + new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); + + /* There are two use cases in which it's safe to + * adjust load/store's mem size: + * - reading a 32-bit kernel pointer, while on BPF + * size pointers are always 64-bit; in this case + * it's safe to "downsize" instruction size due to + * pointer being treated as unsigned integer with + * zero-extended upper 32-bits; + * - reading unsigned integers, again due to + * zero-extension is preserving the value correctly. + * + * In all other cases it's incorrect to attempt to + * load/store field because read value will be + * incorrect, so we poison relocated instruction. + */ + if (btf_is_ptr(orig_t) && btf_is_ptr(new_t)) + goto done; + if (btf_is_int(orig_t) && btf_is_int(new_t) && + btf_int_encoding(orig_t) != BTF_INT_SIGNED && + btf_int_encoding(new_t) != BTF_INT_SIGNED) + goto done; + + /* mark as invalid mem size adjustment, but this will + * only be checked for LDX/STX/ST insns + */ + res->fail_memsz_adjust = true; + } + } else if (core_relo_is_type_based(relo->kind)) { + err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); + } else if (core_relo_is_enumval_based(relo->kind)) { + err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val); + err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); + } + +done: + if (err == -EUCLEAN) { + /* EUCLEAN is used to signal instruction poisoning request */ + res->poison = true; + err = 0; + } else if (err == -EOPNOTSUPP) { + /* EOPNOTSUPP means unknown/unsupported relocation */ + pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n", + prog_name, relo_idx, core_relo_kind_str(relo->kind), + relo->kind, relo->insn_off / 8); + } + + return err; +} + +/* + * Turn instruction for which CO_RE relocation failed into invalid one with + * distinct signature. + */ +static void bpf_core_poison_insn(const char *prog_name, int relo_idx, + int insn_idx, struct bpf_insn *insn) +{ + pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n", + prog_name, relo_idx, insn_idx); + insn->code = BPF_JMP | BPF_CALL; + insn->dst_reg = 0; + insn->src_reg = 0; + insn->off = 0; + /* if this instruction is reachable (not a dead code), + * verifier will complain with the following message: + * invalid func unknown#195896080 + */ + insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ +} + +static int insn_bpf_size_to_bytes(struct bpf_insn *insn) +{ + switch (BPF_SIZE(insn->code)) { + case BPF_DW: return 8; + case BPF_W: return 4; + case BPF_H: return 2; + case BPF_B: return 1; + default: return -1; + } +} + +static int insn_bytes_to_bpf_size(__u32 sz) +{ + switch (sz) { + case 8: return BPF_DW; + case 4: return BPF_W; + case 2: return BPF_H; + case 1: return BPF_B; + default: return -1; + } +} + +/* + * Patch relocatable BPF instruction. + * + * Patched value is determined by relocation kind and target specification. + * For existence relocations target spec will be NULL if field/type is not found. + * Expected insn->imm value is determined using relocation kind and local + * spec, and is checked before patching instruction. If actual insn->imm value + * is wrong, bail out with error. + * + * Currently supported classes of BPF instruction are: + * 1. rX = <imm> (assignment with immediate operand); + * 2. rX += <imm> (arithmetic operations with immediate operand); + * 3. rX = <imm64> (load with 64-bit immediate value); + * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64}; + * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64}; + * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}. + */ +static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, const struct bpf_core_relo *relo, + int relo_idx, const struct bpf_core_relo_res *res) +{ + __u32 orig_val, new_val; + __u8 class; + + class = BPF_CLASS(insn->code); + + if (res->poison) { +poison: + /* poison second part of ldimm64 to avoid confusing error from + * verifier about "unknown opcode 00" + */ + if (is_ldimm64_insn(insn)) + bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1); + bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn); + return 0; + } + + orig_val = res->orig_val; + new_val = res->new_val; + + switch (class) { + case BPF_ALU: + case BPF_ALU64: + if (BPF_SRC(insn->code) != BPF_K) + return -EINVAL; + if (res->validate && insn->imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", + prog_name, relo_idx, + insn_idx, insn->imm, orig_val, new_val); + return -EINVAL; + } + orig_val = insn->imm; + insn->imm = new_val; + pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n", + prog_name, relo_idx, insn_idx, + orig_val, new_val); + break; + case BPF_LDX: + case BPF_ST: + case BPF_STX: + if (res->validate && insn->off != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n", + prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val); + return -EINVAL; + } + if (new_val > SHRT_MAX) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n", + prog_name, relo_idx, insn_idx, new_val); + return -ERANGE; + } + if (res->fail_memsz_adjust) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. " + "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n", + prog_name, relo_idx, insn_idx); + goto poison; + } + + orig_val = insn->off; + insn->off = new_val; + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n", + prog_name, relo_idx, insn_idx, orig_val, new_val); + + if (res->new_sz != res->orig_sz) { + int insn_bytes_sz, insn_bpf_sz; + + insn_bytes_sz = insn_bpf_size_to_bytes(insn); + if (insn_bytes_sz != res->orig_sz) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n", + prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz); + return -EINVAL; + } + + insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz); + if (insn_bpf_sz < 0) { + pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n", + prog_name, relo_idx, insn_idx, res->new_sz); + return -EINVAL; + } + + insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code); + pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n", + prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz); + } + break; + case BPF_LD: { + __u64 imm; + + if (!is_ldimm64_insn(insn) || + insn[0].src_reg != 0 || insn[0].off != 0 || + insn[1].code != 0 || insn[1].dst_reg != 0 || + insn[1].src_reg != 0 || insn[1].off != 0) { + pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n", + prog_name, relo_idx, insn_idx); + return -EINVAL; + } + + imm = insn[0].imm + ((__u64)insn[1].imm << 32); + if (res->validate && imm != orig_val) { + pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n", + prog_name, relo_idx, + insn_idx, (unsigned long long)imm, + orig_val, new_val); + return -EINVAL; + } + + insn[0].imm = new_val; + insn[1].imm = 0; /* currently only 32-bit values are supported */ + pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n", + prog_name, relo_idx, insn_idx, + (unsigned long long)imm, new_val); + break; + } + default: + pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n", + prog_name, relo_idx, insn_idx, insn->code, + insn->src_reg, insn->dst_reg, insn->off, insn->imm); + return -EINVAL; + } + + return 0; +} + +/* Output spec definition in the format: + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ +static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +{ + const struct btf_type *t; + const struct btf_enum *e; + const char *s; + __u32 type_id; + int i; + + type_id = spec->root_type_id; + t = btf__type_by_id(spec->btf, type_id); + s = btf__name_by_offset(spec->btf, t->name_off); + + libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s); + + if (core_relo_is_type_based(spec->relo_kind)) + return; + + if (core_relo_is_enumval_based(spec->relo_kind)) { + t = skip_mods_and_typedefs(spec->btf, type_id, NULL); + e = btf_enum(t) + spec->raw_spec[0]; + s = btf__name_by_offset(spec->btf, e->name_off); + + libbpf_print(level, "::%s = %u", s, e->val); + return; + } + + if (core_relo_is_field_based(spec->relo_kind)) { + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else if (i > 0 || spec->spec[i].idx > 0) + libbpf_print(level, "[%u]", spec->spec[i].idx); + } + + libbpf_print(level, " ("); + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]); + + if (spec->bit_offset % 8) + libbpf_print(level, " @ offset %u.%u)", + spec->bit_offset / 8, spec->bit_offset % 8); + else + libbpf_print(level, " @ offset %u)", spec->bit_offset / 8); + return; + } +} + +/* + * CO-RE relocate single instruction. + * + * The outline and important points of the algorithm: + * 1. For given local type, find corresponding candidate target types. + * Candidate type is a type with the same "essential" name, ignoring + * everything after last triple underscore (___). E.g., `sample`, + * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates + * for each other. Names with triple underscore are referred to as + * "flavors" and are useful, among other things, to allow to + * specify/support incompatible variations of the same kernel struct, which + * might differ between different kernel versions and/or build + * configurations. + * + * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C + * converter, when deduplicated BTF of a kernel still contains more than + * one different types with the same name. In that case, ___2, ___3, etc + * are appended starting from second name conflict. But start flavors are + * also useful to be defined "locally", in BPF program, to extract same + * data from incompatible changes between different kernel + * versions/configurations. For instance, to handle field renames between + * kernel versions, one can use two flavors of the struct name with the + * same common name and use conditional relocations to extract that field, + * depending on target kernel version. + * 2. For each candidate type, try to match local specification to this + * candidate target type. Matching involves finding corresponding + * high-level spec accessors, meaning that all named fields should match, + * as well as all array accesses should be within the actual bounds. Also, + * types should be compatible (see bpf_core_fields_are_compat for details). + * 3. It is supported and expected that there might be multiple flavors + * matching the spec. As long as all the specs resolve to the same set of + * offsets across all candidates, there is no error. If there is any + * ambiguity, CO-RE relocation will fail. This is necessary to accomodate + * imprefection of BTF deduplication, which can cause slight duplication of + * the same BTF type, if some directly or indirectly referenced (by + * pointer) type gets resolved to different actual types in different + * object files. If such situation occurs, deduplicated BTF will end up + * with two (or more) structurally identical types, which differ only in + * types they refer to through pointer. This should be OK in most cases and + * is not an error. + * 4. Candidate types search is performed by linearly scanning through all + * types in target BTF. It is anticipated that this is overall more + * efficient memory-wise and not significantly worse (if not better) + * CPU-wise compared to prebuilding a map from all local type names to + * a list of candidate type names. It's also sped up by caching resolved + * list of matching candidates per each local "root" type ID, that has at + * least one bpf_core_relo associated with it. This list is shared + * between multiple relocations for the same type ID and is updated as some + * of the candidates are pruned due to structural incompatibility. + */ +int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands) +{ + struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; + struct bpf_core_relo_res cand_res, targ_res; + const struct btf_type *local_type; + const char *local_name; + __u32 local_id; + const char *spec_str; + int i, j, err; + + local_id = relo->type_id; + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) + return -EINVAL; + + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (!local_name) + return -EINVAL; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + if (str_is_empty(spec_str)) + return -EINVAL; + + err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); + if (err) { + pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n", + prog_name, relo_idx, local_id, btf_kind_str(local_type), + str_is_empty(local_name) ? "<anon>" : local_name, + spec_str, err); + return -EINVAL; + } + + pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name, + relo_idx, core_relo_kind_str(relo->kind), relo->kind); + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ + if (relo->kind == BPF_TYPE_ID_LOCAL) { + targ_res.validate = true; + targ_res.poison = false; + targ_res.orig_val = local_spec.root_type_id; + targ_res.new_val = local_spec.root_type_id; + goto patch_insn; + } + + /* libbpf doesn't support candidate search for anonymous types */ + if (str_is_empty(spec_str)) { + pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n", + prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind); + return -EOPNOTSUPP; + } + + + for (i = 0, j = 0; i < cands->len; i++) { + err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, + cands->cands[i].id, &cand_spec); + if (err < 0) { + pr_warn("prog '%s': relo #%d: error matching candidate #%d ", + prog_name, relo_idx, i); + bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); + libbpf_print(LIBBPF_WARN, ": %d\n", err); + return err; + } + + pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name, + relo_idx, err == 0 ? "non-matching" : "matching", i); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (err == 0) + continue; + + err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); + if (err) + return err; + + if (j == 0) { + targ_res = cand_res; + targ_spec = cand_spec; + } else if (cand_spec.bit_offset != targ_spec.bit_offset) { + /* if there are many field relo candidates, they + * should all resolve to the same bit offset + */ + pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.bit_offset, + targ_spec.bit_offset); + return -EINVAL; + } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { + /* all candidates should result in the same relocation + * decision and value, otherwise it's dangerous to + * proceed due to ambiguity + */ + pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", + prog_name, relo_idx, + cand_res.poison ? "failure" : "success", cand_res.new_val, + targ_res.poison ? "failure" : "success", targ_res.new_val); + return -EINVAL; + } + + cands->cands[j++] = cands->cands[i]; + } + + /* + * For BPF_FIELD_EXISTS relo or when used BPF program has field + * existence checks or kernel version/config checks, it's expected + * that we might not find any candidates. In this case, if field + * wasn't found in any candidate, the list of candidates shouldn't + * change at all, we'll just handle relocating appropriately, + * depending on relo's kind. + */ + if (j > 0) + cands->len = j; + + /* + * If no candidates were found, it might be both a programmer error, + * as well as expected case, depending whether instruction w/ + * relocation is guarded in some way that makes it unreachable (dead + * code) if relocation can't be resolved. This is handled in + * bpf_core_patch_insn() uniformly by replacing that instruction with + * BPF helper call insn (using invalid helper ID). If that instruction + * is indeed unreachable, then it will be ignored and eliminated by + * verifier. If it was an error, then verifier will complain and point + * to a specific instruction number in its log. + */ + if (j == 0) { + pr_debug("prog '%s': relo #%d: no matching targets found\n", + prog_name, relo_idx); + + /* calculate single target relo result explicitly */ + err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); + if (err) + return err; + } + +patch_insn: + /* bpf_core_patch_insn() should know how to handle missing targ_spec */ + err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res); + if (err) { + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", + prog_name, relo_idx, relo->insn_off / 8, err); + return -EINVAL; + } + + return 0; +} diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h new file mode 100644 index 000000000000..3b9f8f18346c --- /dev/null +++ b/tools/lib/bpf/relo_core.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2019 Facebook */ + +#ifndef __RELO_CORE_H +#define __RELO_CORE_H + +/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value + * has to be adjusted by relocations. + */ +enum bpf_core_relo_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_BYTE_SIZE = 1, /* field size in bytes */ + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ + BPF_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */ + BPF_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */ + BPF_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */ + BPF_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */ + BPF_TYPE_ID_TARGET = 7, /* type ID in target kernel */ + BPF_TYPE_EXISTS = 8, /* type existence in target kernel */ + BPF_TYPE_SIZE = 9, /* type size in bytes */ + BPF_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */ + BPF_ENUMVAL_VALUE = 11, /* enum value integer value */ +}; + +/* The minimum bpf_core_relo checked by the loader + * + * CO-RE relocation captures the following data: + * - insn_off - instruction offset (in bytes) within a BPF program that needs + * its insn->imm field to be relocated with actual field info; + * - type_id - BTF type ID of the "root" (containing) entity of a relocatable + * type or field; + * - access_str_off - offset into corresponding .BTF string section. String + * interpretation depends on specific relocation kind: + * - for field-based relocations, string encodes an accessed field using + * a sequence of field and array indices, separated by colon (:). It's + * conceptually very close to LLVM's getelementptr ([0]) instruction's + * arguments for identifying offset to a field. + * - for type-based relocations, strings is expected to be just "0"; + * - for enum value-based relocations, string contains an index of enum + * value within its enum type; + * + * Example to provide a better feel. + * + * struct sample { + * int a; + * struct { + * int b[10]; + * }; + * }; + * + * struct sample *s = ...; + * int x = &s->a; // encoded as "0:0" (a is field #0) + * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, + * // b is field #0 inside anon struct, accessing elem #5) + * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) + * + * type_id for all relocs in this example will capture BTF type id of + * `struct sample`. + * + * Such relocation is emitted when using __builtin_preserve_access_index() + * Clang built-in, passing expression that captures field address, e.g.: + * + * bpf_probe_read(&dst, sizeof(dst), + * __builtin_preserve_access_index(&src->a.b.c)); + * + * In this case Clang will emit field relocation recording necessary data to + * be able to find offset of embedded `a.b.c` field within `src` struct. + * + * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction + */ +struct bpf_core_relo { + __u32 insn_off; + __u32 type_id; + __u32 access_str_off; + enum bpf_core_relo_kind kind; +}; + +struct bpf_core_cand { + const struct btf *btf; + const struct btf_type *t; + const char *name; + __u32 id; +}; + +/* dynamically sized list of type IDs and its associated struct btf */ +struct bpf_core_cand_list { + struct bpf_core_cand *cands; + int len; +}; + +int bpf_core_apply_relo_insn(const char *prog_name, + struct bpf_insn *insn, int insn_idx, + const struct bpf_core_relo *relo, int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands); +int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, + const struct btf *targ_btf, __u32 targ_id); + +size_t bpf_core_essential_name_len(const char *name); +#endif diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index cdecda1ddd36..996d025b8ed8 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -223,10 +223,10 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, free(info_linear); return -1; } - if (btf__get_from_id(info->btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(info->btf_id); + if (libbpf_get_error(btf)) { pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id); err = -1; - btf = NULL; goto out; } perf_env__fetch_btf(env, info->btf_id, btf); @@ -296,7 +296,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, out: free(info_linear); - free(btf); + btf__free(btf); return err ? -1 : 0; } @@ -478,7 +478,8 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) if (btf_id == 0) goto out; - if (btf__get_from_id(btf_id, &btf)) { + btf = btf__load_from_kernel_by_id(btf_id); + if (libbpf_get_error(btf)) { pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, btf_id); goto out; @@ -486,7 +487,7 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id) perf_env__fetch_btf(env, btf_id, btf); out: - free(btf); + btf__free(btf); close(fd); } diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 8150e03367bb..ba0f20853651 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -64,8 +64,8 @@ static char *bpf_target_prog_name(int tgt_fd) struct bpf_prog_info_linear *info_linear; struct bpf_func_info *func_info; const struct btf_type *t; + struct btf *btf = NULL; char *name = NULL; - struct btf *btf; info_linear = bpf_program__get_prog_info_linear( tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO); @@ -74,12 +74,17 @@ static char *bpf_target_prog_name(int tgt_fd) return NULL; } - if (info_linear->info.btf_id == 0 || - btf__get_from_id(info_linear->info.btf_id, &btf)) { + if (info_linear->info.btf_id == 0) { pr_debug("prog FD %d doesn't have valid btf\n", tgt_fd); goto out; } + btf = btf__load_from_kernel_by_id(info_linear->info.btf_id); + if (libbpf_get_error(btf)) { + pr_debug("failed to load btf for prog FD %d\n", tgt_fd); + goto out; + } + func_info = u64_to_ptr(info_linear->info.func_info); t = btf__type_by_id(btf, func_info[0].type_id); if (!t) { @@ -89,6 +94,7 @@ static char *bpf_target_prog_name(int tgt_fd) } name = strdup(btf__name_by_offset(btf, t->name_off)); out: + btf__free(btf); free(info_linear); return name; } diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 22f8326547eb..bc1f64873c8f 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2434,6 +2434,22 @@ static int cs_etm__process_event(struct perf_session *session, return 0; } +static void dump_queued_data(struct cs_etm_auxtrace *etm, + struct perf_record_auxtrace *event) +{ + struct auxtrace_buffer *buf; + unsigned int i; + /* + * Find all buffers with same reference in the queues and dump them. + * This is because the queues can contain multiple entries of the same + * buffer that were split on aux records. + */ + for (i = 0; i < etm->queues.nr_queues; ++i) + list_for_each_entry(buf, &etm->queues.queue_array[i].head, list) + if (buf->reference == event->reference) + cs_etm__dump_event(etm, buf); +} + static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, struct perf_tool *tool __maybe_unused) @@ -2466,7 +2482,8 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session, cs_etm__dump_event(etm, buffer); auxtrace_buffer__put_data(buffer); } - } + } else if (dump_trace) + dump_queued_data(etm, &event->auxtrace); return 0; } @@ -3042,7 +3059,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); - return 0; } err = cs_etm__synth_events(etm, session); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 72e7f3616157..8af693d9678c 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -192,8 +192,6 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (!(prot & PROT_EXEC)) dso__set_loaded(dso); } - - nsinfo__put(dso->nsinfo); dso->nsinfo = nsi; if (build_id__is_defined(bid)) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index a1bd7007a8b4..fc683bc41715 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -742,9 +742,13 @@ struct pmu_events_map *__weak pmu_events_map__find(void) return perf_pmu__find_map(NULL); } -static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) +/* + * Suffix must be in form tok_{digits}, or tok{digits}, or same as pmu_name + * to be valid. + */ +static bool perf_pmu__valid_suffix(const char *pmu_name, char *tok) { - char *p; + const char *p; if (strncmp(pmu_name, tok, strlen(tok))) return false; @@ -753,12 +757,16 @@ static bool perf_pmu__valid_suffix(char *pmu_name, char *tok) if (*p == 0) return true; - if (*p != '_') - return false; + if (*p == '_') + ++p; - ++p; - if (*p == 0 || !isdigit(*p)) - return false; + /* Ensure we end in a number */ + while (1) { + if (!isdigit(*p)) + return false; + if (*(++p) == 0) + break; + } return true; } @@ -789,12 +797,19 @@ bool pmu_uncore_alias_match(const char *pmu_name, const char *name) * match "socket" in "socketX_pmunameY" and then "pmuname" in * "pmunameY". */ - for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) { + while (1) { + char *next_tok = strtok_r(NULL, ",", &tmp); + name = strstr(name, tok); - if (!name || !perf_pmu__valid_suffix((char *)name, tok)) { + if (!name || + (!next_tok && !perf_pmu__valid_suffix(name, tok))) { res = false; goto out; } + if (!next_tok) + break; + tok = next_tok; + name += strlen(tok); } res = true; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index fb010a35d61a..da9e8b699e42 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -38,6 +38,7 @@ TARGETS += mount_setattr TARGETS += mqueue TARGETS += nci TARGETS += net +TARGETS += net/af_unix TARGETS += net/forwarding TARGETS += net/mptcp TARGETS += netfilter diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index addcfd8b615e..433f8bef261e 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -23,7 +23,6 @@ test_skb_cgroup_id_user test_cgroup_storage test_flow_dissector flow_dissector_load -test_netcnt test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f405b20c1e6c..2a58b7b5aea4 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_verifier_log test_dev_cgroup \ test_sock test_sockmap get_cgroup_id_user \ test_cgroup_storage \ - test_netcnt test_tcpnotify_user test_sysctl \ + test_tcpnotify_user test_sysctl \ test_progs-no_alu32 # Also test bpf-gcc, if present @@ -197,7 +197,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c -$(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 8deec1ca9150..9b17f2867488 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -19,6 +19,13 @@ the CI. It builds the kernel (without overwriting your existing Kconfig), recomp bpf selftests, runs them (by default ``tools/testing/selftests/bpf/test_progs``) and saves the resulting output (by default in ``~/.bpf_selftests``). +Script dependencies: +- clang (preferably built from sources, https://github.com/llvm/llvm-project); +- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/); +- qemu; +- docutils (for ``rst2man``); +- libcap-devel. + For more information on about using the script, run: .. code-block:: console diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h index 81084c1c2c23..0ab1c88041cd 100644 --- a/tools/testing/selftests/bpf/netcnt_common.h +++ b/tools/testing/selftests/bpf/netcnt_common.h @@ -6,19 +6,39 @@ #define MAX_PERCPU_PACKETS 32 -struct percpu_net_cnt { - __u64 packets; - __u64 bytes; +/* sizeof(struct bpf_local_storage_elem): + * + * It really is about 128 bytes on x86_64, but allocate more to account for + * possible layout changes, different architectures, etc. + * The kernel will wrap up to PAGE_SIZE internally anyway. + */ +#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 256 - __u64 prev_ts; +/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */ +#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \ + SIZEOF_BPF_LOCAL_STORAGE_ELEM) - __u64 prev_packets; - __u64 prev_bytes; +#define PCPU_MIN_UNIT_SIZE 32768 + +union percpu_net_cnt { + struct { + __u64 packets; + __u64 bytes; + + __u64 prev_ts; + + __u64 prev_packets; + __u64 prev_bytes; + }; + __u8 data[PCPU_MIN_UNIT_SIZE]; }; -struct net_cnt { - __u64 packets; - __u64 bytes; +union net_cnt { + struct { + __u64 packets; + __u64 bytes; + }; + __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE]; }; #endif diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 2060bc122c53..d6857683397f 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -66,17 +66,13 @@ int settimeo(int fd, int timeout_ms) #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) -int start_server(int family, int type, const char *addr_str, __u16 port, - int timeout_ms) +static int __start_server(int type, const struct sockaddr *addr, + socklen_t addrlen, int timeout_ms, bool reuseport) { - struct sockaddr_storage addr = {}; - socklen_t len; + int on = 1; int fd; - if (make_sockaddr(family, addr_str, port, &addr, &len)) - return -1; - - fd = socket(family, type, 0); + fd = socket(addr->sa_family, type, 0); if (fd < 0) { log_err("Failed to create server socket"); return -1; @@ -85,7 +81,13 @@ int start_server(int family, int type, const char *addr_str, __u16 port, if (settimeo(fd, timeout_ms)) goto error_close; - if (bind(fd, (const struct sockaddr *)&addr, len) < 0) { + if (reuseport && + setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) { + log_err("Failed to set SO_REUSEPORT"); + return -1; + } + + if (bind(fd, addr, addrlen) < 0) { log_err("Failed to bind socket"); goto error_close; } @@ -104,6 +106,69 @@ error_close: return -1; } +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) +{ + struct sockaddr_storage addr; + socklen_t addrlen; + + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) + return -1; + + return __start_server(type, (struct sockaddr *)&addr, + addrlen, timeout_ms, false); +} + +int *start_reuseport_server(int family, int type, const char *addr_str, + __u16 port, int timeout_ms, unsigned int nr_listens) +{ + struct sockaddr_storage addr; + unsigned int nr_fds = 0; + socklen_t addrlen; + int *fds; + + if (!nr_listens) + return NULL; + + if (make_sockaddr(family, addr_str, port, &addr, &addrlen)) + return NULL; + + fds = malloc(sizeof(*fds) * nr_listens); + if (!fds) + return NULL; + + fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, + timeout_ms, true); + if (fds[0] == -1) + goto close_fds; + nr_fds = 1; + + if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen)) + goto close_fds; + + for (; nr_fds < nr_listens; nr_fds++) { + fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, + addrlen, timeout_ms, true); + if (fds[nr_fds] == -1) + goto close_fds; + } + + return fds; + +close_fds: + free_fds(fds, nr_fds); + return NULL; +} + +void free_fds(int *fds, unsigned int nr_close_fds) +{ + if (fds) { + while (nr_close_fds) + close(fds[--nr_close_fds]); + free(fds); + } +} + int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int timeout_ms) { @@ -217,6 +282,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, if (family == AF_INET) { struct sockaddr_in *sin = (void *)addr; + memset(addr, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_port = htons(port); if (addr_str && @@ -230,6 +296,7 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, } else if (family == AF_INET6) { struct sockaddr_in6 *sin6 = (void *)addr; + memset(addr, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(port); if (addr_str && @@ -243,3 +310,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, } return -1; } + +char *ping_command(int family) +{ + if (family == AF_INET6) { + /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ + if (!system("which ping6 >/dev/null 2>&1")) + return "ping6"; + else + return "ping -6"; + } + return "ping"; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 5e0d51c07b63..c59a8f6d770b 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -36,11 +36,16 @@ extern struct ipv6_packet pkt_v6; int settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); +int *start_reuseport_server(int family, int type, const char *addr_str, + __u16 port, int timeout_ms, + unsigned int nr_listens); +void free_fds(int *fds, unsigned int nr_close_fds); int connect_to_fd(int server_fd, int timeout_ms); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int timeout_ms); int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); +char *ping_command(int family); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c new file mode 100644 index 000000000000..85babb0487b3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#define _GNU_SOURCE +#include <sched.h> +#include <test_progs.h> +#include "network_helpers.h" +#include "bpf_dctcp.skel.h" +#include "bpf_cubic.skel.h" +#include "bpf_iter_setsockopt.skel.h" + +static int create_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo")) + return -1; + + return 0; +} + +static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds) +{ + unsigned int i; + + for (i = 0; i < nr_fds; i++) { + if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic", + sizeof("bpf_cubic"))) + return i; + } + + return nr_fds; +} + +static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds) +{ + char tcp_cc[16]; + socklen_t optlen = sizeof(tcp_cc); + unsigned int i; + + for (i = 0; i < nr_fds; i++) { + if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION, + tcp_cc, &optlen) || + strcmp(tcp_cc, "bpf_dctcp")) + return i; + } + + return nr_fds; +} + +static int *make_established(int listen_fd, unsigned int nr_est, + int **paccepted_fds) +{ + int *est_fds, *accepted_fds; + unsigned int i; + + est_fds = malloc(sizeof(*est_fds) * nr_est); + if (!est_fds) + return NULL; + + accepted_fds = malloc(sizeof(*accepted_fds) * nr_est); + if (!accepted_fds) { + free(est_fds); + return NULL; + } + + for (i = 0; i < nr_est; i++) { + est_fds[i] = connect_to_fd(listen_fd, 0); + if (est_fds[i] == -1) + break; + if (set_bpf_cubic(&est_fds[i], 1) != 1) { + close(est_fds[i]); + break; + } + + accepted_fds[i] = accept(listen_fd, NULL, 0); + if (accepted_fds[i] == -1) { + close(est_fds[i]); + break; + } + } + + if (!ASSERT_EQ(i, nr_est, "create established fds")) { + free_fds(accepted_fds, i); + free_fds(est_fds, i); + return NULL; + } + + *paccepted_fds = accepted_fds; + return est_fds; +} + +static unsigned short get_local_port(int fd) +{ + struct sockaddr_in6 addr; + socklen_t addrlen = sizeof(addr); + + if (!getsockname(fd, &addr, &addrlen)) + return ntohs(addr.sin6_port); + + return 0; +} + +static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel, + bool random_retry) +{ + int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL; + unsigned int nr_reuse_listens = 256, nr_est = 256; + int err, iter_fd = -1, listen_fd = -1; + char buf; + + /* Prepare non-reuseport listen_fd */ + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (!ASSERT_GE(listen_fd, 0, "start_server")) + return; + if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1, + "set listen_fd to cubic")) + goto done; + iter_skel->bss->listen_hport = get_local_port(listen_fd); + if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0, + "get_local_port(listen_fd)")) + goto done; + + /* Connect to non-reuseport listen_fd */ + est_fds = make_established(listen_fd, nr_est, &accepted_fds); + if (!ASSERT_OK_PTR(est_fds, "create established")) + goto done; + + /* Prepare reuseport listen fds */ + reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM, + "::1", 0, 0, + nr_reuse_listens); + if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server")) + goto done; + if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens), + nr_reuse_listens, "set reuse_listen_fds to cubic")) + goto done; + iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]); + if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0, + "get_local_port(reuse_listen_fds[0])")) + goto done; + + /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */ + iter_skel->bss->random_retry = random_retry; + iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc)); + if (!ASSERT_GE(iter_fd, 0, "create iter_fd")) + goto done; + + while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 && + errno == EAGAIN) + ; + if (!ASSERT_OK(err, "read iter error")) + goto done; + + /* Check reuseport listen fds for dctcp */ + ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens), + nr_reuse_listens, + "check reuse_listen_fds dctcp"); + + /* Check non reuseport listen fd for dctcp */ + ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1, + "check listen_fd dctcp"); + + /* Check established fds for dctcp */ + ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est, + "check est_fds dctcp"); + + /* Check accepted fds for dctcp */ + ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est, + "check accepted_fds dctcp"); + +done: + if (iter_fd != -1) + close(iter_fd); + if (listen_fd != -1) + close(listen_fd); + free_fds(reuse_listen_fds, nr_reuse_listens); + free_fds(accepted_fds, nr_est); + free_fds(est_fds, nr_est); +} + +void test_bpf_iter_setsockopt(void) +{ + struct bpf_iter_setsockopt *iter_skel = NULL; + struct bpf_cubic *cubic_skel = NULL; + struct bpf_dctcp *dctcp_skel = NULL; + struct bpf_link *cubic_link = NULL; + struct bpf_link *dctcp_link = NULL; + + if (create_netns()) + return; + + /* Load iter_skel */ + iter_skel = bpf_iter_setsockopt__open_and_load(); + if (!ASSERT_OK_PTR(iter_skel, "iter_skel")) + return; + iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL); + if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter")) + goto done; + + /* Load bpf_cubic */ + cubic_skel = bpf_cubic__open_and_load(); + if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel")) + goto done; + cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic); + if (!ASSERT_OK_PTR(cubic_link, "cubic_link")) + goto done; + + /* Load bpf_dctcp */ + dctcp_skel = bpf_dctcp__open_and_load(); + if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel")) + goto done; + dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp); + if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link")) + goto done; + + do_bpf_iter_setsockopt(iter_skel, true); + do_bpf_iter_setsockopt(iter_skel, false); + +done: + bpf_link__destroy(cubic_link); + bpf_link__destroy(dctcp_link); + bpf_cubic__destroy(cubic_skel); + bpf_dctcp__destroy(dctcp_skel); + bpf_iter_setsockopt__destroy(iter_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 857e3f26086f..649f87382c8d 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4350,7 +4350,8 @@ static void do_test_file(unsigned int test_num) goto done; } - err = btf__get_from_id(info.btf_id, &btf); + btf = btf__load_from_kernel_by_id(info.btf_id); + err = libbpf_get_error(btf); if (CHECK(err, "cannot get btf from kernel, err: %d", err)) goto done; @@ -4386,6 +4387,7 @@ skip: fprintf(stderr, "OK"); done: + btf__free(btf); free(func_info); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 1b90e684ff13..52ccf0cf35e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -232,7 +232,593 @@ err_out: btf__free(btf); } +#define STRSIZE 4096 + +static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args) +{ + char *s = ctx, new[STRSIZE]; + + vsnprintf(new, STRSIZE, fmt, args); + if (strlen(s) < STRSIZE) + strncat(s, new, STRSIZE - strlen(s) - 1); +} + +static int btf_dump_data(struct btf *btf, struct btf_dump *d, + char *name, char *prefix, __u64 flags, void *ptr, + size_t ptr_sz, char *str, const char *expected_val) +{ + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + size_t type_sz; + __s32 type_id; + int ret = 0; + + if (flags & BTF_F_COMPACT) + opts.compact = true; + if (flags & BTF_F_NONAME) + opts.skip_names = true; + if (flags & BTF_F_ZERO) + opts.emit_zeroes = true; + if (prefix) { + ASSERT_STRNEQ(name, prefix, strlen(prefix), + "verify prefix match"); + name += strlen(prefix) + 1; + } + type_id = btf__find_by_name(btf, name); + if (!ASSERT_GE(type_id, 0, "find type id")) + return -ENOENT; + type_sz = btf__resolve_size(btf, type_id); + str[0] = '\0'; + ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts); + if (type_sz <= ptr_sz) { + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) + return -EINVAL; + } else { + if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG")) + return -EINVAL; + } + if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match")) + return -EFAULT; + return 0; +} + +#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \ + _expected, ...) \ + do { \ + char __ptrtype[64] = #_type; \ + char *_ptrtype = (char *)__ptrtype; \ + _type _ptrdata = __VA_ARGS__; \ + void *_ptr = &_ptrdata; \ + \ + (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \ + _ptr, sizeof(_type), _str, \ + _expected); \ + } while (0) + +/* Use where expected data string matches its stringified declaration */ +#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \ + ...) \ + TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \ + "(" #_type ")" #__VA_ARGS__, __VA_ARGS__) + +/* overflow test; pass typesize < expected type size, ensure E2BIG returned */ +#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \ + _expected, ...) \ + do { \ + char __ptrtype[64] = #_type; \ + char *_ptrtype = (char *)__ptrtype; \ + _type _ptrdata = __VA_ARGS__; \ + void *_ptr = &_ptrdata; \ + \ + (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \ + _ptr, _type_sz, _str, _expected); \ + } while (0) + +#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \ + _expected, ...) \ + do { \ + _type _ptrdata = __VA_ARGS__; \ + void *_ptr = &_ptrdata; \ + \ + (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \ + _ptr, sizeof(_type), _str, \ + _expected); \ + } while (0) + +static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d, + char *str) +{ +#ifdef __SIZEOF_INT128__ + __int128 i = 0xffffffffffffffff; + + /* this dance is required because we cannot directly initialize + * a 128-bit value to anything larger than a 64-bit value. + */ + i = (i << 64) | (i - 1); +#endif + /* simple int */ + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, + "1234", 1234); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234); + + /* zero value should be printed at toplevel */ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO, + "(int)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "0", 0); + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME, + "-4567", -4567); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567); + + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1); + +#ifdef __SIZEOF_INT128__ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT, + "(__int128)0xffffffffffffffff", + 0xffffffffffffffff); + ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str, + "(__int128)0xfffffffffffffffffffffffffffffffe"), + "dump __int128"); +#endif +} + +static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + float t1 = 1.234567; + float t2 = -1.234567; + float t3 = 0.0; + double t4 = 5.678912; + double t5 = -5.678912; + double t6 = 0.0; + long double t7 = 9.876543; + long double t8 = -9.876543; + long double t9 = 0.0; + + /* since the kernel does not likely have any float types in its BTF, we + * will need to add some of various sizes. + */ + + ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float"); + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str, + "(test_float)1.234567"), "dump float"); + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str, + "(test_float)-1.234567"), "dump float"); + ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str, + "(test_float)0.000000"), "dump float"); + + ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double"); + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str, + "(test_double)5.678912"), "dump double"); + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str, + "(test_double)-5.678912"), "dump double"); + ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str, + "(test_double)0.000000"), "dump double"); + + ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double"); + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16, + str, "(test_long_double)9.876543"), + "dump long_double"); + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16, + str, "(test_long_double)-9.876543"), + "dump long_double"); + ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16, + str, "(test_long_double)0.000000"), + "dump long_double"); +} + +static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + /* simple char */ + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME, + "100", 100); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100); + /* zero value should be printed at toplevel */ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT, + "(char)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO, + "(char)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0); + + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100); +} + +static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + /* simple typedef */ + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME, + "1", 1); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1); + /* zero value should be printed at toplevel */ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO, + "(u64)0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "0", 0); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0); + + /* typedef struct */ + TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, + {.counter = (int)1,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME, + "{1,}", { .counter = 1 }); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0, +"(atomic_t){\n" +" .counter = (int)1,\n" +"}", + {.counter = 1,}); + /* typedef with 0 value should be printed at toplevel */ + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}", + {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME, + "{}", {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0, +"(atomic_t){\n" +"}", + {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO, + "(atomic_t){.counter = (int)0,}", + {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "{0,}", {.counter = 0,}); + TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO, +"(atomic_t){\n" +" .counter = (int)0,\n" +"}", + { .counter = 0,}); + + /* overflow should show type but not value since it overflows */ + TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1, + "(atomic_t){\n", { .counter = 1}); +} + +static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + /* enum where enum value does (and does not) exist */ + TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, + BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, + "(enum bpf_cmd)BPF_MAP_CREATE", 0); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_NONAME, + "BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0, + "(enum bpf_cmd)BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "BPF_MAP_CREATE", 0); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_ZERO, + "(enum bpf_cmd)BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "BPF_MAP_CREATE", BPF_MAP_CREATE); + TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, + BTF_F_COMPACT | BTF_F_NONAME, + "2000", 2000); + TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0, + "(enum bpf_cmd)2000", 2000); + + TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd, + sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE); +} + +static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + char zero_data[512] = { }; + char type_data[512]; + void *fops = type_data; + void *skb = type_data; + size_t type_sz; + __s32 type_id; + char *cmpstr; + int ret; + + memset(type_data, 255, sizeof(type_data)); + + /* simple struct */ + TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT, + {.name_off = (__u32)3,.val = (__s32)-1,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_NONAME, + "{3,-1,}", + { .name_off = 3, .val = -1,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0, +"(struct btf_enum){\n" +" .name_off = (__u32)3,\n" +" .val = (__s32)-1,\n" +"}", + { .name_off = 3, .val = -1,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_NONAME, + "{-1,}", + { .name_off = 0, .val = -1,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO, + "{0,-1,}", + { .name_off = 0, .val = -1,}); + /* empty struct should be printed */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT, + "(struct btf_enum){}", + { .name_off = 0, .val = 0,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_NONAME, + "{}", + { .name_off = 0, .val = 0,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0, +"(struct btf_enum){\n" +"}", + { .name_off = 0, .val = 0,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_COMPACT | BTF_F_ZERO, + "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}", + { .name_off = 0, .val = 0,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, + BTF_F_ZERO, +"(struct btf_enum){\n" +" .name_off = (__u32)0,\n" +" .val = (__s32)0,\n" +"}", + { .name_off = 0, .val = 0,}); + + /* struct with pointers */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT, + "(struct list_head){.next = (struct list_head *)0x1,}", + { .next = (struct list_head *)1 }); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0, +"(struct list_head){\n" +" .next = (struct list_head *)0x1,\n" +"}", + { .next = (struct list_head *)1 }); + /* NULL pointer should not be displayed */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT, + "(struct list_head){}", + { .next = (struct list_head *)0 }); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0, +"(struct list_head){\n" +"}", + { .next = (struct list_head *)0 }); + + /* struct with function pointers */ + type_id = btf__find_by_name(btf, "file_operations"); + if (ASSERT_GT(type_id, 0, "find type id")) { + type_sz = btf__resolve_size(btf, type_id); + str[0] = '\0'; + + ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts); + ASSERT_EQ(ret, type_sz, + "unexpected return value dumping file_operations"); + cmpstr = +"(struct file_operations){\n" +" .owner = (struct module *)0xffffffffffffffff,\n" +" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,"; + + ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations"); + } + + /* struct with char array */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT, + "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}", + { .name = "foo",}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, + BTF_F_COMPACT | BTF_F_NONAME, + "{['f','o','o',],}", + {.name = "foo",}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0, +"(struct bpf_prog_info){\n" +" .name = (char[16])[\n" +" 'f',\n" +" 'o',\n" +" 'o',\n" +" ],\n" +"}", + {.name = "foo",}); + /* leading null char means do not display string */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT, + "(struct bpf_prog_info){}", + {.name = {'\0', 'f', 'o', 'o'}}); + /* handle non-printable characters */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT, + "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}", + { .name = {1, 2, 3, 0}}); + + /* struct with non-char array */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT, + "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}", + { .cb = {1, 2, 3, 4, 5,},}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, + BTF_F_COMPACT | BTF_F_NONAME, + "{[1,2,3,4,5,],}", + { .cb = { 1, 2, 3, 4, 5},}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0, +"(struct __sk_buff){\n" +" .cb = (__u32[5])[\n" +" 1,\n" +" 2,\n" +" 3,\n" +" 4,\n" +" 5,\n" +" ],\n" +"}", + { .cb = { 1, 2, 3, 4, 5},}); + /* For non-char, arrays, show non-zero values only */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT, + "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}", + { .cb = { 0, 0, 1, 0, 0},}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0, +"(struct __sk_buff){\n" +" .cb = (__u32[5])[\n" +" 0,\n" +" 0,\n" +" 1,\n" +" 0,\n" +" 0,\n" +" ],\n" +"}", + { .cb = { 0, 0, 1, 0, 0},}); + + /* struct with bitfields */ + TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT, + {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, + BTF_F_COMPACT | BTF_F_NONAME, + "{1,0x2,0x3,4,5,}", + { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4, + .imm = 5,}); + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0, +"(struct bpf_insn){\n" +" .code = (__u8)1,\n" +" .dst_reg = (__u8)0x2,\n" +" .src_reg = (__u8)0x3,\n" +" .off = (__s16)4,\n" +" .imm = (__s32)5,\n" +"}", + {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5}); + + /* zeroed bitfields should not be displayed */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT, + "(struct bpf_insn){.dst_reg = (__u8)0x1,}", + { .code = 0, .dst_reg = 1}); + + /* struct with enum bitfield */ + type_id = btf__find_by_name(btf, "fs_context"); + if (ASSERT_GT(type_id, 0, "find fs_context")) { + type_sz = btf__resolve_size(btf, type_id); + str[0] = '\0'; + + opts.emit_zeroes = true; + ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts); + ASSERT_EQ(ret, type_sz, + "unexpected return value dumping fs_context"); + + ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL, + "bitfield value not present"); + } + + /* struct with nested anon union */ + TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT, + "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}", + { .op = 1, .args = { 1, 2, 3, 4}}); + + /* union with nested struct */ + TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT, + "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}", + { .map = { .map_fd = 1 }}); + + /* struct skb with nested structs/unions; because type output is so + * complex, we don't do a string comparison, just verify we return + * the type size as the amount of data displayed. + */ + type_id = btf__find_by_name(btf, "sk_buff"); + if (ASSERT_GT(type_id, 0, "find struct sk_buff")) { + type_sz = btf__resolve_size(btf, type_id); + str[0] = '\0'; + + ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts); + ASSERT_EQ(ret, type_sz, + "unexpected return value dumping sk_buff"); + } + + /* overflow bpf_sock_ops struct with final element nonzero/zero. + * Regardless of the value of the final field, we don't have all the + * data we need to display it, so we should trigger an overflow. + * In other words oveflow checking should trump "is field zero?" + * checks because if we've overflowed, it shouldn't matter what the + * field is - we can't trust its value so shouldn't display it. + */ + TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, + sizeof(struct bpf_sock_ops) - 1, + "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", + { .op = 1, .skb_tcp_flags = 2}); + TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops, + sizeof(struct bpf_sock_ops) - 1, + "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n", + { .op = 1, .skb_tcp_flags = 0}); +} + +static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, + char *str) +{ + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT, + "int cpu_number = (int)100", 100); + TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT, + "static int cpu_profile_flip = (int)2", 2); +} + +static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, + const char *name, const char *expected_val, + void *data, size_t data_sz) +{ + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + int ret = 0, cmp; + size_t secsize; + __s32 type_id; + + opts.compact = true; + + type_id = btf__find_by_name(btf, name); + if (!ASSERT_GT(type_id, 0, "find type id")) + return; + + secsize = btf__resolve_size(btf, type_id); + ASSERT_EQ(secsize, 0, "verify section size"); + + str[0] = '\0'; + ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts); + ASSERT_EQ(ret, 0, "unexpected return value"); + + cmp = strcmp(str, expected_val); + ASSERT_EQ(cmp, 0, "ensure expected/actual match"); +} + +static void test_btf_dump_datasec_data(char *str) +{ + struct btf *btf = btf__parse("xdping_kern.o", NULL); + struct btf_dump_opts opts = { .ctx = str }; + char license[4] = "GPL"; + struct btf_dump *d; + + if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found")) + return; + + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + test_btf_datasec(btf, d, str, "license", + "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];", + license, sizeof(license)); +} + void test_btf_dump() { + char str[STRSIZE]; + struct btf_dump_opts opts = { .ctx = str }; + struct btf_dump *d; + struct btf *btf; int i; for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) { @@ -245,4 +831,33 @@ void test_btf_dump() { } if (test__start_subtest("btf_dump: incremental")) test_btf_dump_incremental(); + + btf = libbpf_find_kernel_btf(); + if (!ASSERT_OK_PTR(btf, "no kernel BTF found")) + return; + + d = btf_dump__new(btf, NULL, &opts, btf_dump_snprintf); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + /* Verify type display for various types. */ + if (test__start_subtest("btf_dump: int_data")) + test_btf_dump_int_data(btf, d, str); + if (test__start_subtest("btf_dump: float_data")) + test_btf_dump_float_data(btf, d, str); + if (test__start_subtest("btf_dump: char_data")) + test_btf_dump_char_data(btf, d, str); + if (test__start_subtest("btf_dump: typedef_data")) + test_btf_dump_typedef_data(btf, d, str); + if (test__start_subtest("btf_dump: enum_data")) + test_btf_dump_enum_data(btf, d, str); + if (test__start_subtest("btf_dump: struct_data")) + test_btf_dump_struct_data(btf, d, str); + if (test__start_subtest("btf_dump: var_data")) + test_btf_dump_var_data(btf, d, str); + btf_dump__free(d); + btf__free(btf); + + if (test__start_subtest("btf_dump: datasec_data")) + test_btf_dump_datasec_data(str); } diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c index 981c251453d9..3d4b2a358d47 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -53,8 +53,8 @@ void test_core_autosize(void) char btf_file[] = "/tmp/core_autosize.btf.XXXXXX"; int err, fd = -1, zero = 0; int char_id, short_id, int_id, long_long_id, void_ptr_id, id; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); struct test_core_autosize* skel = NULL; - struct bpf_object_load_attr load_attr = {}; struct bpf_program *prog; struct bpf_map *bss_map; struct btf *btf = NULL; @@ -125,9 +125,10 @@ void test_core_autosize(void) fd = -1; /* open and load BPF program with custom BTF as the kernel BTF */ - skel = test_core_autosize__open(); + open_opts.btf_custom_path = btf_file; + skel = test_core_autosize__open_opts(&open_opts); if (!ASSERT_OK_PTR(skel, "skel_open")) - return; + goto cleanup; /* disable handle_signed() for now */ prog = bpf_object__find_program_by_name(skel->obj, "handle_signed"); @@ -135,9 +136,7 @@ void test_core_autosize(void) goto cleanup; bpf_program__set_autoload(prog, false); - load_attr.obj = skel->obj; - load_attr.target_btf_path = btf_file; - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(skel->obj); if (!ASSERT_OK(err, "prog_load")) goto cleanup; @@ -204,14 +203,13 @@ void test_core_autosize(void) skel = NULL; /* now re-load with handle_signed() enabled, it should fail loading */ - skel = test_core_autosize__open(); + open_opts.btf_custom_path = btf_file; + skel = test_core_autosize__open_opts(&open_opts); if (!ASSERT_OK_PTR(skel, "skel_open")) - return; + goto cleanup; - load_attr.obj = skel->obj; - load_attr.target_btf_path = btf_file; - err = bpf_object__load_xattr(&load_attr); - if (!ASSERT_ERR(err, "bad_prog_load")) + err = test_core_autosize__load(skel); + if (!ASSERT_ERR(err, "skel_load")) goto cleanup; cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index d02e064c535f..4739b15b2a97 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -816,7 +816,7 @@ static size_t roundup_page(size_t sz) void test_core_reloc(void) { const size_t mmap_sz = roundup_page(sizeof(struct data)); - struct bpf_object_load_attr load_attr = {}; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); struct core_reloc_test_case *test_case; const char *tp_name, *probe_name; int err, i, equal; @@ -846,9 +846,16 @@ void test_core_reloc(void) continue; } - obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); + if (test_case->btf_src_file) { + err = access(test_case->btf_src_file, R_OK); + if (!ASSERT_OK(err, "btf_src_file")) + goto cleanup; + } + + open_opts.btf_custom_path = test_case->btf_src_file; + obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts); if (!ASSERT_OK_PTR(obj, "obj_open")) - continue; + goto cleanup; probe_name = "raw_tracepoint/sys_enter"; tp_name = "sys_enter"; @@ -862,17 +869,7 @@ void test_core_reloc(void) "prog '%s' not found\n", probe_name)) goto cleanup; - - if (test_case->btf_src_file) { - err = access(test_case->btf_src_file, R_OK); - if (!ASSERT_OK(err, "btf_src_file")) - goto cleanup; - } - - load_attr.obj = obj; - load_attr.log_level = 0; - load_attr.target_btf_path = test_case->btf_src_file; - err = bpf_object__load_xattr(&load_attr); + err = bpf_object__load(obj); if (err) { if (!test_case->fails) ASSERT_OK(err, "obj_load"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c index 088b3653610d..02a465f36d59 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -17,6 +17,7 @@ void test_get_func_ip_test(void) */ #ifndef __x86_64__ bpf_program__set_autoload(skel->progs.test6, false); + bpf_program__set_autoload(skel->progs.test7, false); #endif err = get_func_ip_test__load(skel); @@ -46,6 +47,7 @@ void test_get_func_ip_test(void) ASSERT_EQ(skel->bss->test5_result, 1, "test5_result"); #ifdef __x86_64__ ASSERT_EQ(skel->bss->test6_result, 1, "test6_result"); + ASSERT_EQ(skel->bss->test7_result, 1, "test7_result"); #endif cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c new file mode 100644 index 000000000000..6ede48bde91b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <sys/sysinfo.h> +#include <test_progs.h> +#include "network_helpers.h" +#include "netcnt_prog.skel.h" +#include "netcnt_common.h" + +#define CG_NAME "/netcnt" + +void test_netcnt(void) +{ + union percpu_net_cnt *percpu_netcnt = NULL; + struct bpf_cgroup_storage_key key; + int map_fd, percpu_map_fd; + struct netcnt_prog *skel; + unsigned long packets; + union net_cnt netcnt; + unsigned long bytes; + int cpu, nproc; + int cg_fd = -1; + char cmd[128]; + + skel = netcnt_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load")) + return; + + nproc = get_nprocs_conf(); + percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); + if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)")) + goto err; + + cg_fd = test__join_cgroup(CG_NAME); + if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup")) + goto err; + + skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd); + if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt, + "attach_cgroup(bpf_nextcnt)")) + goto err; + + snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6)); + ASSERT_OK(system(cmd), cmd); + + map_fd = bpf_map__fd(skel->maps.netcnt); + if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key")) + goto err; + + if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)")) + goto err; + + percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt); + if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]), + "bpf_map_lookup_elem(percpu_netcnt)")) + goto err; + + /* Some packets can be still in per-cpu cache, but not more than + * MAX_PERCPU_PACKETS. + */ + packets = netcnt.packets; + bytes = netcnt.bytes; + for (cpu = 0; cpu < nproc; cpu++) { + ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS"); + + packets += percpu_netcnt[cpu].packets; + bytes += percpu_netcnt[cpu].bytes; + } + + /* No packets should be lost */ + ASSERT_EQ(packets, 10000, "packets"); + + /* Let's check that bytes counter matches the number of packets + * multiplied by the size of ipv6 ICMP packet. + */ + ASSERT_EQ(bytes, packets * 104, "bytes"); + +err: + if (cg_fd != -1) + close(cg_fd); + free(percpu_netcnt); + netcnt_prog__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c index fcf54b3a1dd0..d4b953ae3407 100644 --- a/tools/testing/selftests/bpf/prog_tests/pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -125,6 +125,10 @@ void test_pinning(void) if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) goto out; + /* get pinning path */ + if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path")) + goto out; + /* set pinning path of other map and re-pin all */ map = bpf_object__find_map_by_name(obj, "nopinmap"); if (CHECK(!map, "find map", "NULL map")) @@ -134,6 +138,11 @@ void test_pinning(void) if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) goto out; + /* get pinning path after set */ + if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath, + "get pin path after set")) + goto out; + /* should only pin the one unpinned map */ err = bpf_object__pin_maps(obj, NULL); if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno)) diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index de2688166696..4e91f4d6466c 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -34,8 +34,8 @@ void test_reference_tracking(void) if (!test__start_subtest(title)) continue; - /* Expect verifier failure if test name has 'fail' */ - if (strstr(title, "fail") != NULL) { + /* Expect verifier failure if test name has 'err' */ + if (strstr(title, "err_") != NULL) { libbpf_print_fn_t old_print_fn; old_print_fn = libbpf_set_print(NULL); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 5703c918812b..e7201ba29ccd 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -13,15 +13,16 @@ #define _GNU_SOURCE #include <arpa/inet.h> +#include <linux/if.h> +#include <linux/if_tun.h> #include <linux/limits.h> #include <linux/sysctl.h> -#include <linux/if_tun.h> -#include <linux/if.h> #include <sched.h> #include <stdbool.h> #include <stdio.h> -#include <sys/stat.h> #include <sys/mount.h> +#include <sys/stat.h> +#include <unistd.h> #include "test_progs.h" #include "network_helpers.h" @@ -391,9 +392,7 @@ done: static int test_ping(int family, const char *addr) { - const char *ping = family == AF_INET6 ? "ping6" : "ping"; - - SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping, addr); + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); return 0; fail: return -1; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c new file mode 100644 index 000000000000..6b186b4238d0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 + +/** + * Test XDP bonding support + * + * Sets up two bonded veth pairs between two fresh namespaces + * and verifies that XDP_TX program loaded on a bond device + * are correctly loaded onto the slave devices and XDP_TX'd + * packets are balanced using bonding. + */ + +#define _GNU_SOURCE +#include <sched.h> +#include <net/if.h> +#include <linux/if_link.h> +#include "test_progs.h" +#include "network_helpers.h" +#include <linux/if_bonding.h> +#include <linux/limits.h> +#include <linux/udp.h> + +#include "xdp_dummy.skel.h" +#include "xdp_redirect_multi_kern.skel.h" +#include "xdp_tx.skel.h" + +#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55} +#define BOND1_MAC_STR "00:11:22:33:44:55" +#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66} +#define BOND2_MAC_STR "00:22:33:44:55:66" +#define NPACKETS 100 + +static int root_netns_fd = -1; + +static void restore_root_netns(void) +{ + ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns"); +} + +static int setns_by_name(char *name) +{ + int nsfd, err; + char nspath[PATH_MAX]; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + if (nsfd < 0) + return -1; + + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + return err; +} + +static int get_rx_packets(const char *iface) +{ + FILE *f; + char line[512]; + int iface_len = strlen(iface); + + f = fopen("/proc/net/dev", "r"); + if (!f) + return -1; + + while (fgets(line, sizeof(line), f)) { + char *p = line; + + while (*p == ' ') + p++; /* skip whitespace */ + if (!strncmp(p, iface, iface_len)) { + p += iface_len; + if (*p++ != ':') + continue; + while (*p == ' ') + p++; /* skip whitespace */ + while (*p && *p != ' ') + p++; /* skip rx bytes */ + while (*p == ' ') + p++; /* skip whitespace */ + fclose(f); + return atoi(p); + } + } + fclose(f); + return -1; +} + +#define MAX_BPF_LINKS 8 + +struct skeletons { + struct xdp_dummy *xdp_dummy; + struct xdp_tx *xdp_tx; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + + int nlinks; + struct bpf_link *links[MAX_BPF_LINKS]; +}; + +static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface) +{ + struct bpf_link *link; + int ifindex; + + ifindex = if_nametoindex(iface); + if (!ASSERT_GT(ifindex, 0, "get ifindex")) + return -1; + + if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached")) + return -1; + + link = bpf_program__attach_xdp(prog, ifindex); + if (!ASSERT_OK_PTR(link, "attach xdp program")) + return -1; + + skeletons->links[skeletons->nlinks++] = link; + return 0; +} + +enum { + BOND_ONE_NO_ATTACH = 0, + BOND_BOTH_AND_ATTACH, +}; + +static const char * const mode_names[] = { + [BOND_MODE_ROUNDROBIN] = "balance-rr", + [BOND_MODE_ACTIVEBACKUP] = "active-backup", + [BOND_MODE_XOR] = "balance-xor", + [BOND_MODE_BROADCAST] = "broadcast", + [BOND_MODE_8023AD] = "802.3ad", + [BOND_MODE_TLB] = "balance-tlb", + [BOND_MODE_ALB] = "balance-alb", +}; + +static const char * const xmit_policy_names[] = { + [BOND_XMIT_POLICY_LAYER2] = "layer2", + [BOND_XMIT_POLICY_LAYER34] = "layer3+4", + [BOND_XMIT_POLICY_LAYER23] = "layer2+3", + [BOND_XMIT_POLICY_ENCAP23] = "encap2+3", + [BOND_XMIT_POLICY_ENCAP34] = "encap3+4", +}; + +static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, + int bond_both_attach) +{ +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + return -1; \ + }) + + SYS("ip netns add ns_dst"); + SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); + SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); + + SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s", + mode_names[mode], xmit_policy_names[xmit_policy]); + SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); + SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", + mode_names[mode], xmit_policy_names[xmit_policy]); + SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); + + SYS("ip link set veth1_1 master bond1"); + if (bond_both_attach == BOND_BOTH_AND_ATTACH) { + SYS("ip link set veth1_2 master bond1"); + } else { + SYS("ip link set veth1_2 up addrgenmode none"); + + if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2")) + return -1; + } + + SYS("ip -netns ns_dst link set veth2_1 master bond2"); + + if (bond_both_attach == BOND_BOTH_AND_ATTACH) + SYS("ip -netns ns_dst link set veth2_2 master bond2"); + else + SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none"); + + /* Load a dummy program on sending side as with veth peer needs to have a + * XDP program loaded as well. + */ + if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1")) + return -1; + + if (bond_both_attach == BOND_BOTH_AND_ATTACH) { + if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst")) + return -1; + + if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2")) + return -1; + + restore_root_netns(); + } + + return 0; + +#undef SYS +} + +static void bonding_cleanup(struct skeletons *skeletons) +{ + restore_root_netns(); + while (skeletons->nlinks) { + skeletons->nlinks--; + bpf_link__destroy(skeletons->links[skeletons->nlinks]); + } + ASSERT_OK(system("ip link delete bond1"), "delete bond1"); + ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1"); + ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2"); + ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst"); +} + +static int send_udp_packets(int vary_dst_ip) +{ + struct ethhdr eh = { + .h_source = BOND1_MAC, + .h_dest = BOND2_MAC, + .h_proto = htons(ETH_P_IP), + }; + uint8_t buf[128] = {}; + struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh)); + struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph)); + int i, s = -1; + int ifindex; + + s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (!ASSERT_GE(s, 0, "socket")) + goto err; + + ifindex = if_nametoindex("bond1"); + if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex")) + goto err; + + memcpy(buf, &eh, sizeof(eh)); + iph->ihl = 5; + iph->version = 4; + iph->tos = 16; + iph->id = 1; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->saddr = 1; + iph->daddr = 2; + iph->tot_len = htons(sizeof(buf) - ETH_HLEN); + iph->check = 0; + + for (i = 1; i <= NPACKETS; i++) { + int n; + struct sockaddr_ll saddr_ll = { + .sll_ifindex = ifindex, + .sll_halen = ETH_ALEN, + .sll_addr = BOND2_MAC, + }; + + /* vary the UDP destination port for even distribution with roundrobin/xor modes */ + uh->dest++; + + if (vary_dst_ip) + iph->daddr++; + + n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll)); + if (!ASSERT_EQ(n, sizeof(buf), "sendto")) + goto err; + } + + return 0; + +err: + if (s >= 0) + close(s); + return -1; +} + +static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy) +{ + int bond1_rx; + + if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH)) + goto out; + + if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34)) + goto out; + + bond1_rx = get_rx_packets("bond1"); + ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets"); + + switch (mode) { + case BOND_MODE_ROUNDROBIN: + case BOND_MODE_XOR: { + int veth1_rx = get_rx_packets("veth1_1"); + int veth2_rx = get_rx_packets("veth1_2"); + int diff = abs(veth1_rx - veth2_rx); + + ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets"); + + switch (xmit_policy) { + case BOND_XMIT_POLICY_LAYER2: + ASSERT_GE(diff, NPACKETS, + "expected packets on only one of the interfaces"); + break; + case BOND_XMIT_POLICY_LAYER23: + case BOND_XMIT_POLICY_LAYER34: + ASSERT_LT(diff, NPACKETS/2, + "expected even distribution of packets"); + break; + default: + PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy); + break; + } + break; + } + case BOND_MODE_ACTIVEBACKUP: { + int veth1_rx = get_rx_packets("veth1_1"); + int veth2_rx = get_rx_packets("veth1_2"); + int diff = abs(veth1_rx - veth2_rx); + + ASSERT_GE(diff, NPACKETS, + "expected packets on only one of the interfaces"); + break; + } + default: + PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy); + break; + } + +out: + bonding_cleanup(skeletons); +} + +/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding + * all the interfaces to it and checking that broadcasting won't send the packet + * to neither the ingress bond device (bond2) or its slave (veth2_1). + */ +static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons) +{ + static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"}; + int veth1_1_rx, veth1_2_rx; + int err; + + if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, + BOND_ONE_NO_ATTACH)) + goto out; + + + if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst")) + goto out; + + /* populate the devmap with the relevant interfaces */ + for (int i = 0; i < ARRAY_SIZE(ifaces); i++) { + int ifindex = if_nametoindex(ifaces[i]); + int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all); + + if (!ASSERT_GT(ifindex, 0, "could not get interface index")) + goto out; + + err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0); + if (!ASSERT_OK(err, "add interface to map_all")) + goto out; + } + + if (xdp_attach(skeletons, + skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog, + "bond2")) + goto out; + + restore_root_netns(); + + if (send_udp_packets(BOND_MODE_ROUNDROBIN)) + goto out; + + veth1_1_rx = get_rx_packets("veth1_1"); + veth1_2_rx = get_rx_packets("veth1_2"); + + ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1"); + ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2"); + +out: + restore_root_netns(); + bonding_cleanup(skeletons); +} + +/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */ +static void test_xdp_bonding_attach(struct skeletons *skeletons) +{ + struct bpf_link *link = NULL; + struct bpf_link *link2 = NULL; + int veth, bond; + int err; + + if (!ASSERT_OK(system("ip link add veth type veth"), "add veth")) + goto out; + if (!ASSERT_OK(system("ip link add bond type bond"), "add bond")) + goto out; + + veth = if_nametoindex("veth"); + if (!ASSERT_GE(veth, 0, "if_nametoindex veth")) + goto out; + bond = if_nametoindex("bond"); + if (!ASSERT_GE(bond, 0, "if_nametoindex bond")) + goto out; + + /* enslaving with a XDP program loaded fails */ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth); + if (!ASSERT_OK_PTR(link, "attach program to veth")) + goto out; + + err = system("ip link set veth master bond"); + if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail")) + goto out; + + bpf_link__destroy(link); + link = NULL; + + err = system("ip link set veth master bond"); + if (!ASSERT_OK(err, "set veth master")) + goto out; + + /* attaching to slave when master has no program is allowed */ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth); + if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved")) + goto out; + + /* attaching to master not allowed when slave has program loaded */ + link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program")) + goto out; + + bpf_link__destroy(link); + link = NULL; + + /* attaching XDP program to master allowed when slave has no program */ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + if (!ASSERT_OK_PTR(link, "attach program to master")) + goto out; + + /* attaching to slave not allowed when master has program loaded */ + link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + ASSERT_ERR_PTR(link2, "attach program to slave when master has program"); + +out: + bpf_link__destroy(link); + bpf_link__destroy(link2); + + system("ip link del veth"); + system("ip link del bond"); +} + +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level != LIBBPF_WARN) + vprintf(format, args); + return 0; +} + +struct bond_test_case { + char *name; + int mode; + int xmit_policy; +}; + +static struct bond_test_case bond_test_cases[] = { + { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, }, + { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 }, + + { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, }, + { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, }, + { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, }, +}; + +void test_xdp_bonding(void) +{ + libbpf_print_fn_t old_print_fn; + struct skeletons skeletons = {}; + int i; + + old_print_fn = libbpf_set_print(libbpf_debug_print); + + root_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net")) + goto out; + + skeletons.xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load")) + goto out; + + skeletons.xdp_tx = xdp_tx__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load")) + goto out; + + skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern, + "xdp_redirect_multi_kern__open_and_load")) + goto out; + + if (!test__start_subtest("xdp_bonding_attach")) + test_xdp_bonding_attach(&skeletons); + + for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) { + struct bond_test_case *test_case = &bond_test_cases[i]; + + if (!test__start_subtest(test_case->name)) + test_xdp_bonding_with_mode( + &skeletons, + test_case->mode, + test_case->xmit_policy); + } + + if (!test__start_subtest("xdp_bonding_redirect_multi")) + test_xdp_bonding_redirect_multi(&skeletons); + +out: + xdp_dummy__destroy(skeletons.xdp_dummy); + xdp_tx__destroy(skeletons.xdp_tx); + xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern); + + libbpf_set_print(old_print_fn); + if (root_netns_fd >= 0) + close(root_netns_fd); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c new file mode 100644 index 000000000000..b77adfd55d73 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define bpf_tcp_sk(skc) ({ \ + struct sock_common *_skc = skc; \ + sk = NULL; \ + tp = NULL; \ + if (_skc) { \ + tp = bpf_skc_to_tcp_sock(_skc); \ + sk = (struct sock *)tp; \ + } \ + tp; \ +}) + +unsigned short reuse_listen_hport = 0; +unsigned short listen_hport = 0; +char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic"; +char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp"; +bool random_retry = false; + +static bool tcp_cc_eq(const char *a, const char *b) +{ + int i; + + for (i = 0; i < TCP_CA_NAME_MAX; i++) { + if (a[i] != b[i]) + return false; + if (!a[i]) + break; + } + + return true; +} + +SEC("iter/tcp") +int change_tcp_cc(struct bpf_iter__tcp *ctx) +{ + char cur_cc[TCP_CA_NAME_MAX]; + struct tcp_sock *tp; + struct sock *sk; + int ret; + + if (!bpf_tcp_sk(ctx->sk_common)) + return 0; + + if (sk->sk_family != AF_INET6 || + (sk->sk_state != TCP_LISTEN && + sk->sk_state != TCP_ESTABLISHED) || + (sk->sk_num != reuse_listen_hport && + sk->sk_num != listen_hport && + bpf_ntohs(sk->sk_dport) != listen_hport)) + return 0; + + if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION, + cur_cc, sizeof(cur_cc))) + return 0; + + if (!tcp_cc_eq(cur_cc, cubic_cc)) + return 0; + + if (random_retry && bpf_get_prandom_u32() % 4 == 1) + return 1; + + bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc)); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 2e4775c35414..92267abb462f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, } BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", - seq_num, src, srcp, destp, destp); + seq_num, src, srcp, dest, destp); BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", state, tp->write_seq - tp->snd_una, rx_queue, diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 01378911252b..3af0998a0623 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -5,6 +5,10 @@ #define AF_INET 2 #define AF_INET6 10 +#define SOL_TCP 6 +#define TCP_CONGESTION 13 +#define TCP_CA_NAME_MAX 16 + #define ICSK_TIME_RETRANS 1 #define ICSK_TIME_PROBE0 3 #define ICSK_TIME_LOSS_PROBE 5 @@ -32,6 +36,8 @@ #define ir_v6_rmt_addr req.__req_common.skc_v6_daddr #define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr +#define sk_num __sk_common.skc_num +#define sk_dport __sk_common.skc_dport #define sk_family __sk_common.skc_family #define sk_rmem_alloc sk_backlog.rmem_alloc #define sk_refcnt __sk_common.skc_refcnt diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c index acd587b6e859..a587aeca5ae0 100644 --- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c @@ -11,6 +11,7 @@ extern const void bpf_fentry_test3 __ksym; extern const void bpf_fentry_test4 __ksym; extern const void bpf_modify_return_test __ksym; extern const void bpf_fentry_test6 __ksym; +extern const void bpf_fentry_test7 __ksym; __u64 test1_result = 0; SEC("fentry/bpf_fentry_test1") @@ -71,3 +72,13 @@ int test6(struct pt_regs *ctx) test6_result = (const void *) addr == &bpf_fentry_test6 + 5; return 0; } + +__u64 test7_result = 0; +SEC("kprobe/bpf_fentry_test7+5") +int test7(struct pt_regs *ctx) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test7_result = (const void *) addr == &bpf_fentry_test7 + 5; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c index d071adf178bd..43649bce4c54 100644 --- a/tools/testing/selftests/bpf/progs/netcnt_prog.c +++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c @@ -13,21 +13,21 @@ struct { __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); __type(key, struct bpf_cgroup_storage_key); - __type(value, struct percpu_net_cnt); + __type(value, union percpu_net_cnt); } percpu_netcnt SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); __type(key, struct bpf_cgroup_storage_key); - __type(value, struct net_cnt); + __type(value, union net_cnt); } netcnt SEC(".maps"); SEC("cgroup/skb") int bpf_nextcnt(struct __sk_buff *skb) { - struct percpu_net_cnt *percpu_cnt; + union percpu_net_cnt *percpu_cnt; char fmt[] = "%d %llu %llu\n"; - struct net_cnt *cnt; + union net_cnt *cnt; __u64 ts, dt; int ret; diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c new file mode 100644 index 000000000000..703c08e06442 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Isovalent, Inc. */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct inner { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, int); + __uint(max_entries, 4); +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 0); /* This will make map creation to fail */ + __uint(key_size, sizeof(__u32)); + __array(values, struct inner); +} mim SEC(".maps"); + +SEC("xdp") +int xdp_noop0(struct xdp_md *ctx) +{ + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index e83d0b48d80c..8249075f088f 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_use_after_free") +SEC("classifier/err_use_after_free") int bpf_sk_lookup_uaf(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) return family; } -SEC("classifier/fail_modify_sk_pointer") +SEC("classifier/err_modify_sk_pointer") int bpf_sk_lookup_modptr(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_modify_sk_or_null_pointer") +SEC("classifier/err_modify_sk_or_null_pointer") int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_no_release") +SEC("classifier/err_no_release") int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_release_twice") +SEC("classifier/err_release_twice") int bpf_sk_lookup_test3(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_release_unchecked") +SEC("classifier/err_release_unchecked") int bpf_sk_lookup_test4(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb) bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } -SEC("classifier/fail_no_release_subcall") +SEC("classifier/err_no_release_subcall") int bpf_sk_lookup_test5(struct __sk_buff *skb) { lookup_no_release(skb); diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c index 94e6c2b281cb..5f725c720e00 100644 --- a/tools/testing/selftests/bpf/progs/xdp_tx.c +++ b/tools/testing/selftests/bpf/progs/xdp_tx.c @@ -3,7 +3,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> -SEC("tx") +SEC("xdp") int xdp_tx(struct xdp_md *xdp) { return XDP_TX; diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py new file mode 100755 index 000000000000..be54b7335a76 --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -0,0 +1,586 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# +# Copyright (C) 2021 Isovalent, Inc. + +import argparse +import re +import os, sys + +LINUX_ROOT = os.path.abspath(os.path.join(__file__, + os.pardir, os.pardir, os.pardir, os.pardir, os.pardir)) +BPFTOOL_DIR = os.path.join(LINUX_ROOT, 'tools/bpf/bpftool') +retval = 0 + +class BlockParser(object): + """ + A parser for extracting set of values from blocks such as enums. + @reader: a pointer to the open file to parse + """ + def __init__(self, reader): + self.reader = reader + + def search_block(self, start_marker): + """ + Search for a given structure in a file. + @start_marker: regex marking the beginning of a structure to parse + """ + offset = self.reader.tell() + array_start = re.search(start_marker, self.reader.read()) + if array_start is None: + raise Exception('Failed to find start of block') + self.reader.seek(offset + array_start.start()) + + def parse(self, pattern, end_marker): + """ + Parse a block and return a set of values. Values to extract must be + on separate lines in the file. + @pattern: pattern used to identify the values to extract + @end_marker: regex marking the end of the block to parse + """ + entries = set() + while True: + line = self.reader.readline() + if not line or re.match(end_marker, line): + break + capture = pattern.search(line) + if capture and pattern.groups >= 1: + entries.add(capture.group(1)) + return entries + +class ArrayParser(BlockParser): + """ + A parser for extracting dicionaries of values from some BPF-related arrays. + @reader: a pointer to the open file to parse + @array_name: name of the array to parse + """ + end_marker = re.compile('^};') + + def __init__(self, reader, array_name): + self.array_name = array_name + self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n') + super().__init__(reader) + + def search_block(self): + """ + Search for the given array in a file. + """ + super().search_block(self.start_marker); + + def parse(self): + """ + Parse a block and return data as a dictionary. Items to extract must be + on separate lines in the file. + """ + pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$') + entries = {} + while True: + line = self.reader.readline() + if line == '' or re.match(self.end_marker, line): + break + capture = pattern.search(line) + if capture: + entries[capture.group(1)] = capture.group(2) + return entries + +class InlineListParser(BlockParser): + """ + A parser for extracting set of values from inline lists. + """ + def parse(self, pattern, end_marker): + """ + Parse a block and return a set of values. Multiple values to extract + can be on a same line in the file. + @pattern: pattern used to identify the values to extract + @end_marker: regex marking the end of the block to parse + """ + entries = set() + while True: + line = self.reader.readline() + if not line: + break + entries.update(pattern.findall(line)) + if re.search(end_marker, line): + break + return entries + +class FileExtractor(object): + """ + A generic reader for extracting data from a given file. This class contains + several helper methods that wrap arround parser objects to extract values + from different structures. + This class does not offer a way to set a filename, which is expected to be + defined in children classes. + """ + def __init__(self): + self.reader = open(self.filename, 'r') + + def close(self): + """ + Close the file used by the parser. + """ + self.reader.close() + + def reset_read(self): + """ + Reset the file position indicator for this parser. This is useful when + parsing several structures in the file without respecting the order in + which those structures appear in the file. + """ + self.reader.seek(0) + + def get_types_from_array(self, array_name): + """ + Search for and parse an array associating names to BPF_* enum members, + for example: + + const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + }; + + Return a dictionary with the enum member names as keys and the + associated names as values, for example: + + {'BPF_PROG_TYPE_UNSPEC': 'unspec', + 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter', + 'BPF_PROG_TYPE_KPROBE': 'kprobe'} + + @array_name: name of the array to parse + """ + array_parser = ArrayParser(self.reader, array_name) + array_parser.search_block() + return array_parser.parse() + + def get_enum(self, enum_name): + """ + Search for and parse an enum containing BPF_* members, for example: + + enum bpf_prog_type { + BPF_PROG_TYPE_UNSPEC, + BPF_PROG_TYPE_SOCKET_FILTER, + BPF_PROG_TYPE_KPROBE, + }; + + Return a set containing all member names, for example: + + {'BPF_PROG_TYPE_UNSPEC', + 'BPF_PROG_TYPE_SOCKET_FILTER', + 'BPF_PROG_TYPE_KPROBE'} + + @enum_name: name of the enum to parse + """ + start_marker = re.compile(f'enum {enum_name} {{\n') + pattern = re.compile('^\s*(BPF_\w+),?$') + end_marker = re.compile('^};') + parser = BlockParser(self.reader) + parser.search_block(start_marker) + return parser.parse(pattern, end_marker) + + def __get_description_list(self, start_marker, pattern, end_marker): + parser = InlineListParser(self.reader) + parser.search_block(start_marker) + return parser.parse(pattern, end_marker) + + def get_rst_list(self, block_name): + """ + Search for and parse a list of type names from RST documentation, for + example: + + | *TYPE* := { + | **socket** | **kprobe** | + | **kretprobe** + | } + + Return a set containing all type names, for example: + + {'socket', 'kprobe', 'kretprobe'} + + @block_name: name of the blog to parse, 'TYPE' in the example + """ + start_marker = re.compile(f'\*{block_name}\* := {{') + pattern = re.compile('\*\*([\w/-]+)\*\*') + end_marker = re.compile('}\n') + return self.__get_description_list(start_marker, pattern, end_marker) + + def get_help_list(self, block_name): + """ + Search for and parse a list of type names from a help message in + bpftool, for example: + + " TYPE := { socket | kprobe |\\n" + " kretprobe }\\n" + + Return a set containing all type names, for example: + + {'socket', 'kprobe', 'kretprobe'} + + @block_name: name of the blog to parse, 'TYPE' in the example + """ + start_marker = re.compile(f'"\s*{block_name} := {{') + pattern = re.compile('([\w/]+) [|}]') + end_marker = re.compile('}') + return self.__get_description_list(start_marker, pattern, end_marker) + + def get_help_list_macro(self, macro): + """ + Search for and parse a list of values from a help message starting with + a macro in bpftool, for example: + + " " HELP_SPEC_OPTIONS " |\\n" + " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n" + + Return a set containing all item names, for example: + + {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'} + + @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example + """ + start_marker = re.compile(f'"\s*{macro}\s*" [|}}]') + pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])') + end_marker = re.compile('}\\\\n') + return self.__get_description_list(start_marker, pattern, end_marker) + + def default_options(self): + """ + Return the default options contained in HELP_SPEC_OPTIONS + """ + return { '-j', '--json', '-p', '--pretty', '-d', '--debug' } + + def get_bashcomp_list(self, block_name): + """ + Search for and parse a list of type names from a variable in bash + completion file, for example: + + local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\ + kretprobe' + + Return a set containing all type names, for example: + + {'socket', 'kprobe', 'kretprobe'} + + @block_name: name of the blog to parse, 'TYPE' in the example + """ + start_marker = re.compile(f'local {block_name}=\'') + pattern = re.compile('(?:.*=\')?([\w/]+)') + end_marker = re.compile('\'$') + return self.__get_description_list(start_marker, pattern, end_marker) + +class SourceFileExtractor(FileExtractor): + """ + An abstract extractor for a source file with usage message. + This class does not offer a way to set a filename, which is expected to be + defined in children classes. + """ + def get_options(self): + return self.default_options().union(self.get_help_list_macro('HELP_SPEC_OPTIONS')) + +class ProgFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's prog.c. + """ + filename = os.path.join(BPFTOOL_DIR, 'prog.c') + + def get_prog_types(self): + return self.get_types_from_array('prog_type_name') + + def get_attach_types(self): + return self.get_types_from_array('attach_type_strings') + + def get_prog_attach_help(self): + return self.get_help_list('ATTACH_TYPE') + +class MapFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's map.c. + """ + filename = os.path.join(BPFTOOL_DIR, 'map.c') + + def get_map_types(self): + return self.get_types_from_array('map_type_name') + + def get_map_help(self): + return self.get_help_list('TYPE') + +class CgroupFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's cgroup.c. + """ + filename = os.path.join(BPFTOOL_DIR, 'cgroup.c') + + def get_prog_attach_help(self): + return self.get_help_list('ATTACH_TYPE') + +class CommonFileExtractor(SourceFileExtractor): + """ + An extractor for bpftool's common.c. + """ + filename = os.path.join(BPFTOOL_DIR, 'common.c') + + def __init__(self): + super().__init__() + self.attach_types = {} + + def get_attach_types(self): + if not self.attach_types: + self.attach_types = self.get_types_from_array('attach_type_name') + return self.attach_types + + def get_cgroup_attach_types(self): + if not self.attach_types: + self.get_attach_types() + cgroup_types = {} + for (key, value) in self.attach_types.items(): + if key.find('BPF_CGROUP') != -1: + cgroup_types[key] = value + return cgroup_types + +class GenericSourceExtractor(SourceFileExtractor): + """ + An extractor for generic source code files. + """ + filename = "" + + def __init__(self, filename): + self.filename = os.path.join(BPFTOOL_DIR, filename) + super().__init__() + +class BpfHeaderExtractor(FileExtractor): + """ + An extractor for the UAPI BPF header. + """ + filename = os.path.join(LINUX_ROOT, 'tools/include/uapi/linux/bpf.h') + + def get_prog_types(self): + return self.get_enum('bpf_prog_type') + + def get_map_types(self): + return self.get_enum('bpf_map_type') + + def get_attach_types(self): + return self.get_enum('bpf_attach_type') + +class ManPageExtractor(FileExtractor): + """ + An abstract extractor for an RST documentation page. + This class does not offer a way to set a filename, which is expected to be + defined in children classes. + """ + def get_options(self): + return self.get_rst_list('OPTIONS') + +class ManProgExtractor(ManPageExtractor): + """ + An extractor for bpftool-prog.rst. + """ + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-prog.rst') + + def get_attach_types(self): + return self.get_rst_list('ATTACH_TYPE') + +class ManMapExtractor(ManPageExtractor): + """ + An extractor for bpftool-map.rst. + """ + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-map.rst') + + def get_map_types(self): + return self.get_rst_list('TYPE') + +class ManCgroupExtractor(ManPageExtractor): + """ + An extractor for bpftool-cgroup.rst. + """ + filename = os.path.join(BPFTOOL_DIR, 'Documentation/bpftool-cgroup.rst') + + def get_attach_types(self): + return self.get_rst_list('ATTACH_TYPE') + +class ManGenericExtractor(ManPageExtractor): + """ + An extractor for generic RST documentation pages. + """ + filename = "" + + def __init__(self, filename): + self.filename = os.path.join(BPFTOOL_DIR, filename) + super().__init__() + +class BashcompExtractor(FileExtractor): + """ + An extractor for bpftool's bash completion file. + """ + filename = os.path.join(BPFTOOL_DIR, 'bash-completion/bpftool') + + def get_prog_attach_types(self): + return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') + + def get_map_types(self): + return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES') + + def get_cgroup_attach_types(self): + return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES') + +def verify(first_set, second_set, message): + """ + Print all values that differ between two sets. + @first_set: one set to compare + @second_set: another set to compare + @message: message to print for values belonging to only one of the sets + """ + global retval + diff = first_set.symmetric_difference(second_set) + if diff: + print(message, diff) + retval = 1 + +def main(): + # No arguments supported at this time, but print usage for -h|--help + argParser = argparse.ArgumentParser(description=""" + Verify that bpftool's code, help messages, documentation and bash + completion are all in sync on program types, map types, attach types, and + options. Also check that bpftool is in sync with the UAPI BPF header. + """) + args = argParser.parse_args() + + # Map types (enum) + + bpf_info = BpfHeaderExtractor() + ref = bpf_info.get_map_types() + + map_info = MapFileExtractor() + source_map_items = map_info.get_map_types() + map_types_enum = set(source_map_items.keys()) + + verify(ref, map_types_enum, + f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):') + + # Map types (names) + + source_map_types = set(source_map_items.values()) + source_map_types.discard('unspec') + + help_map_types = map_info.get_map_help() + help_map_options = map_info.get_options() + map_info.close() + + man_map_info = ManMapExtractor() + man_map_options = man_map_info.get_options() + man_map_types = man_map_info.get_map_types() + man_map_info.close() + + bashcomp_info = BashcompExtractor() + bashcomp_map_types = bashcomp_info.get_map_types() + + verify(source_map_types, help_map_types, + f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):') + verify(source_map_types, man_map_types, + f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):') + verify(help_map_options, man_map_options, + f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):') + verify(source_map_types, bashcomp_map_types, + f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') + + # Program types (enum) + + ref = bpf_info.get_prog_types() + + prog_info = ProgFileExtractor() + prog_types = set(prog_info.get_prog_types().keys()) + + verify(ref, prog_types, + f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):') + + # Attach types (enum) + + ref = bpf_info.get_attach_types() + bpf_info.close() + + common_info = CommonFileExtractor() + attach_types = common_info.get_attach_types() + + verify(ref, attach_types, + f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):') + + # Attach types (names) + + source_prog_attach_types = set(prog_info.get_attach_types().values()) + + help_prog_attach_types = prog_info.get_prog_attach_help() + help_prog_options = prog_info.get_options() + prog_info.close() + + man_prog_info = ManProgExtractor() + man_prog_options = man_prog_info.get_options() + man_prog_attach_types = man_prog_info.get_attach_types() + man_prog_info.close() + + bashcomp_info.reset_read() # We stopped at map types, rewind + bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types() + + verify(source_prog_attach_types, help_prog_attach_types, + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') + verify(source_prog_attach_types, man_prog_attach_types, + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):') + verify(help_prog_options, man_prog_options, + f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):') + verify(source_prog_attach_types, bashcomp_prog_attach_types, + f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') + + # Cgroup attach types + + source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values()) + common_info.close() + + cgroup_info = CgroupFileExtractor() + help_cgroup_attach_types = cgroup_info.get_prog_attach_help() + help_cgroup_options = cgroup_info.get_options() + cgroup_info.close() + + man_cgroup_info = ManCgroupExtractor() + man_cgroup_options = man_cgroup_info.get_options() + man_cgroup_attach_types = man_cgroup_info.get_attach_types() + man_cgroup_info.close() + + bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types() + bashcomp_info.close() + + verify(source_cgroup_attach_types, help_cgroup_attach_types, + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') + verify(source_cgroup_attach_types, man_cgroup_attach_types, + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') + verify(help_cgroup_options, man_cgroup_options, + f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):') + verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types, + f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') + + # Options for remaining commands + + for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]: + source_info = GenericSourceExtractor(cmd + '.c') + help_cmd_options = source_info.get_options() + source_info.close() + + man_cmd_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool-' + cmd + '.rst')) + man_cmd_options = man_cmd_info.get_options() + man_cmd_info.close() + + verify(help_cmd_options, man_cmd_options, + f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):') + + source_main_info = GenericSourceExtractor('main.c') + help_main_options = source_main_info.get_options() + source_main_info.close() + + man_main_info = ManGenericExtractor(os.path.join('Documentation', 'bpftool.rst')) + man_main_options = man_main_info.get_options() + man_main_info.close() + + verify(help_main_options, man_main_options, + f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):') + + sys.exit(retval) + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 30cbf5d98f7d..14cea869235b 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -764,8 +764,8 @@ static void test_sockmap(unsigned int tasks, void *data) udp = socket(AF_INET, SOCK_DGRAM, 0); i = 0; err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY); - if (!err) { - printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n", + if (err) { + printf("Failed socket update SOCK_DGRAM '%i:%i'\n", i, udp); goto out_sockmap; } @@ -1153,12 +1153,17 @@ out_sockmap: } #define MAPINMAP_PROG "./test_map_in_map.o" +#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o" static void test_map_in_map(void) { struct bpf_object *obj; struct bpf_map *map; int mim_fd, fd, err; int pos = 0; + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + __u32 id = 0; + libbpf_print_fn_t old_print_fn; obj = bpf_object__open(MAPINMAP_PROG); @@ -1228,11 +1233,72 @@ static void test_map_in_map(void) } close(fd); + fd = -1; bpf_object__close(obj); + + /* Test that failing bpf_object__create_map() destroys the inner map */ + obj = bpf_object__open(MAPINMAP_INVALID_PROG); + err = libbpf_get_error(obj); + if (err) { + printf("Failed to load %s program: %d %d", + MAPINMAP_INVALID_PROG, err, errno); + goto out_map_in_map; + } + + map = bpf_object__find_map_by_name(obj, "mim"); + if (!map) { + printf("Failed to load array of maps from test prog\n"); + goto out_map_in_map; + } + + old_print_fn = libbpf_set_print(NULL); + + err = bpf_object__load(obj); + if (!err) { + printf("Loading obj supposed to fail\n"); + goto out_map_in_map; + } + + libbpf_set_print(old_print_fn); + + /* Iterate over all maps to check whether the internal map + * ("mim.internal") has been destroyed. + */ + while (true) { + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + printf("Failed to get next map: %d", errno); + goto out_map_in_map; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + printf("Failed to get map by id %u: %d", id, errno); + goto out_map_in_map; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + printf("Failed to get map info by fd %d: %d", fd, + errno); + goto out_map_in_map; + } + + if (!strcmp(info.name, "mim.inner")) { + printf("Inner map mim.inner was not destroyed\n"); + goto out_map_in_map; + } + } + return; out_map_in_map: - close(fd); + if (fd >= 0) + close(fd); exit(1); } diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c deleted file mode 100644 index a7b9a69f4fd5..000000000000 --- a/tools/testing/selftests/bpf/test_netcnt.c +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> -#include <assert.h> -#include <sys/sysinfo.h> -#include <sys/time.h> - -#include <linux/bpf.h> -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "cgroup_helpers.h" -#include "bpf_rlimit.h" -#include "netcnt_common.h" - -#define BPF_PROG "./netcnt_prog.o" -#define TEST_CGROUP "/test-network-counters/" - -static int bpf_find_map(const char *test, struct bpf_object *obj, - const char *name) -{ - struct bpf_map *map; - - map = bpf_object__find_map_by_name(obj, name); - if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); - return -1; - } - return bpf_map__fd(map); -} - -int main(int argc, char **argv) -{ - struct percpu_net_cnt *percpu_netcnt; - struct bpf_cgroup_storage_key key; - int map_fd, percpu_map_fd; - int error = EXIT_FAILURE; - struct net_cnt netcnt; - struct bpf_object *obj; - int prog_fd, cgroup_fd; - unsigned long packets; - unsigned long bytes; - int cpu, nproc; - __u32 prog_cnt; - - nproc = get_nprocs_conf(); - percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); - if (!percpu_netcnt) { - printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); - goto err; - } - - if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB, - &obj, &prog_fd)) { - printf("Failed to load bpf program\n"); - goto out; - } - - cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); - if (cgroup_fd < 0) - goto err; - - /* Attach bpf program */ - if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { - printf("Failed to attach bpf program"); - goto err; - } - - if (system("which ping6 &>/dev/null") == 0) - assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null")); - else - assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null")); - - if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, - &prog_cnt)) { - printf("Failed to query attached programs"); - goto err; - } - - map_fd = bpf_find_map(__func__, obj, "netcnt"); - if (map_fd < 0) { - printf("Failed to find bpf map with net counters"); - goto err; - } - - percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt"); - if (percpu_map_fd < 0) { - printf("Failed to find bpf map with percpu net counters"); - goto err; - } - - if (bpf_map_get_next_key(map_fd, NULL, &key)) { - printf("Failed to get key in cgroup storage\n"); - goto err; - } - - if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) { - printf("Failed to lookup cgroup storage\n"); - goto err; - } - - if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) { - printf("Failed to lookup percpu cgroup storage\n"); - goto err; - } - - /* Some packets can be still in per-cpu cache, but not more than - * MAX_PERCPU_PACKETS. - */ - packets = netcnt.packets; - bytes = netcnt.bytes; - for (cpu = 0; cpu < nproc; cpu++) { - if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) { - printf("Unexpected percpu value: %llu\n", - percpu_netcnt[cpu].packets); - goto err; - } - - packets += percpu_netcnt[cpu].packets; - bytes += percpu_netcnt[cpu].bytes; - } - - /* No packets should be lost */ - if (packets != 10000) { - printf("Unexpected packet count: %lu\n", packets); - goto err; - } - - /* Let's check that bytes counter matches the number of packets - * multiplied by the size of ipv6 ICMP packet. - */ - if (bytes != packets * 104) { - printf("Unexpected bytes count: %lu\n", bytes); - goto err; - } - - error = 0; - printf("test_netcnt:PASS\n"); - -err: - cleanup_cgroup_environment(); - free(percpu_netcnt); - -out: - return error; -} diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 8ef7f334e715..c8c2bf878f67 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -221,6 +221,18 @@ extern int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_STRNEQ(actual, expected, len, name) ({ \ + static int duration = 0; \ + const char *___act = actual; \ + const char *___exp = expected; \ + int ___len = len; \ + bool ___ok = strncmp(___act, ___exp, ___len) == 0; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual '%.*s' != expected '%.*s'\n", \ + (name), ___len, ___act, ___len, ___exp); \ + ___ok; \ +}) + #define ASSERT_OK(res, name) ({ \ static int duration = 0; \ long long ___res = (res); \ diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh index c9dde9b9d987..088fcad138c9 100755 --- a/tools/testing/selftests/bpf/test_tc_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh @@ -69,7 +69,7 @@ cleanup() { } server_listen() { - ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" & + ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" & server_pid=$! sleep 0.2 } diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index ba8ffcdaac30..995278e684b6 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy -ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx +ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy trap cleanup EXIT diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index a3e593ddfafc..2debba4e8a3a 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -1,4 +1,233 @@ { + "map access: known scalar += value_ptr unknown vs const", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs unknown", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (ne)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr const vs const (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 2), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (eq)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (lt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ + "map access: known scalar += value_ptr unknown vs unknown (gt)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 1, 4), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + BPF_MOV64_IMM(BPF_REG_1, 6), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 5 }, + .fixup_map_array_48b = { 8 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths or scalars", + .result = ACCEPT, + .retval = 1, +}, +{ "map access: known scalar += value_ptr from different maps", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 06a351b4f93b..0709af0144c8 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -38,6 +38,7 @@ /x86_64/xen_vmcall_test /x86_64/xss_msr_test /x86_64/vmx_pmu_msrs_test +/access_tracking_perf_test /demand_paging_test /dirty_log_test /dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index b853be2ae3c6..5832f510a16c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -71,6 +71,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index a16c8f05366c..cc898181faab 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -1019,7 +1019,8 @@ static __u64 sve_rejects_set[] = { #define VREGS_SUBLIST \ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), } #define PMU_SUBLIST \ - { "pmu", .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } + { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \ + .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), } #define SVE_SUBLIST \ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \ diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c new file mode 100644 index 000000000000..e2baa187a21e --- /dev/null +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * access_tracking_perf_test + * + * Copyright (C) 2021, Google, Inc. + * + * This test measures the performance effects of KVM's access tracking. + * Access tracking is driven by the MMU notifiers test_young, clear_young, and + * clear_flush_young. These notifiers do not have a direct userspace API, + * however the clear_young notifier can be triggered by marking a pages as idle + * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to + * enable access tracking on guest memory. + * + * To measure performance this test runs a VM with a configurable number of + * vCPUs that each touch every page in disjoint regions of memory. Performance + * is measured in the time it takes all vCPUs to finish touching their + * predefined region. + * + * Note that a deterministic correctness test of access tracking is not possible + * by using page_idle as it exists today. This is for a few reasons: + * + * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This + * means subsequent guest accesses are not guaranteed to see page table + * updates made by KVM until some time in the future. + * + * 2. page_idle only operates on LRU pages. Newly allocated pages are not + * immediately allocated to LRU lists. Instead they are held in a "pagevec", + * which is drained to LRU lists some time in the future. There is no + * userspace API to force this drain to occur. + * + * These limitations are worked around in this test by using a large enough + * region of memory for each vCPU such that the number of translations cached in + * the TLB and the number of pages held in pagevecs are a small fraction of the + * overall workload. And if either of those conditions are not true this test + * will fail rather than silently passing. + */ +#include <inttypes.h> +#include <limits.h> +#include <pthread.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "kvm_util.h" +#include "test_util.h" +#include "perf_test_util.h" +#include "guest_modes.h" + +/* Global variable used to synchronize all of the vCPU threads. */ +static int iteration = -1; + +/* Defines what vCPU threads should do during a given iteration. */ +static enum { + /* Run the vCPU to access all its memory. */ + ITERATION_ACCESS_MEMORY, + /* Mark the vCPU's memory idle in page_idle. */ + ITERATION_MARK_IDLE, +} iteration_work; + +/* Set to true when vCPU threads should exit. */ +static bool done; + +/* The iteration that was last completed by each vCPU. */ +static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; + +/* Whether to overlap the regions of memory vCPUs access. */ +static bool overlap_memory_access; + +struct test_params { + /* The backing source for the region of memory. */ + enum vm_mem_backing_src_type backing_src; + + /* The amount of memory to allocate for each vCPU. */ + uint64_t vcpu_memory_bytes; + + /* The number of vCPUs to create in the VM. */ + int vcpus; +}; + +static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) +{ + uint64_t value; + off_t offset = index * sizeof(value); + + TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value), + "pread from %s offset 0x%" PRIx64 " failed!", + filename, offset); + + return value; + +} + +#define PAGEMAP_PRESENT (1ULL << 63) +#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1) + +static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) +{ + uint64_t hva = (uint64_t) addr_gva2hva(vm, gva); + uint64_t entry; + uint64_t pfn; + + entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize()); + if (!(entry & PAGEMAP_PRESENT)) + return 0; + + pfn = entry & PAGEMAP_PFN_MASK; + if (!pfn) { + print_skip("Looking up PFNs requires CAP_SYS_ADMIN"); + exit(KSFT_SKIP); + } + + return pfn; +} + +static bool is_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64); + + return !!((bits >> (pfn % 64)) & 1); +} + +static void mark_page_idle(int page_idle_fd, uint64_t pfn) +{ + uint64_t bits = 1ULL << (pfn % 64); + + TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8, + "Set page_idle bits for PFN 0x%" PRIx64, pfn); +} + +static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) +{ + uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva; + uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages; + uint64_t page; + uint64_t still_idle = 0; + uint64_t no_pfn = 0; + int page_idle_fd; + int pagemap_fd; + + /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */ + if (overlap_memory_access && vcpu_id) + return; + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle."); + + pagemap_fd = open("/proc/self/pagemap", O_RDONLY); + TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap."); + + for (page = 0; page < pages; page++) { + uint64_t gva = base_gva + page * perf_test_args.guest_page_size; + uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva); + + if (!pfn) { + no_pfn++; + continue; + } + + if (is_page_idle(page_idle_fd, pfn)) { + still_idle++; + continue; + } + + mark_page_idle(page_idle_fd, pfn); + } + + /* + * Assumption: Less than 1% of pages are going to be swapped out from + * under us during this test. + */ + TEST_ASSERT(no_pfn < pages / 100, + "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.", + vcpu_id, no_pfn, pages); + + /* + * Test that at least 90% of memory has been marked idle (the rest might + * not be marked idle because the pages have not yet made it to an LRU + * list or the translations are still cached in the TLB). 90% is + * arbitrary; high enough that we ensure most memory access went through + * access tracking but low enough as to not make the test too brittle + * over time and across architectures. + */ + TEST_ASSERT(still_idle < pages / 10, + "vCPU%d: Too many pages still idle (%"PRIu64 " out of %" + PRIu64 ").\n", + vcpu_id, still_idle, pages); + + close(page_idle_fd); + close(pagemap_fd); +} + +static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id, + uint64_t expected_ucall) +{ + struct ucall uc; + uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc); + + TEST_ASSERT(expected_ucall == actual_ucall, + "Guest exited unexpectedly (expected ucall %" PRIu64 + ", got %" PRIu64 ")", + expected_ucall, actual_ucall); +} + +static bool spin_wait_for_next_iteration(int *current_iteration) +{ + int last_iteration = *current_iteration; + + do { + if (READ_ONCE(done)) + return false; + + *current_iteration = READ_ONCE(iteration); + } while (last_iteration == *current_iteration); + + return true; +} + +static void *vcpu_thread_main(void *arg) +{ + struct perf_test_vcpu_args *vcpu_args = arg; + struct kvm_vm *vm = perf_test_args.vm; + int vcpu_id = vcpu_args->vcpu_id; + int current_iteration = -1; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + + while (spin_wait_for_next_iteration(¤t_iteration)) { + switch (READ_ONCE(iteration_work)) { + case ITERATION_ACCESS_MEMORY: + vcpu_run(vm, vcpu_id); + assert_ucall(vm, vcpu_id, UCALL_SYNC); + break; + case ITERATION_MARK_IDLE: + mark_vcpu_memory_idle(vm, vcpu_id); + break; + }; + + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + } + + return NULL; +} + +static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) +{ + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + target_iteration) { + continue; + } +} + +/* The type of memory accesses to perform in the VM. */ +enum access_type { + ACCESS_READ, + ACCESS_WRITE, +}; + +static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) +{ + struct timespec ts_start; + struct timespec ts_elapsed; + int next_iteration; + int vcpu_id; + + /* Kick off the vCPUs by incrementing iteration. */ + next_iteration = ++iteration; + + clock_gettime(CLOCK_MONOTONIC, &ts_start); + + /* Wait for all vCPUs to finish the iteration. */ + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) + spin_wait_for_vcpu(vcpu_id, next_iteration); + + ts_elapsed = timespec_elapsed(ts_start); + pr_info("%-30s: %ld.%09lds\n", + description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec); +} + +static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, + const char *description) +{ + perf_test_args.wr_fract = (access == ACCESS_READ) ? INT_MAX : 1; + sync_global_to_guest(vm, perf_test_args); + iteration_work = ITERATION_ACCESS_MEMORY; + run_iteration(vm, vcpus, description); +} + +static void mark_memory_idle(struct kvm_vm *vm, int vcpus) +{ + /* + * Even though this parallelizes the work across vCPUs, this is still a + * very slow operation because page_idle forces the test to mark one pfn + * at a time and the clear_young notifier serializes on the KVM MMU + * lock. + */ + pr_debug("Marking VM memory idle (slow)...\n"); + iteration_work = ITERATION_MARK_IDLE; + run_iteration(vm, vcpus, "Mark memory idle"); +} + +static pthread_t *create_vcpu_threads(int vcpus) +{ + pthread_t *vcpu_threads; + int i; + + vcpu_threads = malloc(vcpus * sizeof(vcpu_threads[0])); + TEST_ASSERT(vcpu_threads, "Failed to allocate vcpu_threads."); + + for (i = 0; i < vcpus; i++) { + vcpu_last_completed_iteration[i] = iteration; + pthread_create(&vcpu_threads[i], NULL, vcpu_thread_main, + &perf_test_args.vcpu_args[i]); + } + + return vcpu_threads; +} + +static void terminate_vcpu_threads(pthread_t *vcpu_threads, int vcpus) +{ + int i; + + /* Set done to signal the vCPU threads to exit */ + done = true; + + for (i = 0; i < vcpus; i++) + pthread_join(vcpu_threads[i], NULL); +} + +static void run_test(enum vm_guest_mode mode, void *arg) +{ + struct test_params *params = arg; + struct kvm_vm *vm; + pthread_t *vcpu_threads; + int vcpus = params->vcpus; + + vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, + params->backing_src); + + perf_test_setup_vcpus(vm, vcpus, params->vcpu_memory_bytes, + !overlap_memory_access); + + vcpu_threads = create_vcpu_threads(vcpus); + + pr_info("\n"); + access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); + + /* As a control, read and write to the populated memory first. */ + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory"); + access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory"); + + /* Repeat on memory that has been marked as idle. */ + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory"); + mark_memory_idle(vm, vcpus); + access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); + + terminate_vcpu_threads(vcpu_threads, vcpus); + free(vcpu_threads); + perf_test_destroy_vm(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n", + name); + puts(""); + printf(" -h: Display this help message."); + guest_modes_help(); + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + printf(" -o: Overlap guest memory accesses instead of partitioning\n" + " them into a separate region of memory for each vCPU.\n"); + printf(" -s: specify the type of memory that should be used to\n" + " back the guest data region.\n\n"); + backing_src_help(); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + struct test_params params = { + .backing_src = VM_MEM_SRC_ANONYMOUS, + .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, + .vcpus = 1, + }; + int page_idle_fd; + int opt; + + guest_modes_append_default(); + + while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) { + switch (opt) { + case 'm': + guest_modes_cmdline(optarg); + break; + case 'b': + params.vcpu_memory_bytes = parse_size(optarg); + break; + case 'v': + params.vcpus = atoi(optarg); + break; + case 'o': + overlap_memory_access = true; + break; + case 's': + params.backing_src = parse_backing_src_type(optarg); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); + if (page_idle_fd < 0) { + print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled"); + exit(KSFT_SKIP); + } + close(page_idle_fd); + + for_each_guest_mode(run_test, ¶ms); + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 04a2641261be..80cbd3a748c0 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -312,6 +312,7 @@ int main(int argc, char *argv[]) break; case 'o': p.partition_vcpu_memory_access = false; + break; case 's': p.backing_src = parse_backing_src_type(optarg); break; diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index 412eaee7884a..b66910702c0a 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -117,7 +117,7 @@ #define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1) #define HV_X64_PERF_MONITOR_AVAILABLE BIT(2) #define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3) -#define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE BIT(4) +#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4) #define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5) #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) @@ -182,4 +182,7 @@ #define HV_STATUS_INVALID_CONNECTION_ID 18 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +/* hypercall options */ +#define HV_HYPERCALL_FAST_BIT BIT(16) + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index b0031f2d38fd..ecec30865a74 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -320,7 +320,7 @@ int main(int ac, char **av) run_delay = get_run_delay(); pthread_create(&thread, &attr, do_steal_time, NULL); do - pthread_yield(); + sched_yield(); while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS); pthread_join(thread, NULL); run_delay = get_run_delay() - run_delay; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index bab10ae787b6..e0b2bb1339b1 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -215,7 +215,7 @@ int main(void) vcpu_set_hv_cpuid(vm, VCPU_ID); tsc_page_gva = vm_vaddr_alloc_page(vm); - memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize()); + memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned\n"); vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index af27c7e829c1..91d88aaa9899 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -47,6 +47,7 @@ static void do_wrmsr(u32 idx, u64 val) } static int nr_gp; +static int nr_ud; static inline u64 hypercall(u64 control, vm_vaddr_t input_address, vm_vaddr_t output_address) @@ -80,6 +81,12 @@ static void guest_gp_handler(struct ex_regs *regs) regs->rip = (uint64_t)&wrmsr_end; } +static void guest_ud_handler(struct ex_regs *regs) +{ + nr_ud++; + regs->rip += 3; +} + struct msr_data { uint32_t idx; bool available; @@ -90,6 +97,7 @@ struct msr_data { struct hcall_data { uint64_t control; uint64_t expect; + bool ud_expected; }; static void guest_msr(struct msr_data *msr) @@ -117,13 +125,26 @@ static void guest_msr(struct msr_data *msr) static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) { int i = 0; + u64 res, input, output; wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); while (hcall->control) { - GUEST_ASSERT(hypercall(hcall->control, pgs_gpa, - pgs_gpa + 4096) == hcall->expect); + nr_ud = 0; + if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { + input = pgs_gpa; + output = pgs_gpa + 4096; + } else { + input = output = 0; + } + + res = hypercall(hcall->control, input, output); + if (hcall->ud_expected) + GUEST_ASSERT(nr_ud == 1); + else + GUEST_ASSERT(res == hcall->expect); + GUEST_SYNC(i++); } @@ -552,8 +573,18 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall recomm.ebx = 0xfff; hcall->expect = HV_STATUS_SUCCESS; break; - case 17: + /* XMM fast hypercall */ + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; + hcall->ud_expected = true; + break; + case 18: + feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + hcall->ud_expected = false; + hcall->expect = HV_STATUS_SUCCESS; + break; + + case 19: /* END */ hcall->control = 0; break; @@ -625,6 +656,10 @@ int main(void) /* Test hypercalls */ vm = vm_create_default(VCPU_ID, 0, guest_hcall); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 5b169e915679..4f9f73e7a299 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -38,8 +38,10 @@ TEST_GEN_FILES += reuseaddr_ports_exhausted TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp TEST_GEN_FILES += ipsec TEST_GEN_FILES += ioam6_parser +TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_GEN_FILES += toeplitz TEST_FILES := settings diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile new file mode 100644 index 000000000000..cfc7f4f97fd1 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -0,0 +1,5 @@ +##TEST_GEN_FILES := test_unix_oob +TEST_PROGS := test_unix_oob +include ../../lib.mk + +all: $(TEST_PROGS) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c new file mode 100644 index 000000000000..0f3e3763f4f8 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <string.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include <errno.h> +#include <netinet/tcp.h> +#include <sys/un.h> +#include <sys/signal.h> +#include <sys/poll.h> + +static int pipefd[2]; +static int signal_recvd; +static pid_t producer_id; +static char sock_name[32]; + +static void sig_hand(int sn, siginfo_t *si, void *p) +{ + signal_recvd = sn; +} + +static int set_sig_handler(int signal) +{ + struct sigaction sa; + + sa.sa_sigaction = sig_hand; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO | SA_RESTART; + + return sigaction(signal, &sa, NULL); +} + +static void set_filemode(int fd, int set) +{ + int flags = fcntl(fd, F_GETFL, 0); + + if (set) + flags &= ~O_NONBLOCK; + else + flags |= O_NONBLOCK; + fcntl(fd, F_SETFL, flags); +} + +static void signal_producer(int fd) +{ + char cmd; + + cmd = 'S'; + write(fd, &cmd, sizeof(cmd)); +} + +static void wait_for_signal(int fd) +{ + char buf[5]; + + read(fd, buf, 5); +} + +static void die(int status) +{ + fflush(NULL); + unlink(sock_name); + kill(producer_id, SIGTERM); + exit(status); +} + +int is_sioctatmark(int fd) +{ + int ans = -1; + + if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) { +#ifdef DEBUG + perror("SIOCATMARK Failed"); +#endif + } + return ans; +} + +void read_oob(int fd, char *c) +{ + + *c = ' '; + if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) { +#ifdef DEBUG + perror("Reading MSG_OOB Failed"); +#endif + } +} + +int read_data(int pfd, char *buf, int size) +{ + int len = 0; + + memset(buf, size, '0'); + len = read(pfd, buf, size); +#ifdef DEBUG + if (len < 0) + perror("read failed"); +#endif + return len; +} + +static void wait_for_data(int pfd, int event) +{ + struct pollfd pfds[1]; + + pfds[0].fd = pfd; + pfds[0].events = event; + poll(pfds, 1, -1); +} + +void producer(struct sockaddr_un *consumer_addr) +{ + int cfd; + char buf[64]; + int i; + + memset(buf, 'x', sizeof(buf)); + cfd = socket(AF_UNIX, SOCK_STREAM, 0); + + wait_for_signal(pipefd[0]); + if (connect(cfd, (struct sockaddr *)consumer_addr, + sizeof(struct sockaddr)) != 0) { + perror("Connect failed"); + kill(0, SIGTERM); + exit(1); + } + + for (i = 0; i < 2; i++) { + /* Test 1: Test for SIGURG and OOB */ + wait_for_signal(pipefd[0]); + memset(buf, 'x', sizeof(buf)); + buf[63] = '@'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + wait_for_signal(pipefd[0]); + + /* Test 2: Test for OOB being overwitten */ + memset(buf, 'x', sizeof(buf)); + buf[63] = '%'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + buf[63] = '#'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + wait_for_signal(pipefd[0]); + + /* Test 3: Test for SIOCATMARK */ + memset(buf, 'x', sizeof(buf)); + buf[63] = '@'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + buf[63] = '%'; + send(cfd, buf, sizeof(buf), MSG_OOB); + + memset(buf, 'x', sizeof(buf)); + send(cfd, buf, sizeof(buf), 0); + + wait_for_signal(pipefd[0]); + + /* Test 4: Test for 1byte OOB msg */ + memset(buf, 'x', sizeof(buf)); + buf[0] = '@'; + send(cfd, buf, 1, MSG_OOB); + } +} + +int +main(int argc, char **argv) +{ + int lfd, pfd; + struct sockaddr_un consumer_addr, paddr; + socklen_t len = sizeof(consumer_addr); + char buf[1024]; + int on = 0; + char oob; + int flags; + int atmark; + char *tmp_file; + + lfd = socket(AF_UNIX, SOCK_STREAM, 0); + memset(&consumer_addr, 0, sizeof(consumer_addr)); + consumer_addr.sun_family = AF_UNIX; + sprintf(sock_name, "unix_oob_%d", getpid()); + unlink(sock_name); + strcpy(consumer_addr.sun_path, sock_name); + + if ((bind(lfd, (struct sockaddr *)&consumer_addr, + sizeof(consumer_addr))) != 0) { + perror("socket bind failed"); + exit(1); + } + + pipe(pipefd); + + listen(lfd, 1); + + producer_id = fork(); + if (producer_id == 0) { + producer(&consumer_addr); + exit(0); + } + + set_sig_handler(SIGURG); + signal_producer(pipefd[1]); + + pfd = accept(lfd, (struct sockaddr *) &paddr, &len); + fcntl(pfd, F_SETOWN, getpid()); + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 1: + * veriyf that SIGURG is + * delivered and 63 bytes are + * read and oob is '@' + */ + wait_for_data(pfd, POLLIN | POLLPRI); + read_oob(pfd, &oob); + len = read_data(pfd, buf, 1024); + if (!signal_recvd || len != 63 || oob != '@') { + fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 2: + * Verify that the first OOB is over written by + * the 2nd one and the first OOB is returned as + * part of the read, and sigurg is received. + */ + wait_for_data(pfd, POLLIN | POLLPRI); + len = 0; + while (len < 70) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + read_oob(pfd, &oob); + if (!signal_recvd || len != 127 || oob != '#') { + fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 3: + * verify that 2nd oob over writes + * the first one and read breaks at + * oob boundary returning 127 bytes + * and sigurg is received and atmark + * is set. + * oob is '%' and second read returns + * 64 bytes. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 150) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + read_oob(pfd, &oob); + + if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { + fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ", + "atmark %d\n", signal_recvd, len, oob, atmark); + die(1); + } + + signal_recvd = 0; + + len = read_data(pfd, buf, 1024); + if (len != 64) { + fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 4: + * verify that a single byte + * oob message is delivered. + * set non blocking mode and + * check proper error is + * returned and sigurg is + * received and correct + * oob is read. + */ + + set_filemode(pfd, 0); + + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + if ((len == -1) && (errno == 11)) + len = 0; + + read_oob(pfd, &oob); + + if (!signal_recvd || len != 0 || oob != '@') { + fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n", + signal_recvd, len, oob); + die(1); + } + + set_filemode(pfd, 1); + + /* Inline Testing */ + + on = 1; + if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) { + perror("SO_OOBINLINE"); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 1 -- Inline: + * Check that SIGURG is + * delivered and 63 bytes are + * read and oob is '@' + */ + + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + + if (!signal_recvd || len != 63) { + fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n", + signal_recvd, len); + die(1); + } + + len = read_data(pfd, buf, 1024); + + if (len != 1) { + fprintf(stderr, + "Test 1.1 Inline failed, sigurg %d len %d oob %c\n", + signal_recvd, len, oob); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 2 -- Inline: + * Verify that the first OOB is over written by + * the 2nd one and read breaks correctly on + * 2nd OOB boundary with the first OOB returned as + * part of the read, and sigurg is delivered and + * siocatmark returns true. + * next read returns one byte, the oob byte + * and siocatmark returns false. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 70) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 127 || atmark != 1 || !signal_recvd) { + fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n", + len, atmark); + die(1); + } + + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 1 || buf[0] != '#' || atmark == 1) { + fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n", + len, buf[0], atmark); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 3 -- Inline: + * verify that 2nd oob over writes + * the first one and read breaks at + * oob boundary returning 127 bytes + * and sigurg is received and siocatmark + * is true after the read. + * subsequent read returns 65 bytes + * because of oob which should be '%'. + */ + len = 0; + wait_for_data(pfd, POLLIN | POLLPRI); + while (len < 126) + len = recv(pfd, buf, 1024, MSG_PEEK); + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (!signal_recvd || len != 127 || !atmark) { + fprintf(stderr, + "Test 3 Inline failed, sigurg %d len %d data %c\n", + signal_recvd, len, buf[0]); + die(1); + } + + len = read_data(pfd, buf, 1024); + atmark = is_sioctatmark(pfd); + if (len != 65 || buf[0] != '%' || atmark != 0) { + fprintf(stderr, + "Test 3.1 Inline failed, len %d oob %c atmark %d\n", + len, buf[0], atmark); + die(1); + } + + signal_recvd = 0; + signal_producer(pipefd[1]); + + /* Test 4 -- Inline: + * verify that a single + * byte oob message is delivered + * and read returns one byte, the oob + * byte and sigurg is received + */ + wait_for_data(pfd, POLLIN | POLLPRI); + len = read_data(pfd, buf, 1024); + if (!signal_recvd || len != 1 || buf[0] != '@') { + fprintf(stderr, + "Test 4 Inline failed, signal %d len %d data %c\n", + signal_recvd, len, buf[0]); + die(1); + } + die(0); +} diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c new file mode 100644 index 000000000000..cf37ce86b0fd --- /dev/null +++ b/tools/testing/selftests/net/gro.c @@ -0,0 +1,1095 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This testsuite provides conformance testing for GRO coalescing. + * + * Test cases: + * 1.data + * Data packets of the same size and same header setup with correct + * sequence numbers coalesce. The one exception being the last data + * packet coalesced: it can be smaller than the rest and coalesced + * as long as it is in the same flow. + * 2.ack + * Pure ACK does not coalesce. + * 3.flags + * Specific test cases: no packets with PSH, SYN, URG, RST set will + * be coalesced. + * 4.tcp + * Packets with incorrect checksum, non-consecutive seqno and + * different TCP header options shouldn't coalesce. Nit: given that + * some extension headers have paddings, such as timestamp, headers + * that are padding differently would not be coalesced. + * 5.ip: + * Packets with different (ECN, TTL, TOS) header, ip options or + * ip fragments (ipv6) shouldn't coalesce. + * 6.large: + * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce. + * + * MSS is defined as 4096 - header because if it is too small + * (i.e. 1500 MTU - header), it will result in many packets, + * increasing the "large" test case's flakiness. This is because + * due to time sensitivity in the coalescing window, the receiver + * may not coalesce all of the packets. + * + * Note the timing issue applies to all of the test cases, so some + * flakiness is to be expected. + * + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_packet.h> +#include <linux/ipv6.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> + +#define DPORT 8000 +#define SPORT 1500 +#define PAYLOAD_LEN 100 +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define NUM_PACKETS 4 +#define START_SEQ 100 +#define START_ACK 100 +#define SIP6 "fdaa::2" +#define DIP6 "fdaa::1" +#define SIP4 "192.168.1.200" +#define DIP4 "192.168.1.100" +#define ETH_P_NONE 0 +#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) +#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) +#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) +#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) + +static int proto = -1; +static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; +static char *testname = "data"; +static char *ifname = "eth0"; +static char *smac = "aa:00:00:00:00:02"; +static char *dmac = "aa:00:00:00:00:01"; +static bool verbose; +static bool tx_socket = true; +static int tcp_offset = -1; +static int total_hdr_len = -1; +static int ethhdr_proto = -1; + +static void vlog(const char *fmt, ...) +{ + va_list args; + + if (verbose) { + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + } +} + +static void setup_sock_filter(int fd) +{ + const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); + const int ethproto_off = offsetof(struct ethhdr, h_proto); + int optlen = 0; + int ipproto_off; + int next_off; + + if (proto == PF_INET) + next_off = offsetof(struct iphdr, protocol); + else + next_off = offsetof(struct ipv6hdr, nexthdr); + ipproto_off = ETH_HLEN + next_off; + + if (strcmp(testname, "ip") == 0) { + if (proto == PF_INET) + optlen = sizeof(struct ip_timestamp); + else + optlen = sizeof(struct ip6_frag); + } + + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1), + BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF), + BPF_STMT(BPF_RET + BPF_K, 0), + }; + + struct sock_fprog bpf = { + .len = ARRAY_SIZE(filter), + .filter = filter, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0) + error(1, errno, "error setting filter"); +} + +static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum) +{ + uint16_t *words = data; + int i; + + for (i = 0; i < len / 2; i++) + sum += words[i]; + if (len & 1) + sum += ((char *)data)[len - 1]; + return sum; +} + +static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) +{ + sum = checksum_nofold(data, len, sum); + while (sum > 0xFFFF) + sum = (sum & 0xFFFF) + (sum >> 16); + return ~sum; +} + +static uint16_t tcp_checksum(void *buf, int payload_len) +{ + struct pseudo_header6 { + struct in6_addr saddr; + struct in6_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph6; + struct pseudo_header4 { + struct in_addr saddr; + struct in_addr daddr; + uint16_t protocol; + uint16_t payload_len; + } ph4; + uint32_t sum = 0; + + if (proto == PF_INET6) { + if (inet_pton(AF_INET6, SIP6, &ph6.saddr) != 1) + error(1, errno, "inet_pton6 source ip pseudo"); + if (inet_pton(AF_INET6, DIP6, &ph6.daddr) != 1) + error(1, errno, "inet_pton6 dest ip pseudo"); + ph6.protocol = htons(IPPROTO_TCP); + ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph6, sizeof(ph6), 0); + } else if (proto == PF_INET) { + if (inet_pton(AF_INET, SIP4, &ph4.saddr) != 1) + error(1, errno, "inet_pton source ip pseudo"); + if (inet_pton(AF_INET, DIP4, &ph4.daddr) != 1) + error(1, errno, "inet_pton dest ip pseudo"); + ph4.protocol = htons(IPPROTO_TCP); + ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); + + sum = checksum_nofold(&ph4, sizeof(ph4), 0); + } + + return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum); +} + +static void read_MAC(uint8_t *mac_addr, char *mac) +{ + if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", + &mac_addr[0], &mac_addr[1], &mac_addr[2], + &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) + error(1, 0, "sscanf"); +} + +static void fill_datalinklayer(void *buf) +{ + struct ethhdr *eth = buf; + + memcpy(eth->h_dest, dst_mac, ETH_ALEN); + memcpy(eth->h_source, src_mac, ETH_ALEN); + eth->h_proto = ethhdr_proto; +} + +static void fill_networklayer(void *buf, int payload_len) +{ + struct ipv6hdr *ip6h = buf; + struct iphdr *iph = buf; + + if (proto == PF_INET6) { + memset(ip6h, 0, sizeof(*ip6h)); + + ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->hop_limit = 8; + if (inet_pton(AF_INET6, SIP6, &ip6h->saddr) != 1) + error(1, errno, "inet_pton source ip6"); + if (inet_pton(AF_INET6, DIP6, &ip6h->daddr) != 1) + error(1, errno, "inet_pton dest ip6"); + } else if (proto == PF_INET) { + memset(iph, 0, sizeof(*iph)); + + iph->version = 4; + iph->ihl = 5; + iph->ttl = 8; + iph->protocol = IPPROTO_TCP; + iph->tot_len = htons(sizeof(struct tcphdr) + + payload_len + sizeof(struct iphdr)); + iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ + if (inet_pton(AF_INET, SIP4, &iph->saddr) != 1) + error(1, errno, "inet_pton source ip"); + if (inet_pton(AF_INET, DIP4, &iph->daddr) != 1) + error(1, errno, "inet_pton dest ip"); + iph->check = checksum_fold(buf, sizeof(struct iphdr), 0); + } +} + +static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + struct tcphdr *tcph = buf; + + memset(tcph, 0, sizeof(*tcph)); + + tcph->source = htons(SPORT); + tcph->dest = htons(DPORT); + tcph->seq = ntohl(START_SEQ + seq_offset); + tcph->ack_seq = ntohl(START_ACK + ack_offset); + tcph->ack = 1; + tcph->fin = fin; + tcph->doff = 5; + tcph->window = htons(TCP_MAXWIN); + tcph->urg_ptr = 0; + tcph->check = tcp_checksum(tcph, payload_len); +} + +static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) +{ + int ret = -1; + + ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); + if (ret == -1) + error(1, errno, "sendto failure"); + if (ret != len) + error(1, errno, "sendto wrong length"); +} + +static void create_packet(void *buf, int seq_offset, int ack_offset, + int payload_len, int fin) +{ + memset(buf, 0, total_hdr_len); + memset(buf + total_hdr_len, 'a', payload_len); + fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, + payload_len, fin); + fill_networklayer(buf + ETH_HLEN, payload_len); + fill_datalinklayer(buf); +} + +/* send one extra flag, not first and not last pkt */ +static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, + int rst, int urg) +{ + static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int payload_len, pkt_size, flag, i; + struct tcphdr *tcph; + + payload_len = PAYLOAD_LEN * psh; + pkt_size = total_hdr_len + payload_len; + flag = NUM_PACKETS / 2; + + create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); + + tcph = (struct tcphdr *)(flag_buf + tcp_offset); + tcph->psh = psh; + tcph->syn = syn; + tcph->rst = rst; + tcph->urg = urg; + tcph->check = 0; + tcph->check = tcp_checksum(tcph, payload_len); + + for (i = 0; i < NUM_PACKETS + 1; i++) { + if (i == flag) { + write_packet(fd, flag_buf, pkt_size, daddr); + continue; + } + create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + } +} + +/* Test for data of same length, smaller than previous + * and of different lengths + */ +static void send_data_pkts(int fd, struct sockaddr_ll *daddr, + int payload_len1, int payload_len2) +{ + static char buf[ETH_HLEN + IP_MAXPACKET]; + + create_packet(buf, 0, 0, payload_len1, 0); + write_packet(fd, buf, total_hdr_len + payload_len1, daddr); + create_packet(buf, payload_len1, 0, payload_len2, 0); + write_packet(fd, buf, total_hdr_len + payload_len2, daddr); +} + +/* If incoming segments make tracked segment length exceed + * legal IP datagram length, do not coalesce + */ +static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) +{ + static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; + static char last[TOTAL_HDR_LEN + MSS]; + static char new_seg[TOTAL_HDR_LEN + MSS]; + int i; + + for (i = 0; i < NUM_LARGE_PKT; i++) + create_packet(pkts[i], i * MSS, 0, MSS, 0); + create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); + create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); + + for (i = 0; i < NUM_LARGE_PKT; i++) + write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); + write_packet(fd, last, total_hdr_len + remainder, daddr); + write_packet(fd, new_seg, total_hdr_len + remainder, daddr); +} + +/* Pure acks and dup acks don't coalesce */ +static void send_ack(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN]; + + create_packet(buf, 0, 0, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); + write_packet(fd, buf, total_hdr_len, daddr); + create_packet(buf, 0, 1, 0, 0); + write_packet(fd, buf, total_hdr_len, daddr); +} + +static void recompute_packet(char *buf, char *no_ext, int extlen) +{ + struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + memmove(buf, no_ext, total_hdr_len); + memmove(buf + total_hdr_len + extlen, + no_ext + total_hdr_len, PAYLOAD_LEN); + + tcphdr->doff = tcphdr->doff + (extlen / 4); + tcphdr->check = 0; + tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); + if (proto == PF_INET) { + iph->tot_len = htons(ntohs(iph->tot_len) + extlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + } +} + +static void tcp_write_options(char *buf, int kind, int ts) +{ + struct tcp_option_ts { + uint8_t kind; + uint8_t len; + uint32_t tsval; + uint32_t tsecr; + } *opt_ts = (void *)buf; + struct tcp_option_window { + uint8_t kind; + uint8_t len; + uint8_t shift; + } *opt_window = (void *)buf; + + switch (kind) { + case TCPOPT_NOP: + buf[0] = TCPOPT_NOP; + break; + case TCPOPT_WINDOW: + memset(opt_window, 0, sizeof(struct tcp_option_window)); + opt_window->kind = TCPOPT_WINDOW; + opt_window->len = TCPOLEN_WINDOW; + opt_window->shift = 0; + break; + case TCPOPT_TIMESTAMP: + memset(opt_ts, 0, sizeof(struct tcp_option_ts)); + opt_ts->kind = TCPOPT_TIMESTAMP; + opt_ts->len = TCPOLEN_TIMESTAMP; + opt_ts->tsval = ts; + opt_ts->tsecr = 0; + break; + default: + error(1, 0, "unimplemented TCP option"); + break; + } +} + +/* TCP with options is always a permutation of {TS, NOP, NOP}. + * Implement different orders to verify coalescing stops. + */ +static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order) +{ + switch (order) { + case 0: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */, + TCPOPT_TIMESTAMP, ts); + break; + case 1: + tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + 1, + TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP, + TCPOPT_NOP, 0); + break; + case 2: + tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1, + TCPOPT_NOP, 0); + tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2, + TCPOPT_NOP, 0); + break; + default: + error(1, 0, "unknown order"); + break; + } + recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA); +} + +/* Packets with invalid checksum don't coalesce. */ +static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->check = tcph->check - 1; + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packets with non-consecutive sequence number don't coalesce.*/ +static void send_changed_seq(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + tcph->seq = ntohl(htonl(tcph->seq) + 1); + tcph->check = 0; + tcph->check = tcp_checksum(tcph, PAYLOAD_LEN); + write_packet(fd, buf, pkt_size, daddr); +} + + /* Packet with different timestamp option or different timestamps + * don't coalesce. + */ +static void send_changed_ts(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 0, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 0); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 1); + write_packet(fd, extpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt, buf, 100, 2); + write_packet(fd, extpkt, pkt_size, daddr); +} + +/* Packet with different tcp options don't coalesce. */ +static void send_diff_opt(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; + static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG]; + int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; + int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + add_standard_tcp_options(extpkt1, buf, 0, 0); + write_packet(fd, extpkt1, extpkt1_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0); + tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0); + recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1); + write_packet(fd, extpkt2, extpkt2_size, daddr); +} + +static void add_ipv4_ts_option(void *buf, void *optpkt) +{ + struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset); + int optlen = sizeof(struct ip_timestamp); + struct iphdr *iph; + + if (optlen % 4) + error(1, 0, "ipv4 timestamp length is not a multiple of 4B"); + + ts->ipt_code = IPOPT_TS; + ts->ipt_len = optlen; + ts->ipt_ptr = 5; + ts->ipt_flg = IPOPT_TS_TSONLY; + + memcpy(optpkt, buf, tcp_offset); + memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + + iph = (struct iphdr *)(optpkt + ETH_HLEN); + iph->ihl = 5 + (optlen / 4); + iph->tot_len = htons(ntohs(iph->tot_len) + optlen); + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); +} + +/* IPv4 options shouldn't coalesce */ +static void send_ip_options(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)]; + int optlen = sizeof(struct ip_timestamp); + int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); + + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); + add_ipv4_ts_option(buf, optpkt); + write_packet(fd, optpkt, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); +} + +/* IPv4 fragments shouldn't coalesce */ +static void send_fragment4(int fd, struct sockaddr_ll *daddr) +{ + static char buf[IP_MAXPACKET]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + int pkt_size = total_hdr_len + PAYLOAD_LEN; + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + /* Once fragmented, packet would retain the total_len. + * Tcp header is prepared as if rest of data is in follow-up frags, + * but follow up frags aren't actually sent. + */ + memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); + fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); + fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); + fill_datalinklayer(buf); + + iph->frag_off = htons(0x6000); // DF = 1, MF = 1 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv4 packets with different ttl don't coalesce.*/ +static void send_changed_ttl(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + iph->ttl = 7; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different tos don't coalesce.*/ +static void send_changed_tos(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + iph->tos = 1; + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else if (proto == PF_INET6) { + ip6h->priority = 0xf; + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* Packets with different ECN don't coalesce.*/ +static void send_changed_ECN(int fd, struct sockaddr_ll *daddr) +{ + int pkt_size = total_hdr_len + PAYLOAD_LEN; + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + if (proto == PF_INET) { + buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10 + iph->check = 0; + iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); + } else { + buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10 + } + write_packet(fd, buf, pkt_size, daddr); +} + +/* IPv6 fragments and packets with extensions don't coalesce.*/ +static void send_fragment6(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN + + sizeof(struct ip6_frag)]; + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); + struct ip6_frag *frag = (void *)(extpkt + tcp_offset); + int extlen = sizeof(struct ip6_frag); + int bufpkt_len = total_hdr_len + PAYLOAD_LEN; + int extpkt_len = bufpkt_len + extlen; + int i; + + for (i = 0; i < 2; i++) { + create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); + } + + create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); + memset(extpkt, 0, extpkt_len); + + ip6h->nexthdr = IPPROTO_FRAGMENT; + ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); + frag->ip6f_nxt = IPPROTO_TCP; + + memcpy(extpkt, buf, tcp_offset); + memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset, + sizeof(struct tcphdr) + PAYLOAD_LEN); + write_packet(fd, extpkt, extpkt_len, daddr); + + create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, bufpkt_len, daddr); +} + +static void bind_packetsocket(int fd) +{ + struct sockaddr_ll daddr = {}; + + daddr.sll_family = AF_PACKET; + daddr.sll_protocol = ethhdr_proto; + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + + if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0) + error(1, errno, "could not bind socket"); +} + +static void set_timeout(int fd) +{ + struct timeval timeout; + + timeout.tv_sec = 120; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)) < 0) + error(1, errno, "cannot set timeout, setsockopt failed"); +} + +static void check_recv_pkts(int fd, int *correct_payload, + int correct_num_pkts) +{ + static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; + struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + struct tcphdr *tcph; + bool bad_packet = false; + int tcp_ext_len = 0; + int ip_ext_len = 0; + int pkt_size = -1; + int data_len = 0; + int num_pkt = 0; + int i; + + vlog("Expected {"); + for (i = 0; i < correct_num_pkts; i++) + vlog("%d ", correct_payload[i]); + vlog("}, Total %d packets\nReceived {", correct_num_pkts); + + while (1) { + pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + if (pkt_size < 0) + error(1, errno, "could not receive"); + + if (iph->version == 4) + ip_ext_len = (iph->ihl - 5) * 4; + else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) + ip_ext_len = sizeof(struct ip6_frag); + + tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); + + if (tcph->fin) + break; + + tcp_ext_len = (tcph->doff - 5) * 4; + data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len; + /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3. + * Ipv4/tcp packets without at least 6 bytes of data will be padded. + * Packet sockets are protocol agnostic, and will not trim the padding. + */ + if (pkt_size == ETH_ZLEN && iph->version == 4) { + data_len = ntohs(iph->tot_len) + - sizeof(struct tcphdr) - sizeof(struct iphdr); + } + vlog("%d ", data_len); + if (data_len != correct_payload[num_pkt]) { + vlog("[!=%d]", correct_payload[num_pkt]); + bad_packet = true; + } + num_pkt++; + } + vlog("}, Total %d packets.\n", num_pkt); + if (num_pkt != correct_num_pkts) + error(1, 0, "incorrect number of packets"); + if (bad_packet) + error(1, 0, "incorrect packet geometry"); + + printf("Test succeeded\n\n"); +} + +static void gro_sender(void) +{ + static char fin_pkt[MAX_HDR_LEN]; + struct sockaddr_ll daddr = {}; + int txfd = -1; + + txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (txfd < 0) + error(1, errno, "socket creation"); + + memset(&daddr, 0, sizeof(daddr)); + daddr.sll_ifindex = if_nametoindex(ifname); + if (daddr.sll_ifindex == 0) + error(1, errno, "if_nametoindex"); + daddr.sll_family = AF_PACKET; + memcpy(daddr.sll_addr, dst_mac, ETH_ALEN); + daddr.sll_halen = ETH_ALEN; + create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); + + if (strcmp(testname, "data") == 0) { + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ack") == 0) { + send_ack(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "flags") == 0) { + send_flags(txfd, &daddr, 1, 0, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 1, 0, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 1, 0); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_flags(txfd, &daddr, 0, 0, 0, 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "tcp") == 0) { + send_changed_checksum(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_seq(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_ts(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_diff_opt(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (strcmp(testname, "ip") == 0) { + send_changed_ECN(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_changed_tos(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + if (proto == PF_INET) { + /* Modified packets may be received out of order. + * Sleep function added to enforce test boundaries + * so that fin pkts are not received prior to other pkts. + */ + sleep(1); + send_changed_ttl(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_ip_options(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + sleep(1); + send_fragment4(txfd, &daddr); + sleep(1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else if (proto == PF_INET6) { + send_fragment6(txfd, &daddr); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } + } else if (strcmp(testname, "large") == 0) { + /* 20 is the difference between min iphdr size + * and min ipv6hdr size. Like MAX_HDR_SIZE, + * MAX_PAYLOAD is defined with the larger header of the two. + */ + int offset = proto == PF_INET ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + send_large(txfd, &daddr, remainder); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + + send_large(txfd, &daddr, remainder + 1); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else { + error(1, 0, "Unknown testcase"); + } + + if (close(txfd)) + error(1, errno, "socket close"); +} + +static void gro_receiver(void) +{ + static int correct_payload[NUM_PACKETS]; + int rxfd = -1; + + rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE)); + if (rxfd < 0) + error(1, 0, "socket creation"); + setup_sock_filter(rxfd); + set_timeout(rxfd); + bind_packetsocket(rxfd); + + memset(correct_payload, 0, sizeof(correct_payload)); + + if (strcmp(testname, "data") == 0) { + printf("pure data packet of same size: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("large data packets followed by a smaller one: "); + correct_payload[0] = PAYLOAD_LEN * 1.5; + check_recv_pkts(rxfd, correct_payload, 1); + + printf("small data packets followed by a larger one: "); + correct_payload[0] = PAYLOAD_LEN / 2; + correct_payload[1] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ack") == 0) { + printf("duplicate ack and pure ack: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "flags") == 0) { + correct_payload[0] = PAYLOAD_LEN * 3; + correct_payload[1] = PAYLOAD_LEN * 2; + + printf("psh flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 2); + + correct_payload[0] = PAYLOAD_LEN * 2; + correct_payload[1] = 0; + correct_payload[2] = PAYLOAD_LEN * 2; + printf("syn flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("rst flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + + printf("urg flag ends coalescing: "); + check_recv_pkts(rxfd, correct_payload, 3); + } else if (strcmp(testname, "tcp") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + correct_payload[2] = PAYLOAD_LEN; + correct_payload[3] = PAYLOAD_LEN; + + printf("changed checksum does not coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Wrong Seq number doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("Different timestamp doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 4); + + printf("Different options doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + } else if (strcmp(testname, "ip") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + + printf("different ECN doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("different tos doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + if (proto == PF_INET) { + printf("different ttl doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + + printf("ip options doesn't coalesce: "); + correct_payload[2] = PAYLOAD_LEN; + check_recv_pkts(rxfd, correct_payload, 3); + + printf("fragmented ip4 doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else if (proto == PF_INET6) { + /* GRO doesn't check for ipv6 hop limit when flushing. + * Hence no corresponding test to the ipv4 case. + */ + printf("fragmented ip6 doesn't coalesce: "); + correct_payload[0] = PAYLOAD_LEN * 2; + check_recv_pkts(rxfd, correct_payload, 2); + } + } else if (strcmp(testname, "large") == 0) { + int offset = proto == PF_INET ? 20 : 0; + int remainder = (MAX_PAYLOAD + offset) % MSS; + + correct_payload[0] = (MAX_PAYLOAD + offset); + correct_payload[1] = remainder; + printf("Shouldn't coalesce if exceed IP max pkt size: "); + check_recv_pkts(rxfd, correct_payload, 2); + + /* last segment sent individually, doesn't start new segment */ + correct_payload[0] = correct_payload[0] - remainder; + correct_payload[1] = remainder + 1; + correct_payload[2] = remainder + 1; + check_recv_pkts(rxfd, correct_payload, 3); + } else { + error(1, 0, "Test case error, should never trigger"); + } + + if (close(rxfd)) + error(1, 0, "socket close"); +} + +static void parse_args(int argc, char **argv) +{ + static const struct option opts[] = { + { "dmac", required_argument, NULL, 'D' }, + { "iface", required_argument, NULL, 'i' }, + { "ipv4", no_argument, NULL, '4' }, + { "ipv6", no_argument, NULL, '6' }, + { "rx", no_argument, NULL, 'r' }, + { "smac", required_argument, NULL, 'S' }, + { "test", required_argument, NULL, 't' }, + { "verbose", no_argument, NULL, 'v' }, + { 0, 0, 0, 0 } + }; + int c; + + while ((c = getopt_long(argc, argv, "46D:i:rS:t:v", opts, NULL)) != -1) { + switch (c) { + case '4': + proto = PF_INET; + ethhdr_proto = htons(ETH_P_IP); + break; + case '6': + proto = PF_INET6; + ethhdr_proto = htons(ETH_P_IPV6); + break; + case 'D': + dmac = optarg; + break; + case 'i': + ifname = optarg; + break; + case 'r': + tx_socket = false; + break; + case 'S': + smac = optarg; + break; + case 't': + testname = optarg; + break; + case 'v': + verbose = true; + break; + default: + error(1, 0, "%s invalid option %c\n", __func__, c); + break; + } + } +} + +int main(int argc, char **argv) +{ + parse_args(argc, argv); + + if (proto == PF_INET) { + tcp_offset = ETH_HLEN + sizeof(struct iphdr); + total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (proto == PF_INET6) { + tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); + total_hdr_len = MAX_HDR_LEN; + } else { + error(1, 0, "Protocol family is not ipv4 or ipv6"); + } + + read_MAC(src_mac, smac); + read_MAC(dst_mac, dmac); + + if (tx_socket) + gro_sender(); + else + gro_receiver(); + return 0; +} diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh new file mode 100755 index 000000000000..794d2bf36dd7 --- /dev/null +++ b/tools/testing/selftests/net/gro.sh @@ -0,0 +1,128 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source setup_loopback.sh +readonly SERVER_MAC="aa:00:00:00:00:02" +readonly CLIENT_MAC="aa:00:00:00:00:01" +readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") +readonly PROTOS=("ipv4" "ipv6") +dev="eth0" +test="all" +proto="ipv4" + +setup_interrupt() { + # Use timer on host to trigger the network stack + # Also disable device interrupt to not depend on NIC interrupt + # Reduce test flakiness caused by unexpected interrupts + echo 100000 >"${FLUSH_PATH}" + echo 50 >"${IRQ_PATH}" +} + +setup_ns() { + # Set up server_ns namespace and client_ns namespace + setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}" + setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}" +} + +cleanup_ns() { + cleanup_macvlan_ns server_ns server client_ns client +} + +setup() { + setup_loopback_environment "${dev}" + setup_interrupt +} + +cleanup() { + cleanup_loopback "${dev}" + + echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" + echo "${HARD_IRQS}" >"${IRQ_PATH}" +} + +run_test() { + local server_pid=0 + local exit_code=0 + local protocol=$1 + local test=$2 + local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \ + "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) + + setup_ns + # Each test is run 3 times to deflake, because given the receive timing, + # not all packets that should coalesce will be considered in the same flow + # on every try. + for tries in {1..3}; do + # Actual test starts here + ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ + 1>>log.txt & + server_pid=$! + sleep 0.5 # to allow for socket init + ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \ + 1>>log.txt + wait "${server_pid}" + exit_code=$? + if [[ "${exit_code}" -eq 0 ]]; then + break; + fi + done + cleanup_ns + echo ${exit_code} +} + +run_all_tests() { + local failed_tests=() + for proto in "${PROTOS[@]}"; do + for test in "${TESTS[@]}"; do + echo "running test ${proto} ${test}" >&2 + exit_code=$(run_test $proto $test) + if [[ "${exit_code}" -ne 0 ]]; then + failed_tests+=("${proto}_${test}") + fi; + done; + done + if [[ ${#failed_tests[@]} -ne 0 ]]; then + echo "failed tests: ${failed_tests[*]}. \ + Please see log.txt for more logs" + exit 1 + else + echo "All Tests Succeeded!" + fi; +} + +usage() { + echo "Usage: $0 \ + [-i <DEV>] \ + [-t data|ack|flags|tcp|ip|large] \ + [-p <ipv4|ipv6>]" 1>&2; + exit 1; +} + +while getopts "i:t:p:" opt; do + case "${opt}" in + i) + dev="${OPTARG}" + ;; + t) + test="${OPTARG}" + ;; + p) + proto="${OPTARG}" + ;; + *) + usage + ;; + esac +done + +readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" +readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" +readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" +readonly HARD_IRQS="$(< ${IRQ_PATH})" +setup +trap cleanup EXIT +if [[ "${test}" == "all" ]]; then + run_all_tests +else + run_test "${proto}" "${test}" +fi; diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index f23438d512c5..3d7dde2c321b 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -484,13 +484,16 @@ enum desc_type { MONITOR_ACQUIRE, EXPIRE_STATE, EXPIRE_POLICY, + SPDINFO_ATTRS, }; const char *desc_name[] = { "create tunnel", "alloc spi", "monitor acquire", "expire state", - "expire policy" + "expire policy", + "spdinfo attributes", + "" }; struct xfrm_desc { enum desc_type type; @@ -1593,6 +1596,155 @@ out_close: return ret; } +static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq, + unsigned thresh4_l, unsigned thresh4_r, + unsigned thresh6_l, unsigned thresh6_r, + bool add_bad_attr) + +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrmu_spdhthresh thresh; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + thresh.lbits = thresh4_l; + thresh.rbits = thresh4_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + thresh.lbits = thresh6_l; + thresh.rbits = thresh6_r; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh))) + return -1; + + if (add_bad_attr) { + BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) { + pr_err("adding attribute failed: no space"); + return -1; + } + } + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return -1; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + return 0; +} + +static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq) +{ + struct { + struct nlmsghdr nh; + union { + uint32_t unused; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) { + pr_err("Can't set SPD HTHRESH"); + return KSFT_FAIL; + } + + memset(&req, 0, sizeof(req)); + + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused)); + req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) { + size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused)); + struct rtattr *attr = (void *)req.attrbuf; + int got_thresh = 0; + + for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) { + if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 32 || t->rbits != 31) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) { + struct xfrmu_spdhthresh *t = RTA_DATA(attr); + + got_thresh++; + if (t->lbits != 120 || t->rbits != 16) { + pr_err("thresh differ: %u, %u", + t->lbits, t->rbits); + return KSFT_FAIL; + } + } + } + if (got_thresh != 2) { + pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh); + return KSFT_FAIL; + } + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + return KSFT_FAIL; + } else { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + return -1; + } + + /* Restore the default */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) { + pr_err("Can't restore SPD HTHRESH"); + return KSFT_FAIL; + } + + /* + * At this moment xfrm uses nlmsg_parse_deprecated(), which + * implies NL_VALIDATE_LIBERAL - ignoring attributes with + * (type > maxtype). nla_parse_depricated_strict() would enforce + * it. Or even stricter nla_parse(). + * Right now it's not expected to fail, but to be ignored. + */ + if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true)) + return KSFT_PASS; + + return KSFT_PASS; +} + static int child_serv(int xfrm_sock, uint32_t *seq, unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) { @@ -1717,6 +1869,9 @@ static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) case EXPIRE_POLICY: ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); break; + case SPDINFO_ATTRS: + ret = xfrm_spdinfo_attrs(xfrm_sock, &seq); + break; default: printk("Unknown desc type %d", desc.type); exit(KSFT_FAIL); @@ -1994,8 +2149,10 @@ static int write_proto_plan(int fd, int proto) * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 * * Check the affected by the UABI difference structures. + * Also, check translation for xfrm_set_spdinfo: it has it's own attributes + * which needs to be correctly copied, but not translated. */ -const unsigned int compat_plan = 4; +const unsigned int compat_plan = 5; static int write_compat_struct_tests(int test_desc_fd) { struct xfrm_desc desc = {}; @@ -2019,6 +2176,10 @@ static int write_compat_struct_tests(int test_desc_fd) if (__write_desc(test_desc_fd, &desc)) return -1; + desc.type = SPDINFO_ATTRS; + if (__write_desc(test_desc_fd, &desc)) + return -1; + return 0; } diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh index 170be65e0816..1cbfeb5052ec 100755 --- a/tools/testing/selftests/net/psock_snd.sh +++ b/tools/testing/selftests/net/psock_snd.sh @@ -86,9 +86,6 @@ echo "raw truncate hlen - 1 (expected to fail: EINVAL)" echo "raw gso min size" ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}" -echo "raw gso min size - 1 (expected to fail)" -(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}") - echo "raw gso max size" ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}" diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh new file mode 100755 index 000000000000..0a8ad97b07ea --- /dev/null +++ b/tools/testing/selftests/net/setup_loopback.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +netdev_check_for_carrier() { + local -r dev="$1" + + for i in {1..5}; do + carrier="$(cat /sys/class/net/${dev}/carrier)" + if [[ "${carrier}" -ne 1 ]] ; then + echo "carrier not ready yet..." >&2 + sleep 1 + else + echo "carrier ready" >&2 + break + fi + done + echo "${carrier}" +} + +# Assumes that there is no existing ipvlan device on the physical device +setup_loopback_environment() { + local dev="$1" + + # Fail hard if cannot turn on loopback mode for current NIC + ethtool -K "${dev}" loopback on || exit 1 + sleep 1 + + # Check for the carrier + carrier=$(netdev_check_for_carrier ${dev}) + if [[ "${carrier}" -ne 1 ]] ; then + echo "setup_loopback_environment failed" + exit 1 + fi +} + +setup_macvlan_ns(){ + local -r link_dev="$1" + local -r ns_name="$2" + local -r ns_dev="$3" + local -r ns_mac="$4" + local -r addr="$5" + + ip link add link "${link_dev}" dev "${ns_dev}" \ + address "${ns_mac}" type macvlan + exit_code=$? + if [[ "${exit_code}" -ne 0 ]]; then + echo "setup_macvlan_ns failed" + exit $exit_code + fi + + [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" + ip link set dev "${ns_dev}" netns "${ns_name}" + ip -netns "${ns_name}" link set dev "${ns_dev}" up + if [[ -n "${addr}" ]]; then + ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}" + fi + + sleep 1 +} + +cleanup_macvlan_ns(){ + while (( $# >= 2 )); do + ns_name="$1" + ns_dev="$2" + ip -netns "${ns_name}" link del dev "${ns_dev}" + ip netns del "${ns_name}" + shift 2 + done +} + +cleanup_loopback(){ + local -r dev="$1" + + ethtool -K "${dev}" loopback off + sleep 1 + + # Check for the carrier + carrier=$(netdev_check_for_carrier ${dev}) + if [[ "${carrier}" -ne 1 ]] ; then + echo "setup_loopback_environment failed" + exit 1 + fi +} diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c new file mode 100644 index 000000000000..710ac956bdb3 --- /dev/null +++ b/tools/testing/selftests/net/toeplitz.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Toeplitz test + * + * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 + * 2. Compute the rx_hash in software based on the packet contents + * 3. Compare the two + * + * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. + * + * If '-C $rx_irq_cpu_list' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the rxqueue that RSS would select based on this rx_hash + * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq + * 7. Compare the cpus from 4 and 6 + * + * Else if '-r $rps_bitmap' is given, also + * + * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU + * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap + * 6. Compare the cpus from 4 and 5 + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <net/if.h> +#include <netdb.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <poll.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/sysinfo.h> +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> + +#define TOEPLITZ_KEY_MIN_LEN 40 +#define TOEPLITZ_KEY_MAX_LEN 60 + +#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ +#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) +#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) + +#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) + +#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ + +#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ + +/* configuration options (cmdline arguments) */ +static uint16_t cfg_dport = 8000; +static int cfg_family = AF_INET6; +static char *cfg_ifname = "eth0"; +static int cfg_num_queues; +static int cfg_num_rps_cpus; +static bool cfg_sink; +static int cfg_type = SOCK_STREAM; +static int cfg_timeout_msec = 1000; +static bool cfg_verbose; + +/* global vars */ +static int num_cpus; +static int ring_block_nr; +static int ring_block_sz; + +/* stats */ +static int frames_received; +static int frames_nohash; +static int frames_error; + +#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) + +/* tpacket ring */ +struct ring_state { + int fd; + char *mmap; + int idx; + int cpu; +}; + +static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ +static int rps_silo_to_cpu[RPS_MAX_CPUS]; +static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; +static struct ring_state rings[RSS_MAX_CPUS]; + +static inline uint32_t toeplitz(const unsigned char *four_tuple, + const unsigned char *key) +{ + int i, bit, ret = 0; + uint32_t key32; + + key32 = ntohl(*((uint32_t *)key)); + key += 4; + + for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { + for (bit = 7; bit >= 0; bit--) { + if (four_tuple[i] & (1 << bit)) + ret ^= key32; + + key32 <<= 1; + key32 |= !!(key[0] & (1 << bit)); + } + key++; + } + + return ret; +} + +/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ +static void verify_rss(uint32_t rx_hash, int cpu) +{ + int queue = rx_hash % cfg_num_queues; + + log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); + if (rx_irq_cpus[queue] != cpu) { + log_verbose(". error: rss cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void verify_rps(uint64_t rx_hash, int cpu) +{ + int silo = (rx_hash * cfg_num_rps_cpus) >> 32; + + log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); + if (rps_silo_to_cpu[silo] != cpu) { + log_verbose(". error: rps cpu mismatch (%d)", cpu); + frames_error++; + } +} + +static void log_rxhash(int cpu, uint32_t rx_hash, + const char *addrs, int addr_len) +{ + char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; + uint16_t *ports; + + if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || + !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) + error(1, 0, "address parse error"); + + ports = (void *)addrs + (addr_len * 2); + log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", + cpu, rx_hash, saddr, daddr, + ntohs(ports[0]), ntohs(ports[1])); +} + +/* Compare computed rxhash with rxhash received from tpacket_v3 */ +static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) +{ + unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; + uint32_t rx_hash_sw; + const char *addrs; + int addr_len; + + if (cfg_family == AF_INET) { + addr_len = sizeof(struct in_addr); + addrs = pkt + offsetof(struct iphdr, saddr); + } else { + addr_len = sizeof(struct in6_addr); + addrs = pkt + offsetof(struct ip6_hdr, ip6_src); + } + + memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); + rx_hash_sw = toeplitz(four_tuple, toeplitz_key); + + if (cfg_verbose) + log_rxhash(cpu, rx_hash, addrs, addr_len); + + if (rx_hash != rx_hash_sw) { + log_verbose(" != expected 0x%x\n", rx_hash_sw); + frames_error++; + return; + } + + log_verbose(" OK"); + if (cfg_num_queues) + verify_rss(rx_hash, cpu); + else if (cfg_num_rps_cpus) + verify_rps(rx_hash, cpu); + log_verbose("\n"); +} + +static char *recv_frame(const struct ring_state *ring, char *frame) +{ + struct tpacket3_hdr *hdr = (void *)frame; + + if (hdr->hv1.tp_rxhash) + verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, + ring->cpu); + else + frames_nohash++; + + return frame + hdr->tp_next_offset; +} + +/* A single TPACKET_V3 block can hold multiple frames */ +static void recv_block(struct ring_state *ring) +{ + struct tpacket_block_desc *block; + char *frame; + int i; + + block = (void *)(ring->mmap + ring->idx * ring_block_sz); + if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) + return; + + frame = (char *)block; + frame += block->hdr.bh1.offset_to_first_pkt; + + for (i = 0; i < block->hdr.bh1.num_pkts; i++) { + frame = recv_frame(ring, frame); + frames_received++; + } + + block->hdr.bh1.block_status = TP_STATUS_KERNEL; + ring->idx = (ring->idx + 1) % ring_block_nr; +} + +/* simple test: sleep once unconditionally and then process all rings */ +static void process_rings(void) +{ + int i; + + usleep(1000 * cfg_timeout_msec); + + for (i = 0; i < num_cpus; i++) + recv_block(&rings[i]); + + fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", + frames_received - frames_nohash - frames_error, + frames_nohash, frames_error); +} + +static char *setup_ring(int fd) +{ + struct tpacket_req3 req3 = {0}; + void *ring; + + req3.tp_retire_blk_tov = cfg_timeout_msec; + req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; + + req3.tp_frame_size = 2048; + req3.tp_frame_nr = 1 << 10; + req3.tp_block_nr = 2; + + req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; + req3.tp_block_size /= req3.tp_block_nr; + + if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) + error(1, errno, "setsockopt PACKET_RX_RING"); + + ring_block_sz = req3.tp_block_size; + ring_block_nr = req3.tp_block_nr; + + ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); + if (ring == MAP_FAILED) + error(1, 0, "mmap failed"); + + return ring; +} + +static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +/* filter on transport protocol and destination port */ +static void set_filter(int fd) +{ + const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ + uint8_t proto; + + proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; + if (cfg_family == AF_INET) + __set_filter(fd, offsetof(struct iphdr, protocol), proto, + sizeof(struct iphdr) + off_dport); + else + __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, + sizeof(struct ip6_hdr) + off_dport); +} + +/* drop everything: used temporarily during setup */ +static void set_filter_null(int fd) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET + BPF_K, 0), + }; + struct sock_fprog prog = {}; + + prog.filter = filter; + prog.len = sizeof(filter) / sizeof(struct sock_filter); + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) + error(1, errno, "setsockopt filter"); +} + +static int create_ring(char **ring) +{ + struct fanout_args args = { + .id = 1, + .type_flags = PACKET_FANOUT_CPU, + .max_num_members = RSS_MAX_CPUS + }; + struct sockaddr_ll ll = { 0 }; + int fd, val; + + fd = socket(PF_PACKET, SOCK_DGRAM, 0); + if (fd == -1) + error(1, errno, "socket creation failed"); + + val = TPACKET_V3; + if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) + error(1, errno, "setsockopt PACKET_VERSION"); + *ring = setup_ring(fd); + + /* block packets until all rings are added to the fanout group: + * else packets can arrive during setup and get misclassified + */ + set_filter_null(fd); + + ll.sll_family = AF_PACKET; + ll.sll_ifindex = if_nametoindex(cfg_ifname); + ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) : + htons(ETH_P_IPV6); + if (bind(fd, (void *)&ll, sizeof(ll))) + error(1, errno, "bind"); + + /* must come after bind: verifies all programs in group match */ + if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { + /* on failure, retry using old API if that is sufficient: + * it has a hard limit of 256 sockets, so only try if + * (a) only testing rxhash, not RSS or (b) <= 256 cpus. + * in this API, the third argument is left implicit. + */ + if (cfg_num_queues || num_cpus > 256 || + setsockopt(fd, SOL_PACKET, PACKET_FANOUT, + &args, sizeof(uint32_t))) + error(1, errno, "setsockopt PACKET_FANOUT cpu"); + } + + return fd; +} + +/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ +static int setup_sink(void) +{ + int fd, val; + + fd = socket(cfg_family, cfg_type, 0); + if (fd == -1) + error(1, errno, "socket %d.%d", cfg_family, cfg_type); + + val = 1 << 20; + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) + error(1, errno, "setsockopt rcvbuf"); + + return fd; +} + +static void setup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + rings[i].cpu = i; + rings[i].fd = create_ring(&rings[i].mmap); + } + + /* accept packets once all rings in the fanout group are up */ + for (i = 0; i < num_cpus; i++) + set_filter(rings[i].fd); +} + +static void cleanup_rings(void) +{ + int i; + + for (i = 0; i < num_cpus; i++) { + if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) + error(1, errno, "munmap"); + if (close(rings[i].fd)) + error(1, errno, "close"); + } +} + +static void parse_cpulist(const char *arg) +{ + do { + rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); + + arg = strchr(arg, ','); + if (!arg) + break; + arg++; // skip ',' + } while (1); +} + +static void show_cpulist(void) +{ + int i; + + for (i = 0; i < cfg_num_queues; i++) + fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); +} + +static void show_silos(void) +{ + int i; + + for (i = 0; i < cfg_num_rps_cpus; i++) + fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); +} + +static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) +{ + int i, ret, off; + + if (slen < TOEPLITZ_STR_MIN_LEN || + slen > TOEPLITZ_STR_MAX_LEN + 1) + error(1, 0, "invalid toeplitz key"); + + for (i = 0, off = 0; off < slen; i++, off += 3) { + ret = sscanf(str + off, "%hhx", &key[i]); + if (ret != 1) + error(1, 0, "key parse error at %d off %d len %d", + i, off, slen); + } +} + +static void parse_rps_bitmap(const char *arg) +{ + unsigned long bitmap; + int i; + + bitmap = strtoul(arg, NULL, 0); + + if (bitmap & ~(RPS_MAX_CPUS - 1)) + error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu", + bitmap, RPS_MAX_CPUS - 1); + + for (i = 0; i < RPS_MAX_CPUS; i++) + if (bitmap & 1UL << i) + rps_silo_to_cpu[cfg_num_rps_cpus++] = i; +} + +static void parse_opts(int argc, char **argv) +{ + static struct option long_options[] = { + {"dport", required_argument, 0, 'd'}, + {"cpus", required_argument, 0, 'C'}, + {"key", required_argument, 0, 'k'}, + {"iface", required_argument, 0, 'i'}, + {"ipv4", no_argument, 0, '4'}, + {"ipv6", no_argument, 0, '6'}, + {"sink", no_argument, 0, 's'}, + {"tcp", no_argument, 0, 't'}, + {"timeout", required_argument, 0, 'T'}, + {"udp", no_argument, 0, 'u'}, + {"verbose", no_argument, 0, 'v'}, + {"rps", required_argument, 0, 'r'}, + {0, 0, 0, 0} + }; + bool have_toeplitz = false; + int index, c; + + while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:u:v", long_options, &index)) != -1) { + switch (c) { + case '4': + cfg_family = AF_INET; + break; + case '6': + cfg_family = AF_INET6; + break; + case 'C': + parse_cpulist(optarg); + break; + case 'd': + cfg_dport = strtol(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'k': + parse_toeplitz_key(optarg, strlen(optarg), + toeplitz_key); + have_toeplitz = true; + break; + case 'r': + parse_rps_bitmap(optarg); + break; + case 's': + cfg_sink = true; + break; + case 't': + cfg_type = SOCK_STREAM; + break; + case 'T': + cfg_timeout_msec = strtol(optarg, NULL, 0); + break; + case 'u': + cfg_type = SOCK_DGRAM; + break; + case 'v': + cfg_verbose = true; + break; + + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!have_toeplitz) + error(1, 0, "Must supply rss key ('-k')"); + + num_cpus = get_nprocs(); + if (num_cpus > RSS_MAX_CPUS) + error(1, 0, "increase RSS_MAX_CPUS"); + + if (cfg_num_queues && cfg_num_rps_cpus) + error(1, 0, + "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); + if (cfg_verbose) { + show_cpulist(); + show_silos(); + } +} + +int main(int argc, char **argv) +{ + const int min_tests = 10; + int fd_sink = -1; + + parse_opts(argc, argv); + + if (cfg_sink) + fd_sink = setup_sink(); + + setup_rings(); + process_rings(); + cleanup_rings(); + + if (cfg_sink && close(fd_sink)) + error(1, errno, "close sink"); + + if (frames_received - frames_nohash < min_tests) + error(1, 0, "too few frames for verification"); + + return frames_error; +} diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh new file mode 100755 index 000000000000..0a49907cd4fe --- /dev/null +++ b/tools/testing/selftests/net/toeplitz.sh @@ -0,0 +1,199 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping +# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu +# ('-rps <rps_map>') +# +# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action, +# which is a driver-specific encoding. +# +# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \ +# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)] + +source setup_loopback.sh +readonly SERVER_IP4="192.168.1.200/24" +readonly SERVER_IP6="fda8::1/64" +readonly SERVER_MAC="aa:00:00:00:00:02" + +readonly CLIENT_IP4="192.168.1.100/24" +readonly CLIENT_IP6="fda8::2/64" +readonly CLIENT_MAC="aa:00:00:00:00:01" + +PORT=8000 +KEY="$(</proc/sys/net/core/netdev_rss_key)" +TEST_RSS=false +RPS_MAP="" +PROTO_FLAG="" +IP_FLAG="" +DEV="eth0" + +# Return the number of rxqs among which RSS is configured to spread packets. +# This is determined by reading the RSS indirection table using ethtool. +get_rss_cfg_num_rxqs() { + echo $(ethtool -x "${DEV}" | + egrep [[:space:]]+[0-9]+:[[:space:]]+ | + cut -d: -f2- | + awk '{$1=$1};1' | + tr ' ' '\n' | + sort -u | + wc -l) +} + +# Return a list of the receive irq handler cpus. +# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc. +# Reads /sys/kernel/irq/ in order, so algorithm depends on +# irq_{rxq-0} < irq_{rxq-1}, etc. +get_rx_irq_cpus() { + CPUS="" + # sort so that irq 2 is read before irq 10 + SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V) + # Consider only as many queues as RSS actually uses. We assume that + # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1). + RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs) + RXQ_COUNT=0 + + for i in ${SORTED_IRQS} + do + [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break + # lookup relevant IRQs by action name + [[ -e "$i/actions" ]] || continue + cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue + irqname=$(<"$i/actions") + + # does the IRQ get called + irqcount=$(cat "$i/per_cpu_count" | tr -d '0,') + [[ -n "${irqcount}" ]] || continue + + # lookup CPU + irq=$(basename "$i") + cpu=$(cat "/proc/irq/$irq/smp_affinity_list") + + if [[ -z "${CPUS}" ]]; then + CPUS="${cpu}" + else + CPUS="${CPUS},${cpu}" + fi + RXQ_COUNT=$((RXQ_COUNT+1)) + done + + echo "${CPUS}" +} + +get_disable_rfs_cmd() { + echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;" +} + +get_set_rps_bitmaps_cmd() { + CMD="" + for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus + do + CMD="${CMD} echo $1 > ${i};" + done + + echo "${CMD}" +} + +get_disable_rps_cmd() { + echo "$(get_set_rps_bitmaps_cmd 0)" +} + +die() { + echo "$1" + exit 1 +} + +check_nic_rxhash_enabled() { + local -r pattern="receive-hashing:\ on" + + ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled" +} + +parse_opts() { + local prog=$0 + shift 1 + + while [[ "$1" =~ "-" ]]; do + if [[ "$1" = "-irq_prefix" ]]; then + shift + IRQ_PATTERN="^$1-[0-9]*$" + elif [[ "$1" = "-u" || "$1" = "-t" ]]; then + PROTO_FLAG="$1" + elif [[ "$1" = "-4" ]]; then + IP_FLAG="$1" + SERVER_IP="${SERVER_IP4}" + CLIENT_IP="${CLIENT_IP4}" + elif [[ "$1" = "-6" ]]; then + IP_FLAG="$1" + SERVER_IP="${SERVER_IP6}" + CLIENT_IP="${CLIENT_IP6}" + elif [[ "$1" = "-rss" ]]; then + TEST_RSS=true + elif [[ "$1" = "-rps" ]]; then + shift + RPS_MAP="$1" + elif [[ "$1" = "-i" ]]; then + shift + DEV="$1" + else + die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \ + [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]" + fi + shift + done +} + +setup() { + setup_loopback_environment "${DEV}" + + # Set up server_ns namespace and client_ns namespace + setup_macvlan_ns "${DEV}" server_ns server \ + "${SERVER_MAC}" "${SERVER_IP}" + setup_macvlan_ns "${DEV}" client_ns client \ + "${CLIENT_MAC}" "${CLIENT_IP}" +} + +cleanup() { + cleanup_macvlan_ns server_ns server client_ns client + cleanup_loopback "${DEV}" +} + +parse_opts $0 $@ + +setup +trap cleanup EXIT + +check_nic_rxhash_enabled + +# Actual test starts here +if [[ "${TEST_RSS}" = true ]]; then + # RPS/RFS must be disabled because they move packets between cpus, + # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. + eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ + -C "$(get_rx_irq_cpus)" -s -v & +elif [[ ! -z "${RPS_MAP}" ]]; then + eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ + -r "0x${RPS_MAP}" -s -v & +else + ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ + -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & +fi + +server_pid=$! + +ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ + "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & + +client_pid=$! + +wait "${server_pid}" +exit_code=$? +kill -9 "${client_pid}" +if [[ "${exit_code}" -eq 0 ]]; then + echo "Test Succeeded!" +fi +exit "${exit_code}" diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh new file mode 100755 index 000000000000..2fef34f4aba1 --- /dev/null +++ b/tools/testing/selftests/net/toeplitz_client.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A simple program for generating traffic for the toeplitz test. +# +# This program sends packets periodically for, conservatively, 20 seconds. The +# intent is for the calling program to kill this program once it is no longer +# needed, rather than waiting for the 20 second expiration. + +send_traffic() { + expiration=$((SECONDS+20)) + while [[ "${SECONDS}" -lt "${expiration}" ]] + do + if [[ "${PROTO}" == "-u" ]]; then + echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}" + else + echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}" + fi + sleep 0.001 + done +} + +PROTO=$1 +IPVER=$2 +ADDR=$3 +PORT=$4 + +send_traffic diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json new file mode 100644 index 000000000000..88a20c781e49 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json @@ -0,0 +1,137 @@ +[ + { + "id": "ce7d", + "name": "Add mq Qdisc to multi-queue device (4 queues)", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "4", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "2f82", + "name": "Add mq Qdisc to multi-queue device (256 queues)", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 256\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "256", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "c525", + "name": "Add duplicate mq Qdisc", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle 1: mq" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "4", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "128a", + "name": "Delete nonexistent mq Qdisc", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "03a9", + "name": "Delete mq Qdisc twice", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 4\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle 1: mq", + "$TC qdisc del dev $ETH root handle 1: mq" + ], + "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "be0f", + "name": "Add mq Qdisc to single-queue device", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1\" > /sys/bus/netdevsim/new_device" + ], + "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index cd4a27ee1466..ea04f04c173e 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -17,6 +17,7 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'DUMMY': 'dummy1', + 'ETH': 'eth0', 'BATCH_FILE': './batch.txt', 'BATCH_DIR': 'tmp', # Length of time in seconds to wait before terminating a command diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index e363bdaff59d..2ea438e6b8b1 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -210,8 +210,10 @@ static void anon_release_pages(char *rel_area) static void anon_allocate_area(void **alloc_area) { - if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) - err("posix_memalign() failed"); + *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (*alloc_area == MAP_FAILED) + err("mmap of anonymous memory failed"); } static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 986959833d70..b50dbe269f4b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -892,6 +892,8 @@ static void kvm_destroy_vm_debugfs(struct kvm *kvm) static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) { + static DEFINE_MUTEX(kvm_debugfs_lock); + struct dentry *dent; char dir_name[ITOA_MAX_LEN * 2]; struct kvm_stat_data *stat_data; const struct _kvm_stats_desc *pdesc; @@ -903,8 +905,20 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd) return 0; snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd); - kvm->debugfs_dentry = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_lock(&kvm_debugfs_lock); + dent = debugfs_lookup(dir_name, kvm_debugfs_dir); + if (dent) { + pr_warn_ratelimited("KVM: debugfs: duplicate directory %s\n", dir_name); + dput(dent); + mutex_unlock(&kvm_debugfs_lock); + return 0; + } + dent = debugfs_create_dir(dir_name, kvm_debugfs_dir); + mutex_unlock(&kvm_debugfs_lock); + if (IS_ERR(dent)) + return 0; + kvm->debugfs_dentry = dent; kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries, sizeof(*kvm->debugfs_stat_data), GFP_KERNEL_ACCOUNT); @@ -3110,6 +3124,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) ++vcpu->stat.generic.halt_poll_invalid; goto out; } + cpu_relax(); poll_end = cur = ktime_get(); } while (kvm_vcpu_can_poll(cur, stop)); } @@ -4390,6 +4405,16 @@ struct compat_kvm_dirty_log { }; }; +struct compat_kvm_clear_dirty_log { + __u32 slot; + __u32 num_pages; + __u64 first_page; + union { + compat_uptr_t dirty_bitmap; /* one bit per page */ + __u64 padding2; + }; +}; + static long kvm_vm_compat_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -4399,6 +4424,24 @@ static long kvm_vm_compat_ioctl(struct file *filp, if (kvm->mm != current->mm) return -EIO; switch (ioctl) { +#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT + case KVM_CLEAR_DIRTY_LOG: { + struct compat_kvm_clear_dirty_log compat_log; + struct kvm_clear_dirty_log log; + + if (copy_from_user(&compat_log, (void __user *)arg, + sizeof(compat_log))) + return -EFAULT; + log.slot = compat_log.slot; + log.num_pages = compat_log.num_pages; + log.first_page = compat_log.first_page; + log.padding2 = compat_log.padding2; + log.dirty_bitmap = compat_ptr(compat_log.dirty_bitmap); + + r = kvm_vm_ioctl_clear_dirty_log(kvm, &log); + break; + } +#endif case KVM_GET_DIRTY_LOG: { struct compat_kvm_dirty_log compat_log; struct kvm_dirty_log log; @@ -5172,7 +5215,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) } add_uevent_var(env, "PID=%d", kvm->userspace_pid); - if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) { + if (kvm->debugfs_dentry) { char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL_ACCOUNT); if (p) { |