129 files changed, 1115 insertions, 437 deletions
diff --git a/.mailmap b/.mailmap
@@ -25,6 +25,8 @@ Aleksey Gorelov <aleksey_gorelov@phoenix.com>
 Alexander Lobakin <alobakin@pm.me> <alobakin@dlink.ru>
 Alexander Lobakin <alobakin@pm.me> <alobakin@marvell.com>
 Alexander Lobakin <alobakin@pm.me> <bloodyreaper@yandex.ru>
+Alexander Mikhalitsyn <alexander@mihalicyn.com> <alexander.mikhalitsyn@virtuozzo.com>
+Alexander Mikhalitsyn <alexander@mihalicyn.com> <aleksandr.mikhalitsyn@canonical.com>
 Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electrons.com>
 Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
 Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
diff --git a/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
new file mode 100644
index 000000000000..875616d675fe
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/cross-thread-rsb.rst
@@ -0,0 +1,91 @@
+
+.. SPDX-License-Identifier: GPL-2.0
+
+Cross-Thread Return Address Predictions
+=======================================
+
+Certain AMD and Hygon processors are subject to a cross-thread return address
+predictions vulnerability. When running in SMT mode and one sibling thread
+transitions out of C0 state, the other sibling thread could use return target
+predictions from the sibling thread that transitioned out of C0.
+
+The Spectre v2 mitigations protect the Linux kernel, as it fills the return
+address prediction entries with safe targets when context switching to the idle
+thread. However, KVM does allow a VMM to prevent exiting guest mode when
+transitioning out of C0. This could result in a guest-controlled return target
+being consumed by the sibling thread.
+
+Affected processors
+-------------------
+
+The following CPUs are vulnerable:
+
+    - AMD Family 17h processors
+    - Hygon Family 18h processors
+
+Related CVEs
+------------
+
+The following CVE entry is related to this issue:
+
+   ==============  =======================================
+   CVE-2022-27672  Cross-Thread Return Address Predictions
+   ==============  =======================================
+
+Problem
+-------
+
+Affected SMT-capable processors support 1T and 2T modes of execution when SMT
+is enabled. In 2T mode, both threads in a core are executing code. For the
+processor core to enter 1T mode, it is required that one of the threads
+requests to transition out of the C0 state. This can be communicated with the
+HLT instruction or with an MWAIT instruction that requests non-C0.
+When the thread re-enters the C0 state, the processor transitions back
+to 2T mode, assuming the other thread is also still in C0 state.
+
+In affected processors, the return address predictor (RAP) is partitioned
+depending on the SMT mode. For instance, in 2T mode each thread uses a private
+16-entry RAP, but in 1T mode, the active thread uses a 32-entry RAP. Upon
+transition between 1T/2T mode, the RAP contents are not modified but the RAP
+pointers (which control the next return target to use for predictions) may
+change. This behavior may result in return targets from one SMT thread being
+used by RET predictions in the sibling thread following a 1T/2T switch. In
+particular, a RET instruction executed immediately after a transition to 1T may
+use a return target from the thread that just became idle. In theory, this
+could lead to information disclosure if the return targets used do not come
+from trustworthy code.
+
+Attack scenarios
+----------------
+
+An attack can be mounted on affected processors by performing a series of CALL
+instructions with targeted return locations and then transitioning out of C0
+state.
+
+Mitigation mechanism
+--------------------
+
+Before entering idle state, the kernel context switches to the idle thread. The
+context switch fills the RAP entries (referred to as the RSB in Linux) with safe
+targets by performing a sequence of CALL instructions.
+
+Prevent a guest VM from directly putting the processor into an idle state by
+intercepting HLT and MWAIT instructions.
+
+Both mitigations are required to fully address this issue.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+
+Use existing Spectre v2 mitigations that will fill the RSB on context switch.
+
+Mitigation control for KVM - module parameter
+---------------------------------------------
+
+By default, the KVM hypervisor mitigates this issue by intercepting guest
+attempts to transition out of C0. A VMM can use the KVM_CAP_X86_DISABLE_EXITS
+capability to override those interceptions, but since this is not common, the
+mitigation that covers this path is not enabled by default.
+
+The mitigation for the KVM_CAP_X86_DISABLE_EXITS capability can be turned on
+using the boolean module parameter mitigate_smt_rsb, e.g. ``kvm.mitigate_smt_rsb=1``.
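As an illustration of the VMM-side knob the new document describes (this sketch is not part of the diff): a minimal userspace fragment showing how a VMM disables the HLT/MWAIT exits via KVM_CAP_X86_DISABLE_EXITS, assuming a VM file descriptor just obtained from KVM_CREATE_VM and the UAPI constants from <linux/kvm.h>::

    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    /*
     * Ask KVM to stop intercepting HLT/MWAIT for this VM. On CPUs with
     * X86_BUG_SMT_RSB this is exactly the path that kvm.mitigate_smt_rsb=1
     * re-intercepts; with the parameter off, the pr_warn_once() added to
     * kvm_vm_ioctl_enable_cap() in this diff fires instead.
     */
    static int disable_idle_exits(int vm_fd)
    {
            struct kvm_enable_cap cap;
            int supported;

            /* Bitmask of exits this host allows userspace to disable. */
            supported = ioctl(vm_fd, KVM_CHECK_EXTENSION,
                              KVM_CAP_X86_DISABLE_EXITS);
            if (supported < 0)
                    return supported;

            memset(&cap, 0, sizeof(cap));
            cap.cap = KVM_CAP_X86_DISABLE_EXITS;
            cap.args[0] = supported & (KVM_X86_DISABLE_EXITS_HLT |
                                       KVM_X86_DISABLE_EXITS_MWAIT);

            /* Must be enabled before any vCPU is created. */
            return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }

Note how this interacts with the x86.c hunks further down: with mitigate_smt_rsb=1 on an affected CPU, KVM_CHECK_EXTENSION reports only KVM_X86_DISABLE_EXITS_PAUSE, so args[0] masks down to zero and HLT/MWAIT interception stays in place.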
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index 4df436e7c417..e0614760a99e 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -18,3 +18,4 @@ are configurable at compile, boot or run time.
    core-scheduling.rst
    l1d_flush.rst
    processor_mmio_stale_data.rst
+   cross-thread-rsb.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index 39ff1a717625..135d93368d36 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8202,7 +8202,7 @@ F: drivers/fpga/microchip-spi.c
 FPU EMULATOR
 M: Bill Metzenthen <billm@melbpc.org.au>
 S: Maintained
-W: http://floatingpoint.sourceforge.net/emulator/index.html
+W: https://floatingpoint.billm.au/
 F: arch/x86/math-emu/
 
 FRAMEBUFFER CORE
@@ -20323,8 +20323,7 @@ S: Maintained
 F: drivers/platform/x86/system76_acpi.c
 
 SYSV FILESYSTEM
-M: Christoph Hellwig <hch@infradead.org>
-S: Maintained
+S: Orphan
 F: Documentation/filesystems/sysv-fs.rst
 F: fs/sysv/
 F: include/linux/sysv_fs.h
@@ -21819,11 +21818,9 @@ W: http://en.wikipedia.org/wiki/Util-linux
 T: git git://git.kernel.org/pub/scm/utils/util-linux/util-linux.git
 
 UUID HELPERS
-M: Christoph Hellwig <hch@lst.de>
 R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
 L: linux-kernel@vger.kernel.org
 S: Maintained
-T: git git://git.infradead.org/users/hch/uuid.git
 F: include/linux/uuid.h
 F: include/uapi/linux/uuid.h
 F: lib/test_uuid.c
diff --git a/Makefile b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 2
 SUBLEVEL = 0
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index a5193f2146a6..dde06c0f97f3 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -1023,12 +1023,6 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
 	return 0;
 }
 
-static bool armv8pmu_filter(struct pmu *pmu, int cpu)
-{
-	struct arm_pmu *armpmu = to_arm_pmu(pmu);
-	return !cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus);
-}
-
 static void armv8pmu_reset(void *info)
 {
 	struct arm_pmu *cpu_pmu = (struct arm_pmu *)info;
@@ -1069,6 +1063,14 @@ static int
__armv8_pmuv3_map_event(struct perf_event *event, &armv8_pmuv3_perf_cache_map, ARMV8_PMU_EVTYPE_EVENT); + /* + * CHAIN events only work when paired with an adjacent counter, and it + * never makes sense for a user to open one in isolation, as they'll be + * rotated arbitrarily. + */ + if (hw_event_id == ARMV8_PMUV3_PERFCTR_CHAIN) + return -EINVAL; + if (armv8pmu_event_is_64bit(event)) event->hw.flags |= ARMPMU_EVT_64BIT; @@ -1258,7 +1260,6 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name, cpu_pmu->stop = armv8pmu_stop; cpu_pmu->reset = armv8pmu_reset; cpu_pmu->set_event_filter = armv8pmu_set_event_filter; - cpu_pmu->filter = armv8pmu_filter; cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx; diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index d5cd16270c5d..2bbc0fcce04a 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -97,8 +97,8 @@ static inline void tlb_flush(struct mmu_gather *tlb) { if (radix_enabled()) radix__tlb_flush(tlb); - - return hash__tlb_flush(tlb); + else + hash__tlb_flush(tlb); } #ifdef CONFIG_SMP diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 85a63a41c471..d096b04bf80e 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2974,17 +2974,19 @@ unsigned long perf_misc_flags(struct pt_regs *regs) void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap) { - if (!x86_pmu_initialized()) { + /* This API doesn't currently support enumerating hybrid PMUs. */ + if (WARN_ON_ONCE(cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) || + !x86_pmu_initialized()) { memset(cap, 0, sizeof(*cap)); return; } - cap->version = x86_pmu.version; /* - * KVM doesn't support the hybrid PMU yet. - * Return the common value in global x86_pmu, - * which available for all cores. + * Note, hybrid CPU models get tracked as having hybrid PMUs even when + * all E-cores are disabled via BIOS. When E-cores are disabled, the + * base PMU holds the correct number of counters for P-cores. 
*/ + cap->version = x86_pmu.version; cap->num_counters_gp = x86_pmu.num_counters; cap->num_counters_fixed = x86_pmu.num_counters_fixed; cap->bit_width_gp = x86_pmu.cntval_bits; diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 61012476d66e..8f39c46197b8 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -466,5 +466,6 @@ #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9cfca3d7d0e2..f3cc7699e1e1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1256,6 +1256,8 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define MMIO_SBDS BIT(2) /* CPU is affected by RETbleed, speculating where you would not expect it */ #define RETBLEED BIT(3) +/* CPU is affected by SMT (cross-thread) return predictions */ +#define SMT_RSB BIT(4) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1287,8 +1289,8 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED), - VULNBL_HYGON(0x18, RETBLEED), + VULNBL_AMD(0x17, RETBLEED | SMT_RSB), + VULNBL_HYGON(0x18, RETBLEED | SMT_RSB), {} }; @@ -1406,6 +1408,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) !(ia32_cap & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); + if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) + setup_force_cpu_bug(X86_BUG_SMT_RSB); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index cdb91009701d..ee67ba625094 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -165,15 +165,27 @@ static inline void kvm_init_pmu_capability(void) { bool is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL; - perf_get_x86_pmu_capability(&kvm_pmu_cap); - - /* - * For Intel, only support guest architectural pmu - * on a host with architectural pmu. - */ - if ((is_intel && !kvm_pmu_cap.version) || !kvm_pmu_cap.num_counters_gp) + /* + * Hybrid PMUs don't play nice with virtualization without careful + * configuration by userspace, and KVM's APIs for reporting supported + * vPMU features do not account for hybrid PMUs. Disable vPMU support + * for hybrid PMUs until KVM gains a way to let userspace opt-in. + */ + if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) enable_pmu = false; + if (enable_pmu) { + perf_get_x86_pmu_capability(&kvm_pmu_cap); + + /* + * For Intel, only support guest architectural pmu + * on a host with architectural pmu. 
+ */ + if ((is_intel && !kvm_pmu_cap.version) || + !kvm_pmu_cap.num_counters_gp) + enable_pmu = false; + } + if (!enable_pmu) { memset(&kvm_pmu_cap, 0, sizeof(kvm_pmu_cap)); return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index da4bbd043a7b..a2c299d47e69 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -191,6 +191,10 @@ module_param(enable_pmu, bool, 0444); bool __read_mostly eager_page_split = true; module_param(eager_page_split, bool, 0644); +/* Enable/disable SMT_RSB bug mitigation */ +bool __read_mostly mitigate_smt_rsb; +module_param(mitigate_smt_rsb, bool, 0444); + /* * Restoring the host value for MSRs that are only consumed when running in * usermode, e.g. SYSCALL MSRs and TSC_AUX, can be deferred until the CPU @@ -4448,10 +4452,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_CLOCK_VALID_FLAGS; break; case KVM_CAP_X86_DISABLE_EXITS: - r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE | - KVM_X86_DISABLE_EXITS_CSTATE; - if(kvm_can_mwait_in_guest()) - r |= KVM_X86_DISABLE_EXITS_MWAIT; + r = KVM_X86_DISABLE_EXITS_PAUSE; + + if (!mitigate_smt_rsb) { + r |= KVM_X86_DISABLE_EXITS_HLT | + KVM_X86_DISABLE_EXITS_CSTATE; + + if (kvm_can_mwait_in_guest()) + r |= KVM_X86_DISABLE_EXITS_MWAIT; + } break; case KVM_CAP_X86_SMM: if (!IS_ENABLED(CONFIG_KVM_SMM)) @@ -5254,12 +5263,11 @@ static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, { unsigned long val; + memset(dbgregs, 0, sizeof(*dbgregs)); memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); kvm_get_dr(vcpu, 6, &val); dbgregs->dr6 = val; dbgregs->dr7 = vcpu->arch.dr7; - dbgregs->flags = 0; - memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); } static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, @@ -6227,15 +6235,26 @@ split_irqchip_unlock: if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS) break; - if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && - kvm_can_mwait_in_guest()) - kvm->arch.mwait_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) - kvm->arch.hlt_in_guest = true; if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE) kvm->arch.pause_in_guest = true; - if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) - kvm->arch.cstate_in_guest = true; + +#define SMT_RSB_MSG "This processor is affected by the Cross-Thread Return Predictions vulnerability. " \ + "KVM_CAP_X86_DISABLE_EXITS should only be used with SMT disabled or trusted guests." 
+ + if (!mitigate_smt_rsb) { + if (boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible() && + (cap->args[0] & ~KVM_X86_DISABLE_EXITS_PAUSE)) + pr_warn_once(SMT_RSB_MSG); + + if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && + kvm_can_mwait_in_guest()) + kvm->arch.mwait_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_HLT) + kvm->arch.hlt_in_guest = true; + if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE) + kvm->arch.cstate_in_guest = true; + } + r = 0; break; case KVM_CAP_MSR_PLATFORM_INFO: @@ -13456,6 +13475,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_vmgexit_msr_protocol_exit); static int __init kvm_x86_init(void) { kvm_mmu_x86_module_init(); + mitigate_smt_rsb &= boot_cpu_has_bug(X86_BUG_SMT_RSB) && cpu_smt_possible(); return 0; } module_init(kvm_x86_init); diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index fb4b1b5e0dea..46de9cf5c91d 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -387,8 +387,7 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, u8 mtrr_type, uniform; mtrr_type = mtrr_type_lookup(start, end, &uniform); - if (mtrr_type != MTRR_TYPE_WRBACK && - mtrr_type != MTRR_TYPE_INVALID) + if (mtrr_type != MTRR_TYPE_WRBACK) return _PAGE_CACHE_MODE_UC_MINUS; return _PAGE_CACHE_MODE_WB; diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 14a1c0d14916..3bb9bb483fe3 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -421,6 +421,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */ { PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */ { PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */ + { PCI_VDEVICE(INTEL, 0xa0d3), board_ahci_low_power }, /* Tiger Lake UP{3,4} AHCI */ /* JMicron 360/1/3/5/6, match class to avoid IDE function */ { PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 2ea572628b1c..c4c89d24f84c 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -4045,6 +4045,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "Samsung SSD 870*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM | ATA_HORKAGE_NO_NCQ_ON_ATI }, + { "SAMSUNG*MZ7LH*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NO_NCQ_ON_ATI, }, { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM }, diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index 35608a0cf552..4cbcdc5da038 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -67,7 +67,7 @@ module_param(enable_dma, int, 0444); MODULE_PARM_DESC(enable_dma, "Enable use of DMA on interfaces that support it (0=no dma [default], 1=use dma)"); -/** +/* * Convert nanosecond based time to setting used in the * boot bus timing register, based on timing multiple */ @@ -114,7 +114,7 @@ static void octeon_cf_set_boot_reg_cfg(int cs, unsigned int multiplier) cvmx_write_csr(CVMX_MIO_BOOT_REG_CFGX(cs), reg_cfg.u64); } -/** +/* * Called after libata determines the needed PIO mode. This * function programs the Octeon bootbus regions to support the * timing requirements of the PIO mode. @@ -278,7 +278,7 @@ static void octeon_cf_set_dmamode(struct ata_port *ap, struct ata_device *dev) cvmx_write_csr(cf_port->dma_base + DMA_TIM, dma_tim.u64); } -/** +/* * Handle an 8 bit I/O request. 
* * @qc: Queued command @@ -317,7 +317,7 @@ static unsigned int octeon_cf_data_xfer8(struct ata_queued_cmd *qc, return buflen; } -/** +/* * Handle a 16 bit I/O request. * * @qc: Queued command @@ -372,7 +372,7 @@ static unsigned int octeon_cf_data_xfer16(struct ata_queued_cmd *qc, return buflen; } -/** +/* * Read the taskfile for 16bit non-True IDE only. */ static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf) @@ -453,7 +453,7 @@ static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes, return 0; } -/** +/* * Load the taskfile for 16bit non-True IDE only. The device_addr is * not loaded, we do this as part of octeon_cf_exec_command16. */ @@ -525,7 +525,7 @@ static void octeon_cf_dma_setup(struct ata_queued_cmd *qc) ap->ops->sff_exec_command(ap, &qc->tf); } -/** +/* * Start a DMA transfer that was already setup * * @qc: Information about the DMA @@ -580,7 +580,7 @@ static void octeon_cf_dma_start(struct ata_queued_cmd *qc) cvmx_write_csr(cf_port->dma_base + DMA_CFG, mio_boot_dma_cfg.u64); } -/** +/* * * LOCKING: * spin_lock_irqsave(host lock) diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 957cf6bb8c05..d3f55ca06ed3 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -706,6 +706,8 @@ static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev) return -ENOMEM; qcom_cpufreq.soc_data = of_device_get_match_data(dev); + if (!qcom_cpufreq.soc_data) + return -ENODEV; clk_data = devm_kzalloc(dev, struct_size(clk_data, hws, num_domains), GFP_KERNEL); if (!clk_data) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index ec7cfd4f52b1..e9917a45b005 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1531,6 +1531,7 @@ config GPIO_MLXBF2 tristate "Mellanox BlueField 2 SoC GPIO" depends on (MELLANOX_PLATFORM && ARM64 && ACPI) || (64BIT && COMPILE_TEST) select GPIO_GENERIC + select GPIOLIB_IRQCHIP help Say Y here if you want GPIO support on Mellanox BlueField 2 SoC. 
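A note on the pata_octeon_cf hunks above: the `/**` openers are demoted to plain `/*` because `/**` marks a comment as kernel-doc, and these comments lack the structure that scripts/kernel-doc expects, so they trigger build-time warnings. A hypothetical sketch of the format that would satisfy kernel-doc instead; the parameter list is reconstructed for illustration from the @qc tag visible above, not taken verbatim from the driver::

    /**
     * octeon_cf_data_xfer8 - Handle an 8 bit I/O request.
     * @qc:     Queued command.
     * @buffer: Data buffer.
     * @buflen: Buffer length.
     * @rw:     True to write.
     *
     * The "name - summary" line and one @tag per parameter are mandatory;
     * a comment carrying only prose, as in this driver, should open with
     * the plain comment marker instead, which is what these hunks do.
     *
     * Returns the number of bytes transferred.
     */
    static unsigned int octeon_cf_data_xfer8(struct ata_queued_cmd *qc,
                                             unsigned char *buffer,
                                             unsigned int buflen, int rw);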
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 60514bc5454f..9e3893b19e4f 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -736,7 +736,7 @@ static void gpio_sim_remove_hogs(struct gpio_sim_device *dev) gpiod_remove_hogs(dev->hogs); - for (hog = dev->hogs; !hog->chip_label; hog++) { + for (hog = dev->hogs; hog->chip_label; hog++) { kfree(hog->chip_label); kfree(hog->line_name); } diff --git a/drivers/gpio/gpio-vf610.c b/drivers/gpio/gpio-vf610.c index 9db42f6a2043..9033db00c360 100644 --- a/drivers/gpio/gpio-vf610.c +++ b/drivers/gpio/gpio-vf610.c @@ -30,7 +30,6 @@ struct fsl_gpio_soc_data { struct vf610_gpio_port { struct gpio_chip gc; - struct irq_chip ic; void __iomem *base; void __iomem *gpio_base; const struct fsl_gpio_soc_data *sdata; @@ -207,20 +206,24 @@ static int vf610_gpio_irq_set_type(struct irq_data *d, u32 type) static void vf610_gpio_irq_mask(struct irq_data *d) { - struct vf610_gpio_port *port = - gpiochip_get_data(irq_data_get_irq_chip_data(d)); - void __iomem *pcr_base = port->base + PORT_PCR(d->hwirq); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct vf610_gpio_port *port = gpiochip_get_data(gc); + irq_hw_number_t gpio_num = irqd_to_hwirq(d); + void __iomem *pcr_base = port->base + PORT_PCR(gpio_num); vf610_gpio_writel(0, pcr_base); + gpiochip_disable_irq(gc, gpio_num); } static void vf610_gpio_irq_unmask(struct irq_data *d) { - struct vf610_gpio_port *port = - gpiochip_get_data(irq_data_get_irq_chip_data(d)); - void __iomem *pcr_base = port->base + PORT_PCR(d->hwirq); + struct gpio_chip *gc = irq_data_get_irq_chip_data(d); + struct vf610_gpio_port *port = gpiochip_get_data(gc); + irq_hw_number_t gpio_num = irqd_to_hwirq(d); + void __iomem *pcr_base = port->base + PORT_PCR(gpio_num); - vf610_gpio_writel(port->irqc[d->hwirq] << PORT_PCR_IRQC_OFFSET, + gpiochip_enable_irq(gc, gpio_num); + vf610_gpio_writel(port->irqc[gpio_num] << PORT_PCR_IRQC_OFFSET, pcr_base); } @@ -237,6 +240,17 @@ static int vf610_gpio_irq_set_wake(struct irq_data *d, u32 enable) return 0; } +static const struct irq_chip vf610_irqchip = { + .name = "gpio-vf610", + .irq_ack = vf610_gpio_irq_ack, + .irq_mask = vf610_gpio_irq_mask, + .irq_unmask = vf610_gpio_irq_unmask, + .irq_set_type = vf610_gpio_irq_set_type, + .irq_set_wake = vf610_gpio_irq_set_wake, + .flags = IRQCHIP_IMMUTABLE, + GPIOCHIP_IRQ_RESOURCE_HELPERS, +}; + static void vf610_gpio_disable_clk(void *data) { clk_disable_unprepare(data); @@ -249,7 +263,6 @@ static int vf610_gpio_probe(struct platform_device *pdev) struct vf610_gpio_port *port; struct gpio_chip *gc; struct gpio_irq_chip *girq; - struct irq_chip *ic; int i; int ret; @@ -315,14 +328,6 @@ static int vf610_gpio_probe(struct platform_device *pdev) gc->direction_output = vf610_gpio_direction_output; gc->set = vf610_gpio_set; - ic = &port->ic; - ic->name = "gpio-vf610"; - ic->irq_ack = vf610_gpio_irq_ack; - ic->irq_mask = vf610_gpio_irq_mask; - ic->irq_unmask = vf610_gpio_irq_unmask; - ic->irq_set_type = vf610_gpio_irq_set_type; - ic->irq_set_wake = vf610_gpio_irq_set_wake; - /* Mask all GPIO interrupts */ for (i = 0; i < gc->ngpio; i++) vf610_gpio_writel(0, port->base + PORT_PCR(i)); @@ -331,7 +336,7 @@ static int vf610_gpio_probe(struct platform_device *pdev) vf610_gpio_writel(~0, port->base + PORT_ISFR); girq = &gc->irq; - girq->chip = ic; + gpio_irq_chip_set_chip(girq, &vf610_irqchip); girq->parent_handler = vf610_gpio_irq_handler; girq->num_parents = 1; girq->parents = devm_kcalloc(&pdev->dev, 1, diff --git 
a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 17c53f484280..34ff048e70d0 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -1637,6 +1637,18 @@ static const struct dmi_system_id gpiolib_acpi_quirks[] __initconst = { .ignore_wake = "ELAN0415:00@9", }, }, + { + /* + * Spurious wakeups from TP_ATTN# pin + * Found in BIOS 1.7.7 + */ + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "NH5xAx"), + }, + .driver_data = &(struct acpi_gpiolib_dmi_quirk) { + .ignore_wake = "SYNA1202:00@16", + }, + }, {} /* Terminating entry */ }; diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h index 9475f99a9694..5a08693b8fb1 100644 --- a/drivers/gpio/gpiolib-acpi.h +++ b/drivers/gpio/gpiolib-acpi.h @@ -14,7 +14,6 @@ #include <linux/gpio/consumer.h> -struct acpi_device; struct device; struct fwnode_handle; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 315cbdf61979..9abfb482b615 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -53,7 +53,8 @@ config DRM_DEBUG_MM config DRM_USE_DYNAMIC_DEBUG bool "use dynamic debug to implement drm.debug" - default y + default n + depends on BROKEN depends on DRM depends on DYNAMIC_DEBUG || DYNAMIC_DEBUG_CORE depends on JUMP_LABEL diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 2f28a8c02f64..fbf2f24169eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4268,6 +4268,9 @@ exit: } adev->in_suspend = false; + if (adev->enable_mes) + amdgpu_mes_self_test(adev); + if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) DRM_WARN("smart shift update failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 5dff79e8f301..1c4787000a5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -1344,7 +1344,7 @@ static int mes_v11_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* it's only intended for use in mes_self_test case, not for s0ix and reset */ - if (!amdgpu_in_reset(adev) && !adev->in_s0ix && + if (!amdgpu_in_reset(adev) && !adev->in_s0ix && !adev->in_suspend && (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))) amdgpu_mes_self_test(adev); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 93dee3d1a483..9c7b69d377bd 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -9658,7 +9658,11 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev, * `dcn10_can_pipe_disable_cursor`). By now, all modified planes are in * atomic state, so call drm helper to normalize zpos. 
*/ - drm_atomic_normalize_zpos(dev, state); + ret = drm_atomic_normalize_zpos(dev, state); + if (ret) { + drm_dbg(dev, "drm_atomic_normalize_zpos() failed\n"); + goto fail; + } /* Remove exiting planes if they are modified */ for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index c7443317c747..66a4a41c3fe9 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -714,7 +714,7 @@ static int ast_primary_plane_init(struct ast_private *ast) struct ast_plane *ast_primary_plane = &ast->primary_plane; struct drm_plane *primary_plane = &ast_primary_plane->base; void __iomem *vaddr = ast->vram; - u64 offset = ast->vram_base; + u64 offset = 0; /* with shmem, the primary plane is always at offset 0 */ unsigned long cursor_size = roundup(AST_HWC_SIZE + AST_HWC_SIGNATURE_SIZE, PAGE_SIZE); unsigned long size = ast->vram_fb_available - cursor_size; int ret; @@ -972,7 +972,7 @@ static int ast_cursor_plane_init(struct ast_private *ast) return -ENOMEM; vaddr = ast->vram + ast->vram_fb_available - size; - offset = ast->vram_base + ast->vram_fb_available - size; + offset = ast->vram_fb_available - size; ret = ast_plane_init(dev, ast_cursor_plane, vaddr, offset, size, 0x01, &ast_cursor_plane_funcs, diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index 949c19339015..a0740308555d 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -1355,6 +1355,13 @@ icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal) GAMT_CHKN_BIT_REG, GAMT_CHKN_DISABLE_L3_COH_PIPE); + /* + * Wa_1408615072:icl,ehl (vsunit) + * Wa_1407596294:icl,ehl (hsunit) + */ + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, + VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); + /* Wa_1407352427:icl,ehl */ wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, PSDUNIT_CLKGATE_DIS); @@ -2540,13 +2547,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN11_ENABLE_32_PLANE_MODE); /* - * Wa_1408615072:icl,ehl (vsunit) - * Wa_1407596294:icl,ehl (hsunit) - */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE, - VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS); - - /* * Wa_1408767742:icl[a2..forever],ehl[all] * Wa_1605460711:icl[a0..c0] */ diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 0108613e79d5..7258975331ca 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -711,7 +711,7 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc, struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); if (vc4_encoder->type == VC4_ENCODER_TYPE_HDMI0) { - vc4_state->hvs_load = max(mode->clock * mode->hdisplay / mode->htotal + 1000, + vc4_state->hvs_load = max(mode->clock * mode->hdisplay / mode->htotal + 8000, mode->clock * 9 / 10) * 1000; } else { vc4_state->hvs_load = mode->clock * 1000; diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 55744216392b..7546103f1499 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -97,6 +97,10 @@ #define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_SHIFT 8 #define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_MASK VC4_MASK(15, 8) +#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_MASK VC4_MASK(7, 0) +#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_SET_AVMUTE BIT(0) +#define VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_CLEAR_AVMUTE BIT(4) + # define 
VC4_HD_M_SW_RST BIT(2) # define VC4_HD_M_ENABLE BIT(0) @@ -1306,7 +1310,6 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, VC4_HDMI_VERTB_VBP)); unsigned long flags; unsigned char gcp; - bool gcp_en; u32 reg; int idx; @@ -1341,16 +1344,13 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, switch (vc4_state->output_bpc) { case 12: gcp = 6; - gcp_en = true; break; case 10: gcp = 5; - gcp_en = true; break; case 8: default: - gcp = 4; - gcp_en = false; + gcp = 0; break; } @@ -1359,8 +1359,7 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, * doesn't signal in GCP. */ if (vc4_state->output_format == VC4_HDMI_OUTPUT_YUV422) { - gcp = 4; - gcp_en = false; + gcp = 0; } reg = HDMI_READ(HDMI_DEEP_COLOR_CONFIG_1); @@ -1373,11 +1372,12 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, reg = HDMI_READ(HDMI_GCP_WORD_1); reg &= ~VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1_MASK; reg |= VC4_SET_FIELD(gcp, VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_1); + reg &= ~VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_MASK; + reg |= VC5_HDMI_GCP_WORD_1_GCP_SUBPACKET_BYTE_0_CLEAR_AVMUTE; HDMI_WRITE(HDMI_GCP_WORD_1, reg); reg = HDMI_READ(HDMI_GCP_CONFIG); - reg &= ~VC5_HDMI_GCP_CONFIG_GCP_ENABLE; - reg |= gcp_en ? VC5_HDMI_GCP_CONFIG_GCP_ENABLE : 0; + reg |= VC5_HDMI_GCP_CONFIG_GCP_ENABLE; HDMI_WRITE(HDMI_GCP_CONFIG, reg); reg = HDMI_READ(HDMI_MISC_CONTROL); diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 8b92a45a3c89..bd5acc4a8687 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -340,7 +340,7 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) { struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; - struct drm_gem_dma_object *bo = drm_fb_dma_get_gem_obj(fb, 0); + struct drm_gem_dma_object *bo; int num_planes = fb->format->num_planes; struct drm_crtc_state *crtc_state; u32 h_subsample = fb->format->hsub; @@ -359,8 +359,10 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) if (ret) return ret; - for (i = 0; i < num_planes; i++) + for (i = 0; i < num_planes; i++) { + bo = drm_fb_dma_get_gem_obj(fb, i); vc4_state->offsets[i] = bo->dma_addr + fb->offsets[i]; + } /* * We don't support subpixel source positioning for scaling, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index aa1cd5126a32..4dcf2eb7aa80 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -462,6 +462,9 @@ int vmw_bo_create(struct vmw_private *vmw, return -ENOMEM; } + /* + * vmw_bo_init will delete the *p_bo object if it fails + */ ret = vmw_bo_init(vmw, *p_bo, size, placement, interruptible, pin, bo_free); @@ -470,7 +473,6 @@ int vmw_bo_create(struct vmw_private *vmw, return ret; out_error: - kfree(*p_bo); *p_bo = NULL; return ret; } @@ -596,6 +598,7 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp, ttm_bo_put(&vmw_bo->base); } + drm_gem_object_put(&vmw_bo->base.base); return ret; } @@ -636,6 +639,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data, ret = vmw_user_bo_synccpu_grab(vbo, arg->flags); vmw_bo_unreference(&vbo); + drm_gem_object_put(&vbo->base.base); if (unlikely(ret != 0)) { if (ret == -ERESTARTSYS || ret == -EBUSY) return -EBUSY; @@ -693,7 +697,7 @@ int vmw_bo_unref_ioctl(struct drm_device *dev, void *data, * struct vmw_buffer_object should be placed. * Return: Zero on success, Negative error code on error. 
* - * The vmw buffer object pointer will be refcounted. + * The vmw buffer object pointer will be refcounted (both ttm and gem) */ int vmw_user_bo_lookup(struct drm_file *filp, uint32_t handle, @@ -710,7 +714,6 @@ int vmw_user_bo_lookup(struct drm_file *filp, *out = gem_to_vmw_bo(gobj); ttm_bo_get(&(*out)->base); - drm_gem_object_put(gobj); return 0; } @@ -791,7 +794,8 @@ int vmw_dumb_create(struct drm_file *file_priv, ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, args->size, &args->handle, &vbo); - + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&vbo->base.base); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index a44d53e33cdb..c0686283ffd1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -1160,6 +1160,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv, } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, true, false); ttm_bo_put(&vmw_bo->base); + drm_gem_object_put(&vmw_bo->base.base); if (unlikely(ret != 0)) return ret; @@ -1214,6 +1215,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv, } ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo, false, false); ttm_bo_put(&vmw_bo->base); + drm_gem_object_put(&vmw_bo->base.base); if (unlikely(ret != 0)) return ret; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index ce609e7d758f..4d2c28e39f4e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -146,14 +146,12 @@ int vmw_gem_object_create_with_handle(struct vmw_private *dev_priv, &vmw_sys_placement : &vmw_vram_sys_placement, true, false, &vmw_gem_destroy, p_vbo); - - (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs; if (ret != 0) goto out_no_bo; + (*p_vbo)->base.base.funcs = &vmw_gem_object_funcs; + ret = drm_gem_handle_create(filp, &(*p_vbo)->base.base, handle); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put(&(*p_vbo)->base.base); out_no_bo: return ret; } @@ -180,6 +178,8 @@ int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data, rep->map_handle = drm_vma_node_offset_addr(&vbo->base.base.vma_node); rep->cur_gmr_id = handle; rep->cur_gmr_offset = 0; + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&vbo->base.base); out_no_bo: return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 257f090071f1..445d619e1fdc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1815,8 +1815,10 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, err_out: /* vmw_user_lookup_handle takes one ref so does new_fb */ - if (bo) + if (bo) { vmw_bo_unreference(&bo); + drm_gem_object_put(&bo->base.base); + } if (surface) vmw_surface_unreference(&surface); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index e9f5c89b4ca6..b5b311f2a91a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -458,6 +458,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data, ret = vmw_overlay_update_stream(dev_priv, buf, arg, true); vmw_bo_unreference(&buf); + drm_gem_object_put(&buf->base.base); out_unlock: mutex_unlock(&overlay->mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c index 108a496b5d18..51e83dfa1cac 100644 --- 
a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c @@ -807,6 +807,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv, num_output_sig, tfile, shader_handle); out_bad_arg: vmw_bo_unreference(&buffer); + drm_gem_object_put(&buffer->base.base); return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 3bc63ae768f3..dcfb003841b3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -683,7 +683,7 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) container_of(base, struct vmw_user_surface, prime.base); struct vmw_resource *res = &user_srf->srf.res; - if (base->shareable && res && res->backup) + if (res && res->backup) drm_gem_object_put(&res->backup->base.base); *p_base = NULL; @@ -864,7 +864,11 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, goto out_unlock; } vmw_bo_reference(res->backup); - drm_gem_object_get(&res->backup->base.base); + /* + * We don't expose the handle to the userspace and surface + * already holds a gem reference + */ + drm_gem_handle_delete(file_priv, backup_handle); } tmp = vmw_resource_reference(&srf->res); @@ -1568,8 +1572,6 @@ vmw_gb_surface_define_internal(struct drm_device *dev, drm_vma_node_offset_addr(&res->backup->base.base.vma_node); rep->buffer_size = res->backup->base.base.size; rep->buffer_handle = backup_handle; - if (user_srf->prime.base.shareable) - drm_gem_object_get(&res->backup->base.base); } else { rep->buffer_map_handle = 0; rep->buffer_size = 0; diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c index babf21a0adeb..f191a2a76f3b 100644 --- a/drivers/mmc/core/sdio_bus.c +++ b/drivers/mmc/core/sdio_bus.c @@ -294,6 +294,12 @@ static void sdio_release_func(struct device *dev) if (!(func->card->quirks & MMC_QUIRK_NONSTD_SDIO)) sdio_free_func_cis(func); + /* + * We have now removed the link to the tuples in the + * card structure, so remove the reference. + */ + put_device(&func->card->dev); + kfree(func->info); kfree(func->tmpbuf); kfree(func); @@ -324,6 +330,12 @@ struct sdio_func *sdio_alloc_func(struct mmc_card *card) device_initialize(&func->dev); + /* + * We may link to tuples in the card structure, + * we need make sure we have a reference to it. + */ + get_device(&func->card->dev); + func->dev.parent = &card->dev; func->dev.bus = &sdio_bus_type; func->dev.release = sdio_release_func; @@ -377,10 +389,9 @@ int sdio_add_func(struct sdio_func *func) */ void sdio_remove_func(struct sdio_func *func) { - if (!sdio_func_present(func)) - return; + if (sdio_func_present(func)) + device_del(&func->dev); - device_del(&func->dev); of_node_put(func->dev.of_node); put_device(&func->dev); } diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c index a705ba6eff5b..afaa6cab1adc 100644 --- a/drivers/mmc/core/sdio_cis.c +++ b/drivers/mmc/core/sdio_cis.c @@ -404,12 +404,6 @@ int sdio_read_func_cis(struct sdio_func *func) return ret; /* - * Since we've linked to tuples in the card structure, - * we must make sure we have a reference to it. - */ - get_device(&func->card->dev); - - /* * Vendor/device id is optional for function CIS, so * copy it from the card structure as needed. */ @@ -434,11 +428,5 @@ void sdio_free_func_cis(struct sdio_func *func) } func->tuples = NULL; - - /* - * We have now removed the link to the tuples in the - * card structure, so remove the reference. 
- */ - put_device(&func->card->dev); } diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c index dc2db9c185ea..eda1e2ddcaca 100644 --- a/drivers/mmc/host/jz4740_mmc.c +++ b/drivers/mmc/host/jz4740_mmc.c @@ -1053,6 +1053,16 @@ static int jz4740_mmc_probe(struct platform_device* pdev) mmc->ops = &jz4740_mmc_ops; if (!mmc->f_max) mmc->f_max = JZ_MMC_CLK_RATE; + + /* + * There seems to be a problem with this driver on the JZ4760 and + * JZ4760B SoCs. There, when using the maximum rate supported (50 MHz), + * the communication fails with many SD cards. + * Until this bug is sorted out, limit the maximum rate to 24 MHz. + */ + if (host->version == JZ_MMC_JZ4760 && mmc->f_max > JZ_MMC_CLK_RATE) + mmc->f_max = JZ_MMC_CLK_RATE; + mmc->f_min = mmc->f_max / 128; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 6e5ea0213b47..5c94ad4661ce 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -435,7 +435,8 @@ static int meson_mmc_clk_init(struct meson_host *host) clk_reg |= FIELD_PREP(CLK_CORE_PHASE_MASK, CLK_PHASE_180); clk_reg |= FIELD_PREP(CLK_TX_PHASE_MASK, CLK_PHASE_0); clk_reg |= FIELD_PREP(CLK_RX_PHASE_MASK, CLK_PHASE_0); - clk_reg |= CLK_IRQ_SDIO_SLEEP(host); + if (host->mmc->caps & MMC_CAP_SDIO_IRQ) + clk_reg |= CLK_IRQ_SDIO_SLEEP(host); writel(clk_reg, host->regs + SD_EMMC_CLOCK); /* get the mux parents */ @@ -948,16 +949,18 @@ static irqreturn_t meson_mmc_irq(int irq, void *dev_id) { struct meson_host *host = dev_id; struct mmc_command *cmd; - u32 status, raw_status; + u32 status, raw_status, irq_mask = IRQ_EN_MASK; irqreturn_t ret = IRQ_NONE; + if (host->mmc->caps & MMC_CAP_SDIO_IRQ) + irq_mask |= IRQ_SDIO; raw_status = readl(host->regs + SD_EMMC_STATUS); - status = raw_status & (IRQ_EN_MASK | IRQ_SDIO); + status = raw_status & irq_mask; if (!status) { dev_dbg(host->dev, - "Unexpected IRQ! irq_en 0x%08lx - status 0x%08x\n", - IRQ_EN_MASK | IRQ_SDIO, raw_status); + "Unexpected IRQ! 
irq_en 0x%08x - status 0x%08x\n", + irq_mask, raw_status); return IRQ_NONE; } @@ -1204,6 +1207,11 @@ static int meson_mmc_probe(struct platform_device *pdev) goto free_host; } + mmc->caps |= MMC_CAP_CMD23; + + if (mmc->caps & MMC_CAP_SDIO_IRQ) + mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD; + host->data = (struct meson_mmc_data *) of_device_get_match_data(&pdev->dev); if (!host->data) { @@ -1277,11 +1285,6 @@ static int meson_mmc_probe(struct platform_device *pdev) spin_lock_init(&host->lock); - mmc->caps |= MMC_CAP_CMD23; - - if (mmc->caps & MMC_CAP_SDIO_IRQ) - mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD; - if (host->dram_access_quirk) { /* Limit segments to 1 due to low available sram memory */ mmc->max_segs = 1; diff --git a/drivers/mmc/host/mmc_spi.c b/drivers/mmc/host/mmc_spi.c index 106dd204b1a7..cc333ad67cac 100644 --- a/drivers/mmc/host/mmc_spi.c +++ b/drivers/mmc/host/mmc_spi.c @@ -1437,7 +1437,7 @@ static int mmc_spi_probe(struct spi_device *spi) status = mmc_add_host(mmc); if (status != 0) - goto fail_add_host; + goto fail_glue_init; /* * Index 0 is card detect @@ -1445,7 +1445,7 @@ static int mmc_spi_probe(struct spi_device *spi) */ status = mmc_gpiod_request_cd(mmc, NULL, 0, false, 1000); if (status == -EPROBE_DEFER) - goto fail_add_host; + goto fail_gpiod_request; if (!status) { /* * The platform has a CD GPIO signal that may support @@ -1460,7 +1460,7 @@ static int mmc_spi_probe(struct spi_device *spi) /* Index 1 is write protect/read only */ status = mmc_gpiod_request_ro(mmc, NULL, 1, 0); if (status == -EPROBE_DEFER) - goto fail_add_host; + goto fail_gpiod_request; if (!status) has_ro = true; @@ -1474,7 +1474,7 @@ static int mmc_spi_probe(struct spi_device *spi) ? ", cd polling" : ""); return 0; -fail_add_host: +fail_gpiod_request: mmc_remove_host(mmc); fail_glue_init: mmc_spi_dma_free(host); diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 02bd3cf9a260..6e4f36aaf5db 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -240,12 +240,12 @@ static int bgmac_probe(struct bcma_device *core) bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; bgmac->feature_flags |= BGMAC_FEAT_FLW_CTRL1; bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_PHY; - if (ci->pkg == BCMA_PKG_ID_BCM47188 || - ci->pkg == BCMA_PKG_ID_BCM47186) { + if ((ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM47186) || + (ci->id == BCMA_CHIP_ID_BCM53572 && ci->pkg == BCMA_PKG_ID_BCM47188)) { bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_RGMII; bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED; } - if (ci->pkg == BCMA_PKG_ID_BCM5358) + if (ci->id == BCMA_CHIP_ID_BCM5357 && ci->pkg == BCMA_PKG_ID_BCM5358) bgmac->feature_flags |= BGMAC_FEAT_SW_TYPE_EPHYRMII; break; case BCMA_CHIP_ID_BCM53573: diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 240a7e8a7652..6c32f5c427b5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -9274,10 +9274,14 @@ int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init) netdev_err(bp->dev, "ring reservation/IRQ init failure rc: %d\n", rc); return rc; } - if (tcs && (bp->tx_nr_rings_per_tc * tcs != bp->tx_nr_rings)) { + if (tcs && (bp->tx_nr_rings_per_tc * tcs != + bp->tx_nr_rings - bp->tx_nr_rings_xdp)) { netdev_err(bp->dev, "tx ring reservation failure\n"); netdev_reset_tc(bp->dev); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + if (bp->tx_nr_rings_xdp) + bp->tx_nr_rings_per_tc = 
bp->tx_nr_rings_xdp; + else + bp->tx_nr_rings_per_tc = bp->tx_nr_rings; return -ENOMEM; } return 0; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 53d0083e35da..52eec0a50492 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2921,7 +2921,7 @@ static int i40e_change_mtu(struct net_device *netdev, int new_mtu) struct i40e_pf *pf = vsi->back; if (i40e_enabled_xdp_vsi(vsi)) { - int frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int frame_size = new_mtu + I40E_PACKET_HDR_PAD; if (frame_size > i40e_max_xdp_frame_size(vsi)) return -EINVAL; @@ -13167,6 +13167,8 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev, } br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (!br_spec) + return -EINVAL; nla_for_each_nested(attr, br_spec, rem) { __u16 mode; diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index 8286e47b4bae..0fae0186bd85 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -899,7 +899,7 @@ static int ice_set_object_tx_priority(struct ice_port_info *pi, struct ice_sched { int status; - if (node->tx_priority >= 8) { + if (priority >= 8) { NL_SET_ERR_MSG_MOD(extack, "Priority should be less than 8"); return -EINVAL; } @@ -929,7 +929,7 @@ static int ice_set_object_tx_weight(struct ice_port_info *pi, struct ice_sched_n { int status; - if (node->tx_weight > 200 || node->tx_weight < 1) { + if (weight > 200 || weight < 1) { NL_SET_ERR_MSG_MOD(extack, "Weight must be between 1 and 200"); return -EINVAL; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index b288a01a321a..8ec24f6cf6be 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -275,6 +275,8 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m) if (status && status != -EEXIST) return status; + netdev_dbg(vsi->netdev, "set promisc filter bits for VSI %i: 0x%x\n", + vsi->vsi_num, promisc_m); return 0; } @@ -300,6 +302,8 @@ static int ice_clear_promisc(struct ice_vsi *vsi, u8 promisc_m) promisc_m, 0); } + netdev_dbg(vsi->netdev, "clear promisc filter bits for VSI %i: 0x%x\n", + vsi->vsi_num, promisc_m); return status; } @@ -414,6 +418,16 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) } err = 0; vlan_ops->dis_rx_filtering(vsi); + + /* promiscuous mode implies allmulticast so + * that VSIs that are in promiscuous mode are + * subscribed to multicast packets coming to + * the port + */ + err = ice_set_promisc(vsi, + ICE_MCAST_PROMISC_BITS); + if (err) + goto out_promisc; } } else { /* Clear Rx filter to remove traffic from wire */ @@ -430,6 +444,18 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) NETIF_F_HW_VLAN_CTAG_FILTER) vlan_ops->ena_rx_filtering(vsi); } + + /* disable allmulti here, but only if allmulti is not + * still enabled for the netdev + */ + if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) { + err = ice_clear_promisc(vsi, + ICE_MCAST_PROMISC_BITS); + if (err) { + netdev_err(netdev, "Error %d clearing multicast promiscuous on VSI %i\n", + err, vsi->vsi_num); + } + } } } goto exit; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 7105de6fb344..374b7f10b549 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -800,6 +800,7 @@ 
static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) struct ice_tx_desc *tx_desc; u16 cnt = xdp_ring->count; struct ice_tx_buf *tx_buf; + u16 completed_frames = 0; u16 xsk_frames = 0; u16 last_rs; int i; @@ -809,19 +810,21 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) if ((tx_desc->cmd_type_offset_bsz & cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) { if (last_rs >= ntc) - xsk_frames = last_rs - ntc + 1; + completed_frames = last_rs - ntc + 1; else - xsk_frames = last_rs + cnt - ntc + 1; + completed_frames = last_rs + cnt - ntc + 1; } - if (!xsk_frames) + if (!completed_frames) return; - if (likely(!xdp_ring->xdp_tx_active)) + if (likely(!xdp_ring->xdp_tx_active)) { + xsk_frames = completed_frames; goto skip; + } ntc = xdp_ring->next_to_clean; - for (i = 0; i < xsk_frames; i++) { + for (i = 0; i < completed_frames; i++) { tx_buf = &xdp_ring->tx_buf[ntc]; if (tx_buf->raw_buf) { @@ -837,7 +840,7 @@ static void ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) } skip: tx_desc->cmd_type_offset_bsz = 0; - xdp_ring->next_to_clean += xsk_frames; + xdp_ring->next_to_clean += completed_frames; if (xdp_ring->next_to_clean >= cnt) xdp_ring->next_to_clean -= cnt; if (xsk_frames) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3c0c35ecea10..b5b443883da9 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2256,6 +2256,30 @@ static void igb_enable_mas(struct igb_adapter *adapter) } } +#ifdef CONFIG_IGB_HWMON +/** + * igb_set_i2c_bb - Init I2C interface + * @hw: pointer to hardware structure + **/ +static void igb_set_i2c_bb(struct e1000_hw *hw) +{ + u32 ctrl_ext; + s32 i2cctl; + + ctrl_ext = rd32(E1000_CTRL_EXT); + ctrl_ext |= E1000_CTRL_I2C_ENA; + wr32(E1000_CTRL_EXT, ctrl_ext); + wrfl(); + + i2cctl = rd32(E1000_I2CPARAMS); + i2cctl |= E1000_I2CBB_EN + | E1000_I2C_CLK_OE_N + | E1000_I2C_DATA_OE_N; + wr32(E1000_I2CPARAMS, i2cctl); + wrfl(); +} +#endif + void igb_reset(struct igb_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; @@ -2400,7 +2424,8 @@ void igb_reset(struct igb_adapter *adapter) * interface. */ if (adapter->ets) - mac->ops.init_thermal_sensor_thresh(hw); + igb_set_i2c_bb(hw); + mac->ops.init_thermal_sensor_thresh(hw); } } #endif @@ -3117,21 +3142,12 @@ static void igb_init_mas(struct igb_adapter *adapter) **/ static s32 igb_init_i2c(struct igb_adapter *adapter) { - struct e1000_hw *hw = &adapter->hw; s32 status = 0; - s32 i2cctl; /* I2C interface supported on i350 devices */ if (adapter->hw.mac.type != e1000_i350) return 0; - i2cctl = rd32(E1000_I2CPARAMS); - i2cctl |= E1000_I2CBB_EN - | E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N - | E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N; - wr32(E1000_I2CPARAMS, i2cctl); - wrfl(); - /* Initialize the i2c bus which is controlled by the registers. * This bus will use the i2c_algo_bit structure that implements * the protocol through toggling of the 4 bits in the register. @@ -3521,6 +3537,12 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->ets = true; else adapter->ets = false; + /* Only enable I2C bit banging if an external thermal + * sensor is supported. 
+ */ + if (adapter->ets) + igb_set_i2c_bb(hw); + hw->mac.ops.init_thermal_sensor_thresh(hw); if (igb_sysfs_init(adapter)) dev_err(&pdev->dev, "failed to allocate sysfs resources\n"); @@ -6794,7 +6816,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) struct timespec64 ts; u32 tsauxc; - if (pin < 0 || pin >= IGB_N_PEROUT) + if (pin < 0 || pin >= IGB_N_SDP) return; spin_lock(&adapter->tmreg_lock); @@ -6802,7 +6824,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) if (hw->mac.type == e1000_82580 || hw->mac.type == e1000_i354 || hw->mac.type == e1000_i350) { - s64 ns = timespec64_to_ns(&adapter->perout[pin].period); + s64 ns = timespec64_to_ns(&adapter->perout[tsintr_tt].period); u32 systiml, systimh, level_mask, level, rem; u64 systim, now; @@ -6850,8 +6872,8 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) ts.tv_nsec = (u32)systim; ts.tv_sec = ((u32)(systim >> 32)) & 0xFF; } else { - ts = timespec64_add(adapter->perout[pin].start, - adapter->perout[pin].period); + ts = timespec64_add(adapter->perout[tsintr_tt].start, + adapter->perout[tsintr_tt].period); } /* u32 conversion of tv_sec is safe until y2106 */ @@ -6860,7 +6882,7 @@ static void igb_perout(struct igb_adapter *adapter, int tsintr_tt) tsauxc = rd32(E1000_TSAUXC); tsauxc |= TSAUXC_EN_TT0; wr32(E1000_TSAUXC, tsauxc); - adapter->perout[pin].start = ts; + adapter->perout[tsintr_tt].start = ts; spin_unlock(&adapter->tmreg_lock); } @@ -6874,7 +6896,7 @@ static void igb_extts(struct igb_adapter *adapter, int tsintr_tt) struct ptp_clock_event event; struct timespec64 ts; - if (pin < 0 || pin >= IGB_N_EXTTS) + if (pin < 0 || pin >= IGB_N_SDP) return; if (hw->mac.type == e1000_82580 || diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index bc68b8f2176d..8736ca4b2628 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -73,6 +73,8 @@ #define IXGBE_RXBUFFER_4K 4096 #define IXGBE_MAX_RXBUFFER 16384 /* largest size for a single descriptor */ +#define IXGBE_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) + /* Attempt to maximize the headroom available for incoming frames. We * use a 2K buffer for receives and need 1536/1534 to store the data for * the frame. This leaves us with 512 bytes of room. 
From that we need diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ab8370c413f3..4507fba8747a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6778,6 +6778,18 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) } /** + * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP + * @adapter: device handle, pointer to adapter + */ +static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter) +{ + if (PAGE_SIZE >= 8192 || adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) + return IXGBE_RXBUFFER_2K; + else + return IXGBE_RXBUFFER_3K; +} + +/** * ixgbe_change_mtu - Change the Maximum Transfer Unit * @netdev: network interface device structure * @new_mtu: new value for maximum frame size @@ -6788,18 +6800,12 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) { struct ixgbe_adapter *adapter = netdev_priv(netdev); - if (adapter->xdp_prog) { - int new_frame_size = new_mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; - int i; - - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbe_ring *ring = adapter->rx_ring[i]; + if (ixgbe_enabled_xdp_adapter(adapter)) { + int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD; - if (new_frame_size > ixgbe_rx_bufsz(ring)) { - e_warn(probe, "Requested MTU size is not supported with XDP\n"); - return -EINVAL; - } + if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) { + e_warn(probe, "Requested MTU size is not supported with XDP\n"); + return -EINVAL; } } diff --git a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c index 4632268695cb..063cd371033a 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/ipsec.c @@ -129,26 +129,31 @@ struct nfp_ipsec_cfg_mssg { }; }; -static int nfp_ipsec_cfg_cmd_issue(struct nfp_net *nn, int type, int saidx, - struct nfp_ipsec_cfg_mssg *msg) +static int nfp_net_ipsec_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry) { + unsigned int offset = nn->tlv_caps.mbox_off + NFP_NET_CFG_MBOX_SIMPLE_VAL; + struct nfp_ipsec_cfg_mssg *msg = (struct nfp_ipsec_cfg_mssg *)entry->msg; int i, msg_size, ret; - msg->cmd = type; - msg->sa_idx = saidx; - msg->rsp = 0; - msg_size = ARRAY_SIZE(msg->raw); + ret = nfp_net_mbox_lock(nn, sizeof(*msg)); + if (ret) + return ret; + msg_size = ARRAY_SIZE(msg->raw); for (i = 0; i < msg_size; i++) - nn_writel(nn, NFP_NET_CFG_MBOX_VAL + 4 * i, msg->raw[i]); + nn_writel(nn, offset + 4 * i, msg->raw[i]); - ret = nfp_net_mbox_reconfig(nn, NFP_NET_CFG_MBOX_CMD_IPSEC); - if (ret < 0) + ret = nfp_net_mbox_reconfig(nn, entry->cmd); + if (ret < 0) { + nn_ctrl_bar_unlock(nn); return ret; + } /* For now we always read the whole message response back */ for (i = 0; i < msg_size; i++) - msg->raw[i] = nn_readl(nn, NFP_NET_CFG_MBOX_VAL + 4 * i); + msg->raw[i] = nn_readl(nn, offset + 4 * i); + + nn_ctrl_bar_unlock(nn); switch (msg->rsp) { case NFP_IPSEC_CFG_MSSG_OK: @@ -477,7 +482,10 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x) } /* Allocate saidx and commit the SA */ - err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_ADD_SA, saidx, &msg); + msg.cmd = NFP_IPSEC_CFG_MSSG_ADD_SA; + msg.sa_idx = saidx; + err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg, + sizeof(msg), nfp_net_ipsec_cfg); if (err) { xa_erase(&nn->xa_ipsec, saidx); nn_err(nn, "Failed to issue IPsec command err ret=%d\n", err); 
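The nfp hunks in this and the surrounding files generalize the old multicast-only deferral machinery (mc_lock/mc_addrs/mc_work) into the mbox_amsg queue, so that IPsec state add/delete, which can run in atomic context, also defers the sleepable mailbox handshake to a workqueue. A condensed sketch of the pattern, with file-scope globals standing in for the struct nfp_net fields that the real driver uses::

    #include <linux/list.h>
    #include <linux/printk.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>
    #include <linux/string.h>
    #include <linux/types.h>
    #include <linux/workqueue.h>

    struct amsg_entry {
            struct list_head list;
            int (*cfg)(struct amsg_entry *entry); /* mailbox I/O, may sleep */
            u32 cmd;
            char msg[];                           /* payload copied at queue time */
    };

    static LIST_HEAD(amsg_list);
    static DEFINE_SPINLOCK(amsg_lock);
    static void amsg_work_fn(struct work_struct *work);
    static DECLARE_WORK(amsg_work, amsg_work_fn);

    /* Producer: callable from atomic context (hence GFP_ATOMIC), as the
     * xfrm state callbacks and address sync paths can be. */
    static int amsg_queue(u32 cmd, const void *data, size_t len,
                          int (*cb)(struct amsg_entry *))
    {
            struct amsg_entry *entry = kmalloc(sizeof(*entry) + len, GFP_ATOMIC);

            if (!entry)
                    return -ENOMEM;

            memcpy(entry->msg, data, len);
            entry->cmd = cmd;
            entry->cfg = cb;

            spin_lock_bh(&amsg_lock);
            list_add_tail(&entry->list, &amsg_list);
            spin_unlock_bh(&amsg_lock);

            schedule_work(&amsg_work);
            return 0;
    }

    /* Consumer: splice the pending entries under the lock, then perform the
     * sleepable mailbox handshake for each message in process context. */
    static void amsg_work_fn(struct work_struct *work)
    {
            struct amsg_entry *entry, *tmp;
            LIST_HEAD(todo);

            spin_lock_bh(&amsg_lock);
            list_splice_init(&amsg_list, &todo);
            spin_unlock_bh(&amsg_lock);

            list_for_each_entry_safe(entry, tmp, &todo, list) {
                    if (entry->cfg(entry))
                            pr_err("config cmd %d failed\n", entry->cmd);
                    list_del(&entry->list);
                    kfree(entry);
            }
    }

Copying the payload into the entry at queue time is what lets the caller pass stack-allocated messages, as nfp_net_xfrm_del_state does with its on-stack nfp_ipsec_cfg_mssg.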
@@ -491,14 +499,17 @@ static int nfp_net_xfrm_add_state(struct xfrm_state *x) static void nfp_net_xfrm_del_state(struct xfrm_state *x) { + struct nfp_ipsec_cfg_mssg msg = { + .cmd = NFP_IPSEC_CFG_MSSG_INV_SA, + .sa_idx = x->xso.offload_handle - 1, + }; struct net_device *netdev = x->xso.dev; - struct nfp_ipsec_cfg_mssg msg; struct nfp_net *nn; int err; nn = netdev_priv(netdev); - err = nfp_ipsec_cfg_cmd_issue(nn, NFP_IPSEC_CFG_MSSG_INV_SA, - x->xso.offload_handle - 1, &msg); + err = nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_IPSEC, &msg, + sizeof(msg), nfp_net_ipsec_cfg); if (err) nn_warn(nn, "Failed to invalidate SA in hardware\n"); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 432d79d691c2..939cfce15830 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -617,9 +617,10 @@ struct nfp_net_dp { * @vnic_no_name: For non-port PF vNIC make ndo_get_phys_port_name return * -EOPNOTSUPP to keep backwards compatibility (set by app) * @port: Pointer to nfp_port structure if vNIC is a port - * @mc_lock: Protect mc_addrs list - * @mc_addrs: List of mc addrs to add/del to HW - * @mc_work: Work to update mc addrs + * @mbox_amsg: Asynchronously processed message via mailbox + * @mbox_amsg.lock: Protect message list + * @mbox_amsg.list: List of message to process + * @mbox_amsg.work: Work to process message asynchronously * @app_priv: APP private data for this vNIC */ struct nfp_net { @@ -721,13 +722,25 @@ struct nfp_net { struct nfp_port *port; - spinlock_t mc_lock; - struct list_head mc_addrs; - struct work_struct mc_work; + struct { + spinlock_t lock; + struct list_head list; + struct work_struct work; + } mbox_amsg; void *app_priv; }; +struct nfp_mbox_amsg_entry { + struct list_head list; + int (*cfg)(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry); + u32 cmd; + char msg[]; +}; + +int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len, + int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *)); + /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. 
*/ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 18fc9971f1c8..70d7484c82af 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1334,14 +1334,54 @@ err_unlock: return err; } -struct nfp_mc_addr_entry { - u8 addr[ETH_ALEN]; - u32 cmd; - struct list_head list; -}; +int nfp_net_sched_mbox_amsg_work(struct nfp_net *nn, u32 cmd, const void *data, size_t len, + int (*cb)(struct nfp_net *, struct nfp_mbox_amsg_entry *)) +{ + struct nfp_mbox_amsg_entry *entry; + + entry = kmalloc(sizeof(*entry) + len, GFP_ATOMIC); + if (!entry) + return -ENOMEM; + + memcpy(entry->msg, data, len); + entry->cmd = cmd; + entry->cfg = cb; + + spin_lock_bh(&nn->mbox_amsg.lock); + list_add_tail(&entry->list, &nn->mbox_amsg.list); + spin_unlock_bh(&nn->mbox_amsg.lock); + + schedule_work(&nn->mbox_amsg.work); + + return 0; +} + +static void nfp_net_mbox_amsg_work(struct work_struct *work) +{ + struct nfp_net *nn = container_of(work, struct nfp_net, mbox_amsg.work); + struct nfp_mbox_amsg_entry *entry, *tmp; + struct list_head tmp_list; + + INIT_LIST_HEAD(&tmp_list); + + spin_lock_bh(&nn->mbox_amsg.lock); + list_splice_init(&nn->mbox_amsg.list, &tmp_list); + spin_unlock_bh(&nn->mbox_amsg.lock); + + list_for_each_entry_safe(entry, tmp, &tmp_list, list) { + int err = entry->cfg(nn, entry); + + if (err) + nn_err(nn, "Config cmd %d to HW failed %d.\n", entry->cmd, err); + + list_del(&entry->list); + kfree(entry); + } +} -static int nfp_net_mc_cfg(struct nfp_net *nn, const unsigned char *addr, const u32 cmd) +static int nfp_net_mc_cfg(struct nfp_net *nn, struct nfp_mbox_amsg_entry *entry) { + unsigned char *addr = entry->msg; int ret; ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_MULTICAST_SZ); @@ -1353,26 +1393,7 @@ static int nfp_net_mc_cfg(struct nfp_net *nn, const unsigned char *addr, const u nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_LO, get_unaligned_be16(addr + 4)); - return nfp_net_mbox_reconfig_and_unlock(nn, cmd); -} - -static int nfp_net_mc_prep(struct nfp_net *nn, const unsigned char *addr, const u32 cmd) -{ - struct nfp_mc_addr_entry *entry; - - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (!entry) - return -ENOMEM; - - ether_addr_copy(entry->addr, addr); - entry->cmd = cmd; - spin_lock_bh(&nn->mc_lock); - list_add_tail(&entry->list, &nn->mc_addrs); - spin_unlock_bh(&nn->mc_lock); - - schedule_work(&nn->mc_work); - - return 0; + return nfp_net_mbox_reconfig_and_unlock(nn, entry->cmd); } static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr) @@ -1385,35 +1406,16 @@ static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr) return -EINVAL; } - return nfp_net_mc_prep(nn, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD); + return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD, addr, + NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg); } static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr) { struct nfp_net *nn = netdev_priv(netdev); - return nfp_net_mc_prep(nn, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL); -} - -static void nfp_net_mc_addr_config(struct work_struct *work) -{ - struct nfp_net *nn = container_of(work, struct nfp_net, mc_work); - struct nfp_mc_addr_entry *entry, *tmp; - struct list_head tmp_list; - - INIT_LIST_HEAD(&tmp_list); - - spin_lock_bh(&nn->mc_lock); - list_splice_init(&nn->mc_addrs, &tmp_list); - spin_unlock_bh(&nn->mc_lock); - - 
list_for_each_entry_safe(entry, tmp, &tmp_list, list) { - if (nfp_net_mc_cfg(nn, entry->addr, entry->cmd)) - nn_err(nn, "Config mc address to HW failed.\n"); - - list_del(&entry->list); - kfree(entry); - } + return nfp_net_sched_mbox_amsg_work(nn, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL, addr, + NFP_NET_CFG_MULTICAST_SZ, nfp_net_mc_cfg); } static void nfp_net_set_rx_mode(struct net_device *netdev) @@ -2681,9 +2683,9 @@ int nfp_net_init(struct nfp_net *nn) if (!nn->dp.netdev) return 0; - spin_lock_init(&nn->mc_lock); - INIT_LIST_HEAD(&nn->mc_addrs); - INIT_WORK(&nn->mc_work, nfp_net_mc_addr_config); + spin_lock_init(&nn->mbox_amsg.lock); + INIT_LIST_HEAD(&nn->mbox_amsg.list); + INIT_WORK(&nn->mbox_amsg.work, nfp_net_mbox_amsg_work); return register_netdev(nn->dp.netdev); @@ -2704,6 +2706,6 @@ void nfp_net_clean(struct nfp_net *nn) unregister_netdev(nn->dp.netdev); nfp_net_ipsec_clean(nn); nfp_ccm_mbox_clean(nn); - flush_work(&nn->mc_work); + flush_work(&nn->mbox_amsg.work); nfp_net_reconfig_wait_posted(nn); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 51124309ae1f..f03dcadff738 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -403,7 +403,6 @@ */ #define NFP_NET_CFG_MBOX_BASE 0x1800 #define NFP_NET_CFG_MBOX_VAL_MAX_SZ 0x1F8 -#define NFP_NET_CFG_MBOX_VAL 0x1808 #define NFP_NET_CFG_MBOX_SIMPLE_CMD 0x0 #define NFP_NET_CFG_MBOX_SIMPLE_RET 0x4 #define NFP_NET_CFG_MBOX_SIMPLE_VAL 0x8 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c index 413f66017219..e95d35f1e5a0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c @@ -541,9 +541,9 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, return 0; } - val |= PPSCMDx(index, 0x2); val |= TRGTMODSELx(index, 0x2); val |= PPSEN0; + writel(val, ioaddr + MAC_PPS_CONTROL); writel(cfg->start.tv_sec, ioaddr + MAC_PPSx_TARGET_TIME_SEC(index)); @@ -568,6 +568,7 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index, writel(period - 1, ioaddr + MAC_PPSx_WIDTH(index)); /* Finally, activate it */ + val |= PPSCMDx(index, 0x2); writel(val, ioaddr + MAC_PPS_CONTROL); return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index eb6d9cd8e93f..0046a4ee6e64 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -559,7 +559,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst"); plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode"); - if (plat->force_thresh_dma_mode) { + if (plat->force_thresh_dma_mode && plat->force_sf_dma_mode) { plat->force_sf_dma_mode = 0; dev_warn(&pdev->dev, "force_sf_dma_mode is ignored if force_thresh_dma_mode is set.\n"); diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ecbde83b5243..6cda4b7c10cb 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -501,7 +501,15 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) k3_udma_glue_disable_tx_chn(common->tx_chns[i].tx_chn); } + reinit_completion(&common->tdown_complete); k3_udma_glue_tdown_rx_chn(common->rx_chns.rx_chn, true); + + if 
(common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) { + i = wait_for_completion_timeout(&common->tdown_complete, msecs_to_jiffies(1000)); + if (!i) + dev_err(common->dev, "rx teardown timeout\n"); + } + napi_disable(&common->napi_rx); for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++) @@ -721,6 +729,8 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common, if (cppi5_desc_is_tdcm(desc_dma)) { dev_dbg(dev, "%s RX tdown flow: %u\n", __func__, flow_idx); + if (common->pdata.quirks & AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ) + complete(&common->tdown_complete); return 0; } @@ -2672,7 +2682,7 @@ static const struct am65_cpsw_pdata j721e_pdata = { }; static const struct am65_cpsw_pdata am64x_cpswxg_pdata = { - .quirks = 0, + .quirks = AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ, .ale_dev_id = "am64-cpswxg", .fdqring_mode = K3_RINGACC_RING_MODE_RING, }; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h index 4b75620f8d28..e5f1c44788c1 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h @@ -90,6 +90,7 @@ struct am65_cpsw_rx_chn { }; #define AM65_CPSW_QUIRK_I2027_NO_TX_CSUM BIT(0) +#define AM64_CPSW_QUIRK_DMA_RX_TDOWN_IRQ BIT(1) struct am65_cpsw_pdata { u32 quirks; diff --git a/drivers/net/usb/kalmia.c b/drivers/net/usb/kalmia.c index 9f2b70ef39aa..613fc6910f14 100644 --- a/drivers/net/usb/kalmia.c +++ b/drivers/net/usb/kalmia.c @@ -65,8 +65,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len, init_msg, init_msg_len, &act_len, KALMIA_USB_TIMEOUT); if (status != 0) { netdev_err(dev->net, - "Error sending init packet. Status %i, length %i\n", - status, act_len); + "Error sending init packet. Status %i\n", + status); return status; } else if (act_len != init_msg_len) { @@ -83,8 +83,8 @@ kalmia_send_init_packet(struct usbnet *dev, u8 *init_msg, u8 init_msg_len, if (status != 0) netdev_err(dev->net, - "Error receiving init result. Status %i, length %i\n", - status, act_len); + "Error receiving init result. 
Status %i\n", + status); else if (act_len != expected_len) netdev_err(dev->net, "Unexpected init result length: %i\n", act_len); diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 56267c327f0b..682987040ea8 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1546,31 +1546,6 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, rxd->len = rbi->len; } -#ifdef VMXNET3_RSS - if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && - (adapter->netdev->features & NETIF_F_RXHASH)) { - enum pkt_hash_types hash_type; - - switch (rcd->rssType) { - case VMXNET3_RCD_RSS_TYPE_IPV4: - case VMXNET3_RCD_RSS_TYPE_IPV6: - hash_type = PKT_HASH_TYPE_L3; - break; - case VMXNET3_RCD_RSS_TYPE_TCPIPV4: - case VMXNET3_RCD_RSS_TYPE_TCPIPV6: - case VMXNET3_RCD_RSS_TYPE_UDPIPV4: - case VMXNET3_RCD_RSS_TYPE_UDPIPV6: - hash_type = PKT_HASH_TYPE_L4; - break; - default: - hash_type = PKT_HASH_TYPE_L3; - break; - } - skb_set_hash(ctx->skb, - le32_to_cpu(rcd->rssHash), - hash_type); - } -#endif skb_record_rx_queue(ctx->skb, rq->qid); skb_put(ctx->skb, rcd->len); @@ -1653,6 +1628,31 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, u32 mtu = adapter->netdev->mtu; skb->len += skb->data_len; +#ifdef VMXNET3_RSS + if (rcd->rssType != VMXNET3_RCD_RSS_TYPE_NONE && + (adapter->netdev->features & NETIF_F_RXHASH)) { + enum pkt_hash_types hash_type; + + switch (rcd->rssType) { + case VMXNET3_RCD_RSS_TYPE_IPV4: + case VMXNET3_RCD_RSS_TYPE_IPV6: + hash_type = PKT_HASH_TYPE_L3; + break; + case VMXNET3_RCD_RSS_TYPE_TCPIPV4: + case VMXNET3_RCD_RSS_TYPE_TCPIPV6: + case VMXNET3_RCD_RSS_TYPE_UDPIPV4: + case VMXNET3_RCD_RSS_TYPE_UDPIPV6: + hash_type = PKT_HASH_TYPE_L4; + break; + default: + hash_type = PKT_HASH_TYPE_L3; + break; + } + skb_set_hash(skb, + le32_to_cpu(rcd->rssHash), + hash_type); + } +#endif vmxnet3_rx_csum(adapter, skb, (union Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c734934c407c..c11e0cfeef0f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -110,6 +110,7 @@ struct nvme_queue; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); static void nvme_delete_io_queues(struct nvme_dev *dev); +static void nvme_update_attrs(struct nvme_dev *dev); /* * Represents an NVM Express device. Each nvme_dev is a PCI function. @@ -1923,6 +1924,8 @@ static void nvme_map_cmb(struct nvme_dev *dev) if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) == (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) pci_p2pmem_publish(pdev, true); + + nvme_update_attrs(dev); } static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) @@ -2209,6 +2212,11 @@ static const struct attribute_group *nvme_pci_dev_attr_groups[] = { NULL, }; +static void nvme_update_attrs(struct nvme_dev *dev) +{ + sysfs_update_group(&dev->ctrl.device->kobj, &nvme_pci_dev_attrs_group); +} + /* * nirqs is the number of interrupts available for write and read * queues. The core already reserved an interrupt for the admin queue. 
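For context on the nvme_update_attrs() hook added above: sysfs evaluates an attribute group's is_visible() callback once, when the group is created, so an attribute whose visibility depends on CMB discovery would keep its stale initial answer. sysfs_update_group() re-runs the callback for every attribute in the group, which is presumably why nvme_map_cmb() now calls the hook once the CMB is mapped. A minimal sketch of the mechanism; the is_visible convention and sysfs_update_group() are real kernel interfaces, but dev_attr_cmb, the helper name and the cmbsz test are illustrative assumptions, not quoted from the driver:

/* Illustrative: a visibility decision that goes stale after probe. */
static umode_t example_attrs_are_visible(struct kobject *kobj,
                                         struct attribute *a, int n)
{
        struct nvme_dev *dev = kobj_to_nvme_dev(kobj);  /* hypothetical */

        if (a == &dev_attr_cmb.attr && !dev->cmbsz)
                return 0;       /* hide until a CMB has been mapped */
        return a->mode;
}

After nvme_map_cmb() records the CMB, only a fresh sysfs_update_group() pass (the body of nvme_update_attrs() above) lets such an attribute appear.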
@@ -2509,18 +2517,12 @@ static int nvme_pci_enable(struct nvme_dev *dev) { int result = -ENOMEM; struct pci_dev *pdev = to_pci_dev(dev->dev); - int dma_address_bits = 64; if (pci_enable_device_mem(pdev)) return result; pci_set_master(pdev); - if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48) - dma_address_bits = 48; - if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(dma_address_bits))) - goto disable; - if (readl(dev->bar + NVME_REG_CSTS) == -1) { result = -ENODEV; goto disable; @@ -2970,7 +2972,7 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) - return NULL; + return ERR_PTR(-ENOMEM); INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work); mutex_init(&dev->shutdown_lock); @@ -2998,7 +3000,11 @@ static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev, quirks); if (ret) goto out_put_device; - + + if (dev->ctrl.quirks & NVME_QUIRK_DMA_ADDRESS_BITS_48) + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + else + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); dma_set_min_align_mask(&pdev->dev, NVME_CTRL_PAGE_SIZE - 1); dma_set_max_seg_size(&pdev->dev, 0xffffffff); @@ -3031,8 +3037,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) int result = -ENOMEM; dev = nvme_pci_alloc_dev(pdev, id); - if (!dev) - return -ENOMEM; + if (IS_ERR(dev)) + return PTR_ERR(dev); result = nvme_dev_map(dev); if (result) @@ -3423,6 +3429,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x10ec, 0x5763), /* ADATA SX6000PNP */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 65f3b02a0e4e..f90975e00446 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -48,9 +48,10 @@ static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, err = memblock_mark_nomap(base, size); if (err) memblock_phys_free(base, size); - kmemleak_ignore_phys(base); } + kmemleak_ignore_phys(base); + return err; } diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 9b593f985805..40f70f83daba 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -550,13 +550,7 @@ static void armpmu_disable(struct pmu *pmu) static bool armpmu_filter(struct pmu *pmu, int cpu) { struct arm_pmu *armpmu = to_arm_pmu(pmu); - bool ret; - - ret = cpumask_test_cpu(cpu, &armpmu->supported_cpus); - if (ret && armpmu->filter) - return armpmu->filter(pmu, cpu); - - return ret; + return !cpumask_test_cpu(cpu, &armpmu->supported_cpus); } static ssize_t cpus_show(struct device *dev, diff --git a/drivers/platform/x86/intel/vsec.c b/drivers/platform/x86/intel/vsec.c index bb81b8b1f7e9..89c5374e33b3 100644 --- a/drivers/platform/x86/intel/vsec.c +++ b/drivers/platform/x86/intel/vsec.c @@ -408,14 +408,23 @@ static const struct intel_vsec_platform_info dg1_info = { .quirks = VSEC_QUIRK_NO_DVSEC | VSEC_QUIRK_EARLY_HW, }; +/* MTL info */ +static const struct intel_vsec_platform_info mtl_info = { + .quirks = VSEC_QUIRK_NO_WATCHER | VSEC_QUIRK_NO_CRASHLOG, +}; + #define PCI_DEVICE_ID_INTEL_VSEC_ADL 0x467d #define PCI_DEVICE_ID_INTEL_VSEC_DG1 0x490e +#define PCI_DEVICE_ID_INTEL_VSEC_MTL_M 0x7d0d +#define PCI_DEVICE_ID_INTEL_VSEC_MTL_S 
0xad0d #define PCI_DEVICE_ID_INTEL_VSEC_OOBMSM 0x09a7 #define PCI_DEVICE_ID_INTEL_VSEC_RPL 0xa77d #define PCI_DEVICE_ID_INTEL_VSEC_TGL 0x9a0d static const struct pci_device_id intel_vsec_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, VSEC_ADL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_DG1, &dg1_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_MTL_M, &mtl_info) }, + { PCI_DEVICE_DATA(INTEL, VSEC_MTL_S, &mtl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_OOBMSM, &(struct intel_vsec_platform_info) {}) }, { PCI_DEVICE_DATA(INTEL, VSEC_RPL, &tgl_info) }, { PCI_DEVICE_DATA(INTEL, VSEC_TGL, &tgl_info) }, diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 15f174f4e056..3f33934f5429 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2220,11 +2220,26 @@ void spi_flush_queue(struct spi_controller *ctlr) /*-------------------------------------------------------------------------*/ #if defined(CONFIG_OF) +static void of_spi_parse_dt_cs_delay(struct device_node *nc, + struct spi_delay *delay, const char *prop) +{ + u32 value; + + if (!of_property_read_u32(nc, prop, &value)) { + if (value > U16_MAX) { + delay->value = DIV_ROUND_UP(value, 1000); + delay->unit = SPI_DELAY_UNIT_USECS; + } else { + delay->value = value; + delay->unit = SPI_DELAY_UNIT_NSECS; + } + } +} + static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, struct device_node *nc) { u32 value; - u16 cs_setup; int rc; /* Mode (clock phase/polarity/etc.) */ @@ -2310,10 +2325,8 @@ static int of_spi_parse_dt(struct spi_controller *ctlr, struct spi_device *spi, if (!of_property_read_u32(nc, "spi-max-frequency", &value)) spi->max_speed_hz = value; - if (!of_property_read_u16(nc, "spi-cs-setup-delay-ns", &cs_setup)) { - spi->cs_setup.value = cs_setup; - spi->cs_setup.unit = SPI_DELAY_UNIT_NSECS; - } + /* Device CS delays */ + of_spi_parse_dt_cs_delay(nc, &spi->cs_setup, "spi-cs-setup-delay-ns"); return 0; } diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c index c730253ab85c..583cbcf09446 100644 --- a/drivers/video/fbdev/core/fb_defio.c +++ b/drivers/video/fbdev/core/fb_defio.c @@ -313,7 +313,7 @@ void fb_deferred_io_open(struct fb_info *info, } EXPORT_SYMBOL_GPL(fb_deferred_io_open); -void fb_deferred_io_cleanup(struct fb_info *info) +void fb_deferred_io_release(struct fb_info *info) { struct fb_deferred_io *fbdefio = info->fbdefio; struct page *page; @@ -327,6 +327,14 @@ void fb_deferred_io_cleanup(struct fb_info *info) page = fb_deferred_io_page(info, i); page->mapping = NULL; } +} +EXPORT_SYMBOL_GPL(fb_deferred_io_release); + +void fb_deferred_io_cleanup(struct fb_info *info) +{ + struct fb_deferred_io *fbdefio = info->fbdefio; + + fb_deferred_io_release(info); kvfree(info->pagerefs); mutex_destroy(&fbdefio->lock); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 3a6c8458eb8d..ab3545a00abc 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1454,6 +1454,10 @@ __releases(&info->lock) struct fb_info * const info = file->private_data; lock_fb_info(info); +#if IS_ENABLED(CONFIG_FB_DEFERRED_IO) + if (info->fbdefio) + fb_deferred_io_release(info); +#endif if (info->fbops->fb_release) info->fbops->fb_release(info,1); module_put(info->fbops->owner); @@ -361,6 +361,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma) spin_lock(&mm->ioctx_lock); rcu_read_lock(); table = rcu_dereference(mm->ioctx_table); + if (!table) + goto out_unlock; + for (i = 0; i < table->nr; i++) { struct kioctx *ctx; @@ -374,6 +377,7 @@ static int 
aio_ring_mremap(struct vm_area_struct *vma) } } +out_unlock: rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); return res; @@ -1271,8 +1271,9 @@ static s64 dax_unshare_iter(struct iomap_iter *iter) if (ret < 0) goto out_unlock; - ret = copy_mc_to_kernel(daddr, saddr, length); - if (ret) + if (copy_mc_to_kernel(daddr, saddr, length) == 0) + ret = length; + else ret = -EIO; out_unlock: diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 1ead5bd740c2..14a72224b657 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -209,8 +209,8 @@ config NFS_DISABLE_UDP_SUPPORT config NFS_V4_2_READ_PLUS bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation" depends on NFS_V4_2 - default y + default n help - Choose Y here to enable the use of READ_PLUS over NFS v4.2. READ_PLUS - attempts to improve read performance by compressing out sparse holes - in the file contents. + This is intended for developers only. The READ_PLUS operation has + been shown to have issues under specific conditions and should not + be used in production. diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4ef529379065..c69f27d3adb7 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8182,7 +8182,6 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); - rhltable_destroy(&nfs4_file_rhltable); #ifdef CONFIG_NFSD_V4_2_INTER_SSC nfsd4_ssc_shutdown_umount(nn); #endif @@ -8192,6 +8191,7 @@ void nfs4_state_shutdown(void) { nfsd4_destroy_callback_queue(); + rhltable_destroy(&nfs4_file_rhltable); } static void diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 87e1004b606d..b4041d0566a9 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1114,7 +1114,14 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) minseg = range[0] + segbytes - 1; do_div(minseg, segbytes); + + if (range[1] < 4096) + goto out; + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); + if (maxseg < segbytes) + goto out; + do_div(maxseg, segbytes); maxseg--; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 6edb6e0dd61f..1422b8ba24ed 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -409,6 +409,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) goto out; /* + * Prevent underflow in second superblock position calculation. + * The exact minimum size check is done in nilfs_sufile_resize(). + */ + if (newsize < 4096) { + ret = -ENOSPC; + goto out; + } + + /* * Write lock is required to protect some functions depending * on the number of segments, the number of reserved segments, * and so forth. 
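The nilfs2 hunks above and in the_nilfs.c below guard the same hazard from two directions: NILFS_SB2_OFFSET_BYTES() places the second superblock in the last full 4 KiB block of the device, and with the usual shift-based definition (my assumption here, not quoted from the header) any size below 4096 makes the inner subtraction wrap to an enormous u64. A standalone illustration of the wrap that the new < 4096 checks rule out:

#include <stdint.h>
#include <stdio.h>

/* Assumed shape of the macro being guarded: offset of the last
 * complete 4 KiB block of the device. */
#define SB2_OFFSET_BYTES(devsize) \
        ((((uint64_t)(devsize) >> 12) - 1) << 12)

int main(void)
{
        /* 8 KiB device: sane, sb2 would sit at byte 4096 */
        printf("%llu\n", (unsigned long long)SB2_OFFSET_BYTES(8192));
        /* 1 KiB device: (0 - 1) << 12 wraps to 18446744073709547520 */
        printf("%llu\n", (unsigned long long)SB2_OFFSET_BYTES(1024));
        return 0;
}

With the wrapped offset, nilfs_ioctl_set_alloc_range() would compute a bogus maxseg and nilfs_load_super_block() would try to read a superblock far past the end of the device, hence the explicit minimum-size checks in all three hunks.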
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 2064e6473d30..3a4c9c150cbf 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -544,9 +544,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, { struct nilfs_super_block **sbp = nilfs->ns_sbp; struct buffer_head **sbh = nilfs->ns_sbh; - u64 sb2off = NILFS_SB2_OFFSET_BYTES(bdev_nr_bytes(nilfs->ns_bdev)); + u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev); int valid[2], swp = 0; + if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) { + nilfs_err(sb, "device size too small"); + return -EINVAL; + } + sb2off = NILFS_SB2_OFFSET_BYTES(devsize); + sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, &sbh[0]); sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]); diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index b88d19e9581e..c8469c656e0d 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -76,7 +76,7 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, /* Sanity check values */ /* there is always at least one xattr id */ - if (*xattr_ids <= 0) + if (*xattr_ids == 0) return ERR_PTR(-EINVAL); len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); diff --git a/include/linux/fb.h b/include/linux/fb.h index 96b96323e9cb..73eb1f85ea8e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -662,6 +662,7 @@ extern int fb_deferred_io_init(struct fb_info *info); extern void fb_deferred_io_open(struct fb_info *info, struct inode *inode, struct file *file); +extern void fb_deferred_io_release(struct fb_info *info); extern void fb_deferred_io_cleanup(struct fb_info *info); extern int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index db194e2ba69f..9ab9d3105d5c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -743,7 +743,10 @@ static inline struct hstate *hstate_sizelog(int page_size_log) if (!page_size_log) return &default_hstate; - return size_to_hstate(1UL << page_size_log); + if (page_size_log < BITS_PER_LONG) + return size_to_hstate(1UL << page_size_log); + + return NULL; } static inline struct hstate *hstate_vma(struct vm_area_struct *vma) diff --git a/include/linux/mm.h b/include/linux/mm.h index 2041e6d4fa27..bd3197748562 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -137,7 +137,7 @@ extern int mmap_rnd_compat_bits __read_mostly; * define their own version of this macro in <asm/pgtable.h> */ #if BITS_PER_LONG == 64 -/* This function must be updated when the size of struct page grows above 80 +/* This function must be updated when the size of struct page grows above 96 * or reduces below 56. The idea that compiler optimizes out switch() * statement, and only leaves move/store instructions. 
Also the compiler can * combine write statements if they are both assignments and can be reordered, @@ -148,12 +148,18 @@ static inline void __mm_zero_struct_page(struct page *page) { unsigned long *_pp = (void *)page; - /* Check that struct page is either 56, 64, 72, or 80 bytes */ + /* Check that struct page is either 56, 64, 72, 80, 88 or 96 bytes */ BUILD_BUG_ON(sizeof(struct page) & 7); BUILD_BUG_ON(sizeof(struct page) < 56); - BUILD_BUG_ON(sizeof(struct page) > 80); + BUILD_BUG_ON(sizeof(struct page) > 96); switch (sizeof(struct page)) { + case 96: + _pp[11] = 0; + fallthrough; + case 88: + _pp[10] = 0; + fallthrough; case 80: _pp[9] = 0; fallthrough; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index aad12a179e54..e6e02184c25a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2839,8 +2839,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb); int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb); int unregister_netdevice_notifier_net(struct net *net, struct notifier_block *nb); -void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net, - struct notifier_block *nb); int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn); diff --git a/include/linux/pci.h b/include/linux/pci.h index adffd65e84b4..254c8a4126a8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1621,6 +1621,18 @@ pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, flags, NULL); } +static inline struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index, + const struct irq_affinity_desc *affdesc) +{ + struct msi_map map = { .index = -ENOSYS, }; + + return map; +} + +static inline void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map) +{ +} + static inline void pci_free_irq_vectors(struct pci_dev *dev) { } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index ef914a600087..525b5d64e394 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -100,7 +100,6 @@ struct arm_pmu { void (*stop)(struct arm_pmu *); void (*reset)(void *); int (*map_event)(struct perf_event *event); - bool (*filter)(struct pmu *pmu, int cpu); int num_events; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 71310efe2fab..7bde8e1c228a 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -107,7 +107,7 @@ extern void synchronize_shrinkers(void); #ifdef CONFIG_SHRINKER_DEBUG extern int shrinker_debugfs_add(struct shrinker *shrinker); -extern void shrinker_debugfs_remove(struct shrinker *shrinker); +extern struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker); extern int __printf(2, 3) shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...); #else /* CONFIG_SHRINKER_DEBUG */ @@ -115,8 +115,9 @@ static inline int shrinker_debugfs_add(struct shrinker *shrinker) { return 0; } -static inline void shrinker_debugfs_remove(struct shrinker *shrinker) +static inline struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker) { + return NULL; } static inline __printf(2, 3) int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) 
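The shrinker.h change just above is one half of a lock-ordering fix; the other half sits in the mm/shrinker_debug.c and mm/vmscan.c hunks further down. shrinker_debugfs_remove() now only detaches the dentry and hands it back, and the caller performs the blocking debugfs_remove_recursive() after releasing shrinker_rwsem. The point is that debugfs removal can sleep waiting for open file references, while the shrinker debugfs readers themselves take shrinker_rwsem, so removing the files while write-holding the semaphore could deadlock. Condensed from the later hunks, with the memcg and flag bookkeeping trimmed:

void unregister_shrinker(struct shrinker *shrinker)
{
        struct dentry *debugfs_entry;

        if (!(shrinker->flags & SHRINKER_REGISTERED))
                return;

        down_write(&shrinker_rwsem);
        /* ... unlink the shrinker, clear SHRINKER_REGISTERED ... */
        debugfs_entry = shrinker_debugfs_remove(shrinker); /* detach only */
        up_write(&shrinker_rwsem);

        /* Safe here: may sleep waiting for readers that in turn may be
         * blocked on shrinker_rwsem.
         */
        debugfs_remove_recursive(debugfs_entry);

        kfree(shrinker->nr_deferred);
        shrinker->nr_deferred = NULL;
}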
diff --git a/include/net/sock.h b/include/net/sock.h index dcd72e6285b2..556209727633 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2434,6 +2434,19 @@ static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struc return false; } +static inline struct sk_buff *skb_clone_and_charge_r(struct sk_buff *skb, struct sock *sk) +{ + skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + if (skb) { + if (sk_rmem_schedule(sk, skb, skb->truesize)) { + skb_set_owner_r(skb, sk); + return skb; + } + __kfree_skb(skb); + } + return NULL; +} + static inline void skb_prepare_for_gro(struct sk_buff *skb) { if (skb->destructor != sock_wfree) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e838feb6adc5..2a4918a1faa9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2951,8 +2951,11 @@ static int __set_cpus_allowed_ptr_locked(struct task_struct *p, } if (!(ctx->flags & SCA_MIGRATE_ENABLE)) { - if (cpumask_equal(&p->cpus_mask, ctx->new_mask)) + if (cpumask_equal(&p->cpus_mask, ctx->new_mask)) { + if (ctx->flags & SCA_USER) + swap(p->user_cpus_ptr, ctx->user_mask); goto out; + } if (WARN_ON_ONCE(p == current && is_migration_disabled(p) && diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 8ac8b81bfee6..02e011cabe91 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1343,10 +1343,11 @@ void psi_trigger_destroy(struct psi_trigger *t) group = t->group; /* - * Wakeup waiters to stop polling. Can happen if cgroup is deleted - * from under a polling process. + * Wakeup waiters to stop polling and clear the queue to prevent it from + * being accessed later. Can happen if cgroup is deleted from under a + * polling process. */ - wake_up_interruptible(&t->event_wait); + wake_up_pollfree(&t->event_wait); mutex_lock(&group->trigger_lock); diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 5897828b9d7e..7e5dff602585 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -470,11 +470,35 @@ u64 alarm_forward(struct alarm *alarm, ktime_t now, ktime_t interval) } EXPORT_SYMBOL_GPL(alarm_forward); -u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) +static u64 __alarm_forward_now(struct alarm *alarm, ktime_t interval, bool throttle) { struct alarm_base *base = &alarm_bases[alarm->type]; + ktime_t now = base->get_ktime(); + + if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS) && throttle) { + /* + * Same issue as with posix_timer_fn(). Timers which are + * periodic but the signal is ignored can starve the system + * with a very small interval. The real fix which was + * promised in the context of posix_timer_fn() never + * materialized, but someone should really work on it. + * + * To prevent DOS fake @now to be 1 jiffie out which keeps + * the overrun accounting correct but creates an + * inconsistency vs. timer_gettime(2). + */ + ktime_t kj = NSEC_PER_SEC / HZ; + + if (interval < kj) + now = ktime_add(now, kj); + } + + return alarm_forward(alarm, now, interval); +} - return alarm_forward(alarm, base->get_ktime(), interval); +u64 alarm_forward_now(struct alarm *alarm, ktime_t interval) +{ + return __alarm_forward_now(alarm, interval, false); } EXPORT_SYMBOL_GPL(alarm_forward_now); @@ -551,9 +575,10 @@ static enum alarmtimer_restart alarm_handle_timer(struct alarm *alarm, if (posix_timer_event(ptr, si_private) && ptr->it_interval) { /* * Handle ignored signals and rearm the timer. This will go - * away once we handle ignored signals proper. + * away once we handle ignored signals proper. 
Ensure that + * small intervals cannot starve the system. */ - ptr->it_overrun += alarm_forward_now(alarm, ptr->it_interval); + ptr->it_overrun += __alarm_forward_now(alarm, ptr->it_interval, true); ++ptr->it_requeue_pending; ptr->it_active = 1; result = ALARMTIMER_RESTART; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6a4696719297..6a942fa275c7 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -155,7 +155,7 @@ int trace_define_field(struct trace_event_call *call, const char *type, } EXPORT_SYMBOL_GPL(trace_define_field); -int trace_define_field_ext(struct trace_event_call *call, const char *type, +static int trace_define_field_ext(struct trace_event_call *call, const char *type, const char *name, int offset, int size, int is_signed, int filter_type, int len) { diff --git a/kernel/umh.c b/kernel/umh.c index 850631518665..fbf872c624cb 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -438,21 +438,27 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait) if (wait == UMH_NO_WAIT) /* task has freed sub_info */ goto unlock; - if (wait & UMH_KILLABLE) - state |= TASK_KILLABLE; - if (wait & UMH_FREEZABLE) state |= TASK_FREEZABLE; - retval = wait_for_completion_state(&done, state); - if (!retval) - goto wait_done; - if (wait & UMH_KILLABLE) { + retval = wait_for_completion_state(&done, state | TASK_KILLABLE); + if (!retval) + goto wait_done; + /* umh_complete() will see NULL and free sub_info */ if (xchg(&sub_info->complete, NULL)) goto unlock; + + /* + * fallthrough; in case of -ERESTARTSYS now do uninterruptible + * wait_for_completion_state(). Since umh_complete() shall call + * complete() in a moment if xchg() above returned NULL, this + * uninterruptible wait_for_completion_state() will not block + * SIGKILL'ed processes for long. 
+ */ } + wait_for_completion_state(&done, state); wait_done: retval = sub_info->retval; diff --git a/lib/parser.c b/lib/parser.c index bcb23484100e..2b5e2b480253 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -11,6 +11,15 @@ #include <linux/slab.h> #include <linux/string.h> +/* + * max size needed by different bases to express U64 + * HEX: "0xFFFFFFFFFFFFFFFF" --> 18 + * DEC: "18446744073709551615" --> 20 + * OCT: "01777777777777777777777" --> 23 + * pick the max one to define NUMBER_BUF_LEN + */ +#define NUMBER_BUF_LEN 24 + /** * match_one - Determines if a string matches a simple pattern * @s: the string to examine for presence of the pattern @@ -129,14 +138,12 @@ EXPORT_SYMBOL(match_token); static int match_number(substring_t *s, int *result, int base) { char *endp; - char *buf; + char buf[NUMBER_BUF_LEN]; int ret; long val; - buf = match_strdup(s); - if (!buf) - return -ENOMEM; - + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; ret = 0; val = simple_strtol(buf, &endp, base); if (endp == buf) @@ -145,7 +152,6 @@ static int match_number(substring_t *s, int *result, int base) ret = -ERANGE; else *result = (int) val; - kfree(buf); return ret; } @@ -163,18 +169,15 @@ static int match_number(substring_t *s, int *result, int base) */ static int match_u64int(substring_t *s, u64 *result, int base) { - char *buf; + char buf[NUMBER_BUF_LEN]; int ret; u64 val; - buf = match_strdup(s); - if (!buf) - return -ENOMEM; - + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; ret = kstrtoull(buf, base, &val); if (!ret) *result = val; - kfree(buf); return ret; } @@ -206,14 +209,12 @@ EXPORT_SYMBOL(match_int); */ int match_uint(substring_t *s, unsigned int *result) { - int err = -ENOMEM; - char *buf = match_strdup(s); + char buf[NUMBER_BUF_LEN]; - if (buf) { - err = kstrtouint(buf, 10, result); - kfree(buf); - } - return err; + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; + + return kstrtouint(buf, 10, result); } EXPORT_SYMBOL(match_uint); diff --git a/mm/filemap.c b/mm/filemap.c index c4d4ace9cc70..0e20a8d6dd93 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2588,18 +2588,19 @@ static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter, struct folio *folio; int err = 0; + /* "last_index" is the index of the page beyond the end of the read */ last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE); retry: if (fatal_signal_pending(current)) return -EINTR; - filemap_get_read_batch(mapping, index, last_index, fbatch); + filemap_get_read_batch(mapping, index, last_index - 1, fbatch); if (!folio_batch_count(fbatch)) { if (iocb->ki_flags & IOCB_NOIO) return -EAGAIN; page_cache_sync_readahead(mapping, ra, filp, index, last_index - index); - filemap_get_read_batch(mapping, index, last_index, fbatch); + filemap_get_read_batch(mapping, index, last_index - 1, fbatch); } if (!folio_batch_count(fbatch)) { if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ)) @@ -1914,7 +1914,7 @@ static unsigned long collect_longterm_unpinnable_pages( drain_allow = false; } - if (!folio_isolate_lru(folio)) + if (folio_isolate_lru(folio)) continue; list_add_tail(&folio->lru, movable_page_list); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index abe6cfd92ffa..1b791b26d72d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3272,8 +3272,6 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) pmde = mk_huge_pmd(new, READ_ONCE(vma->vm_page_prot)); if (pmd_swp_soft_dirty(*pvmw->pmd)) pmde = 
pmd_mksoft_dirty(pmde); - if (is_writable_migration_entry(entry)) - pmde = maybe_pmd_mkwrite(pmde, vma); if (pmd_swp_uffd_wp(*pvmw->pmd)) pmde = pmd_wrprotect(pmd_mkuffd_wp(pmde)); if (!is_migration_entry_young(entry)) @@ -3281,6 +3279,10 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) /* NOTE: this may contain setting soft-dirty on some archs */ if (PageDirty(new) && is_migration_entry_dirty(entry)) pmde = pmd_mkdirty(pmde); + if (is_writable_migration_entry(entry)) + pmde = maybe_pmd_mkwrite(pmde, vma); + else + pmde = pmd_wrprotect(pmde); if (PageAnon(new)) { rmap_t rmap_flags = RMAP_COMPOUND; diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 833bf2cfd2a3..21e66d7f261d 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -246,6 +246,9 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip) { + if (!kasan_arch_is_ready()) + return false; + if (ptr != page_address(virt_to_head_page(ptr))) { kasan_report_invalid_free(ptr, ip, KASAN_REPORT_INVALID_FREE); return true; diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index b076f597a378..cb762982c8ba 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -191,7 +191,12 @@ bool kasan_check_range(unsigned long addr, size_t size, bool write, bool kasan_byte_accessible(const void *addr) { - s8 shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr)); + s8 shadow_byte; + + if (!kasan_arch_is_ready()) + return true; + + shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr)); return shadow_byte >= 0 && shadow_byte < KASAN_GRANULE_SIZE; } diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 2fba1f51f042..15cfb34d16a1 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -291,6 +291,9 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) unsigned long shadow_start, shadow_end; int ret; + if (!kasan_arch_is_ready()) + return 0; + if (!is_vmalloc_or_module_addr((void *)addr)) return 0; @@ -459,6 +462,9 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long region_start, region_end; unsigned long size; + if (!kasan_arch_is_ready()) + return; + region_start = ALIGN(start, KASAN_MEMORY_PER_SHADOW_PAGE); region_end = ALIGN_DOWN(end, KASAN_MEMORY_PER_SHADOW_PAGE); @@ -502,6 +508,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, * with setting memory tags, so the KASAN_VMALLOC_INIT flag is ignored. 
*/ + if (!kasan_arch_is_ready()) + return (void *)start; + if (!is_vmalloc_or_module_addr(start)) return (void *)start; @@ -524,6 +533,9 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, */ void __kasan_poison_vmalloc(const void *start, unsigned long size) { + if (!kasan_arch_is_ready()) + return; + if (!is_vmalloc_or_module_addr(start)) return; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 90acfea40c13..a26a28e3738c 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2611,6 +2611,7 @@ static int madvise_collapse_errno(enum scan_result r) case SCAN_CGROUP_CHARGE_FAIL: return -EBUSY; /* Resource temporary unavailable - trying again might succeed */ + case SCAN_PAGE_COUNT: case SCAN_PAGE_LOCK: case SCAN_PAGE_LRU: case SCAN_DEL_PAGE_LRU: @@ -2629,8 +2629,11 @@ struct page *ksm_might_need_to_copy(struct page *page, new_page = NULL; } if (new_page) { - copy_user_highpage(new_page, page, address, vma); - + if (copy_mc_user_highpage(new_page, page, address, vma)) { + put_page(new_page); + memory_failure_queue(page_to_pfn(page), 0); + return ERR_PTR(-EHWPOISON); + } SetPageDirty(new_page); __SetPageUptodate(new_page); __SetPageLocked(new_page); diff --git a/mm/memory.c b/mm/memory.c index 3e836fecd035..f526b9152bef 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3840,6 +3840,9 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (unlikely(!page)) { ret = VM_FAULT_OOM; goto out_page; + } else if (unlikely(PTR_ERR(page) == -EHWPOISON)) { + ret = VM_FAULT_HWPOISON; + goto out_page; } folio = page_folio(page); diff --git a/mm/migrate.c b/mm/migrate.c index a4d3fc65085f..cc5455614e01 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -224,6 +224,8 @@ static bool remove_migration_pte(struct folio *folio, pte = maybe_mkwrite(pte, vma); else if (pte_swp_uffd_wp(*pvmw.pte)) pte = pte_mkuffd_wp(pte); + else + pte = pte_wrprotect(pte); if (folio_test_anon(folio) && !is_readable_migration_entry(entry)) rmap_flags |= RMAP_EXCLUSIVE; diff --git a/mm/shrinker_debug.c b/mm/shrinker_debug.c index b05295bab322..39c3491e28a3 100644 --- a/mm/shrinker_debug.c +++ b/mm/shrinker_debug.c @@ -246,18 +246,21 @@ int shrinker_debugfs_rename(struct shrinker *shrinker, const char *fmt, ...) 
} EXPORT_SYMBOL(shrinker_debugfs_rename); -void shrinker_debugfs_remove(struct shrinker *shrinker) +struct dentry *shrinker_debugfs_remove(struct shrinker *shrinker) { + struct dentry *entry = shrinker->debugfs_entry; + lockdep_assert_held(&shrinker_rwsem); kfree_const(shrinker->name); shrinker->name = NULL; - if (!shrinker->debugfs_entry) - return; + if (entry) { + ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id); + shrinker->debugfs_entry = NULL; + } - debugfs_remove_recursive(shrinker->debugfs_entry); - ida_free(&shrinker_debugfs_ida, shrinker->debugfs_id); + return entry; } static int __init shrinker_debugfs_init(void) diff --git a/mm/swapfile.c b/mm/swapfile.c index 4fa440e87cd6..eb9b0bf1fcdd 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1764,12 +1764,15 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, struct page *swapcache; spinlock_t *ptl; pte_t *pte, new_pte; + bool hwposioned = false; int ret = 1; swapcache = page; page = ksm_might_need_to_copy(page, vma, addr); if (unlikely(!page)) return -ENOMEM; + else if (unlikely(PTR_ERR(page) == -EHWPOISON)) + hwposioned = true; pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); if (unlikely(!pte_same_as_swp(*pte, swp_entry_to_pte(entry)))) { @@ -1777,15 +1780,19 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, goto out; } - if (unlikely(!PageUptodate(page))) { - pte_t pteval; + if (unlikely(hwposioned || !PageUptodate(page))) { + swp_entry_t swp_entry; dec_mm_counter(vma->vm_mm, MM_SWAPENTS); - pteval = swp_entry_to_pte(make_swapin_error_entry()); - set_pte_at(vma->vm_mm, addr, pte, pteval); - swap_free(entry); + if (hwposioned) { + swp_entry = make_hwpoison_entry(swapcache); + page = swapcache; + } else { + swp_entry = make_swapin_error_entry(); + } + new_pte = swp_entry_to_pte(swp_entry); ret = 0; - goto out; + goto setpte; } /* See do_swap_page() */ @@ -1817,6 +1824,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd, new_pte = pte_mksoft_dirty(new_pte); if (pte_swp_uffd_wp(*pte)) new_pte = pte_mkuffd_wp(new_pte); +setpte: set_pte_at(vma->vm_mm, addr, pte, new_pte); swap_free(entry); out: diff --git a/mm/vmscan.c b/mm/vmscan.c index bf3eedf0209c..5b7b8d4f5297 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -741,6 +741,8 @@ EXPORT_SYMBOL(register_shrinker); */ void unregister_shrinker(struct shrinker *shrinker) { + struct dentry *debugfs_entry; + if (!(shrinker->flags & SHRINKER_REGISTERED)) return; @@ -749,9 +751,11 @@ void unregister_shrinker(struct shrinker *shrinker) shrinker->flags &= ~SHRINKER_REGISTERED; if (shrinker->flags & SHRINKER_MEMCG_AWARE) unregister_memcg_shrinker(shrinker); - shrinker_debugfs_remove(shrinker); + debugfs_entry = shrinker_debugfs_remove(shrinker); up_write(&shrinker_rwsem); + debugfs_remove_recursive(debugfs_entry); + kfree(shrinker->nr_deferred); shrinker->nr_deferred = NULL; } diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 748be7253248..78c9729a6057 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -1015,6 +1015,7 @@ static void caif_sock_destructor(struct sock *sk) return; } sk_stream_kill_queues(&cf_sk->sk); + WARN_ON_ONCE(sk->sk_forward_alloc); caif_free_client(&cf_sk->layer); } diff --git a/net/core/dev.c b/net/core/dev.c index b76fb37b381e..f23e287602b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1869,14 +1869,6 @@ static void __move_netdevice_notifier_net(struct net *src_net, __register_netdevice_notifier_net(dst_net, nb, true); } -void move_netdevice_notifier_net(struct net *src_net, struct net 
*dst_net, - struct notifier_block *nb) -{ - rtnl_lock(); - __move_netdevice_notifier_net(src_net, dst_net, nb); - rtnl_unlock(); -} - int register_netdevice_notifier_dev_net(struct net_device *dev, struct notifier_block *nb, struct netdev_net_notifier *nn) @@ -10375,7 +10367,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) - dst[i] = atomic_long_read(&src[i]); + dst[i] = (unsigned long)atomic_long_read(&src[i]); /* zero out counters that only exist in rtnl_link_stats64 */ memset((char *)stats64 + n * sizeof(u64), 0, sizeof(*stats64) - n * sizeof(u64)); diff --git a/net/core/devlink.c b/net/core/devlink.c index 909a10e4b0dd..0bfc144df8b9 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4742,11 +4742,8 @@ static int devlink_reload(struct devlink *devlink, struct net *dest_net, if (err) return err; - if (dest_net && !net_eq(dest_net, curr_net)) { - move_netdevice_notifier_net(curr_net, dest_net, - &devlink->netdevice_nb); + if (dest_net && !net_eq(dest_net, curr_net)) write_pnet(&devlink->_net, dest_net); - } err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack); devlink_reload_failed_set(devlink, !!err); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 078a0a420c8a..7b69cf882b8e 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -304,6 +304,12 @@ struct net *get_net_ns_by_id(const struct net *net, int id) } EXPORT_SYMBOL_GPL(get_net_ns_by_id); +/* init code that must occur even if setup_net() is not called. */ +static __net_init void preinit_net(struct net *net) +{ + ref_tracker_dir_init(&net->notrefcnt_tracker, 128); +} + /* * setup_net runs the initializers for the network namespace object. */ @@ -316,7 +322,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128); - ref_tracker_dir_init(&net->notrefcnt_tracker, 128); refcount_set(&net->passive, 1); get_random_bytes(&net->hash_mix, sizeof(u32)); @@ -472,6 +477,8 @@ struct net *copy_net_ns(unsigned long flags, rv = -ENOMEM; goto dec_ucounts; } + + preinit_net(net); refcount_set(&net->passive, 1); net->ucounts = ucounts; get_user_ns(user_ns); @@ -1118,6 +1125,7 @@ void __init net_ns_init(void) init_net.key_domain = &init_net_key_domain; #endif down_write(&pernet_ops_rwsem); + preinit_net(&init_net); if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); diff --git a/net/core/stream.c b/net/core/stream.c index cd06750dd329..434446ab14c5 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -209,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk) sk_mem_reclaim_final(sk); WARN_ON_ONCE(sk->sk_wmem_queued); - WARN_ON_ONCE(sk->sk_forward_alloc); /* It is _impossible_ for the backlog to contain anything * when we get here. 
All user references to this socket diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 4260fe466993..b9d7c3dd1cb3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -551,11 +551,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) - skb_set_owner_r(newnp->pktoptions, newsk); } return newsk; @@ -615,7 +613,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) @@ -679,7 +677,6 @@ ipv6_pktoptions: np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &DCCP_SKB_CB(opt_skb)->header.h6)) { - skb_set_owner_r(opt_skb, sk); memmove(IP6CB(opt_skb), &DCCP_SKB_CB(opt_skb)->header.h6, sizeof(struct inet6_skb_parm)); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index e624497fa992..9b6818453afe 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk) fl6->flowi6_mark = sk->sk_mark; fl6->fl6_dport = inet->inet_dport; fl6->fl6_sport = inet->inet_sport; - fl6->flowlabel = np->flow_label; + fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6->flowi6_uid = sk->sk_uid; if (!oif) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 11b736a76bd7..a52a4f12f146 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -272,6 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? 
*saddr : np->saddr; + fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; @@ -1387,14 +1388,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { - newnp->pktoptions = skb_clone(ireq->pktopts, - sk_gfp_mask(sk, GFP_ATOMIC)); + newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; - if (newnp->pktoptions) { + if (newnp->pktoptions) tcp_v6_restore_cb(newnp->pktoptions); - skb_set_owner_r(newnp->pktoptions, newsk); - } } } else { if (!req_unhash && found_dup_sk) { @@ -1466,7 +1464,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); + opt_skb = skb_clone_and_charge_r(skb, sk); reason = SKB_DROP_REASON_NOT_SPECIFIED; if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ @@ -1552,7 +1550,6 @@ ipv6_pktoptions: if (np->repflow) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { - skb_set_owner_r(opt_skb, sk); tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { diff --git a/net/key/af_key.c b/net/key/af_key.c index 2bdbcec781cd..a815f5ab4c49 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1261,7 +1261,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net, const struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; - x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL); + x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL); if (!x->encap) { err = -ENOMEM; goto out; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 35b5f806fdda..dc5165d3eec4 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1428,6 +1428,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev, free: kfree(table); out: + mdev->sysctl = NULL; return -ENOBUFS; } @@ -1437,6 +1438,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev, struct net *net = dev_net(dev); struct ctl_table *table; + if (!mdev->sysctl) + return; + table = mdev->sysctl->ctl_table_arg; unregister_net_sysctl_table(mdev->sysctl); kfree(table); diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 6e38f68f88c2..f2698d2316df 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -449,7 +449,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) err = attach_meter(meter_tbl, meter); if (err) - goto exit_unlock; + goto exit_free_old_meter; ovs_unlock(); @@ -472,6 +472,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) genlmsg_end(reply, ovs_reply_header); return genlmsg_reply(reply, info); +exit_free_old_meter: + ovs_meter_free(old_meter); exit_unlock: ovs_unlock(); nlmsg_free(reply); diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 4b1b59da5c0b..4d15b6a6169c 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -93,7 +93,7 @@ TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb, cp = rcu_dereference_bh(ca->params); tcf_lastuse_update(&ca->tcf_tm); - bstats_update(&ca->tcf_bstats, skb); + tcf_action_update_bstats(&ca->common, skb); action = READ_ONCE(ca->tcf_action); wlen = skb_network_offset(skb); @@ -212,8 +212,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla, index = actparm->index; 
err = tcf_idr_check_alloc(tn, &index, a, bind); if (!err) { - ret = tcf_idr_create(tn, index, est, a, - &act_ctinfo_ops, bind, false, flags); + ret = tcf_idr_create_from_flags(tn, index, est, a, + &act_ctinfo_ops, bind, flags); if (ret) { tcf_idr_cleanup(tn, index); return ret; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index ee2a050c887b..6640e75eaa02 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -12,6 +12,7 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/refcount.h> +#include <linux/rcupdate.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> @@ -339,6 +340,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcf_result cr = {}; int err, balloc = 0; struct tcf_exts e; + bool update_h = false; err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) @@ -456,10 +458,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } } - if (cp->perfect) + if (cp->perfect) { r = cp->perfect + handle; - else - r = tcindex_lookup(cp, handle) ? : &new_filter_result; + } else { + /* imperfect area is updated in-place using rcu */ + update_h = !!tcindex_lookup(cp, handle); + r = &new_filter_result; + } if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -485,7 +490,28 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, rcu_assign_pointer(tp->root, cp); - if (r == &new_filter_result) { + if (update_h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *cf; + + f->result.res = r->res; + tcf_exts_change(&f->result.exts, &r->exts); + + /* imperfect area bucket */ + fp = cp->h + (handle % cp->hash); + + /* lookup the filter, guaranteed to exist */ + for (cf = rcu_dereference_bh_rtnl(*fp); cf; + fp = &cf->next, cf = rcu_dereference_bh_rtnl(*fp)) + if (cf->key == (u16)handle) + break; + + f->next = cf->next; + + cf = rcu_replace_pointer(*fp, f, 1); + tcf_exts_get_net(&cf->result.exts); + tcf_queue_work(&cf->rwork, tcindex_destroy_fexts_work); + } else if (r == &new_filter_result) { struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; diff --git a/net/sctp/diag.c b/net/sctp/diag.c index a557009e9832..c3d6b92dd386 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -343,11 +343,9 @@ static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; - struct sctp_association *assoc = - list_entry(ep->asocs.next, struct sctp_association, asocs); /* find the ep only once through the transports by this condition */ - if (tsp->asoc != assoc) + if (!list_is_first(&tsp->asoc->asocs, &ep->asocs)) return 0; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) diff --git a/net/socket.c b/net/socket.c index 888cd618a968..c12af3c84d3a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -971,9 +971,12 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, static void sock_recv_mark(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - if (sock_flag(sk, SOCK_RCVMARK) && skb) - put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), - &skb->mark); + if (sock_flag(sk, SOCK_RCVMARK) && skb) { + /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */ + __u32 mark = skb->mark; + + put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark); + } } void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, diff --git 
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b35c8701876a..a38733f2197a 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2614,6 +2614,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
 	/* Send a 'SYN-' to destination */
 	m.msg_name = dest;
 	m.msg_namelen = destlen;
+	iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
 
 	/* If connect is in non-blocking case, set MSG_DONTWAIT to
 	 * indicate send_msg() is never blocked.
@@ -2776,6 +2777,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 		__skb_queue_head(&new_sk->sk_receive_queue, buf);
 		skb_set_owner_r(buf, new_sk);
 	}
+	iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
 	__tipc_sendstream(new_sock, &m, 0);
 	release_sock(new_sk);
 exit:
diff --git a/scripts/gdb/linux/cpus.py b/scripts/gdb/linux/cpus.py
index 15fc4626d236..9ee99f9fae8d 100644
--- a/scripts/gdb/linux/cpus.py
+++ b/scripts/gdb/linux/cpus.py
@@ -163,7 +163,7 @@ def get_current_task(cpu):
     task_ptr_type = task_type.get_type().pointer()
 
     if utils.is_target_arch("x86"):
-        var_ptr = gdb.parse_and_eval("&current_task")
+        var_ptr = gdb.parse_and_eval("&pcpu_hot.current_task")
         return per_cpu(var_ptr, cpu).dereference()
     elif utils.is_target_arch("aarch64"):
        current_task_addr = gdb.parse_and_eval("$SP_EL0")
diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c
index 9e52e218bf30..cc89d1e88fb7 100644
--- a/security/apparmor/policy_compat.c
+++ b/security/apparmor/policy_compat.c
@@ -160,8 +160,7 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
 	if (!table)
 		return NULL;
 
-	/* zero init so skip the trap state (state == 0) */
-	for (state = 1; state < state_count; state++) {
+	for (state = 0; state < state_count; state++) {
 		table[state * 2] = compute_fperms_user(dfa, state);
 		table[state * 2 + 1] = compute_fperms_other(dfa, state);
 	}
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index ac1cc7c5290e..2e728aad6771 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -927,7 +927,6 @@ snd_hda_codec_device_init(struct hda_bus *bus, unsigned int codec_addr,
 	codec->depop_delay = -1;
 	codec->fixup_id = HDA_FIXUP_ID_NOT_SET;
 	codec->core.dev.release = snd_hda_codec_dev_release;
-	codec->core.exec_verb = codec_exec_verb;
 	codec->core.type = HDA_DEV_LEGACY;
 
 	mutex_init(&codec->spdif_mutex);
@@ -998,6 +997,7 @@ int snd_hda_codec_device_new(struct hda_bus *bus, struct snd_card *card,
 	if (snd_BUG_ON(codec_addr > HDA_MAX_CODEC_ADDRESS))
 		return -EINVAL;
 
+	codec->core.exec_verb = codec_exec_verb;
 	codec->card = card;
 	codec->addr = codec_addr;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 7b1a30a551f6..75e1d00074b9 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -1125,6 +1125,7 @@ static const struct hda_device_id snd_hda_id_conexant[] = {
 	HDA_CODEC_ENTRY(0x14f11f87, "SN6140", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f120d0, "CX11970", patch_conexant_auto),
+	HDA_CODEC_ENTRY(0x14f120d1, "SN6180", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto),
 	HDA_CODEC_ENTRY(0x14f15051, "CX20561 (Hermosa)", patch_conexant_auto),
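On the tipc change earlier in this span: tipc_connect() and tipc_accept() send zero-length messages through the stream send path with a msghdr whose msg_iter was never initialized, so anything that later inspects the iterator reads garbage. Initializing an empty kvec iterator makes the msghdr well-formed even with no payload. A kernel-context sketch of the pattern (compiles only in-kernel; the helper name is illustrative):

    #include <linux/socket.h>
    #include <linux/string.h>
    #include <linux/uio.h>

    /* Build a msghdr for a zero-length kernel-side send: even without a
     * payload, msg_iter must be a valid (empty) source iterator before
     * the message is handed to the send path. */
    static void init_empty_msg(struct msghdr *m)
    {
            memset(m, 0, sizeof(*m));
            iov_iter_kvec(&m->msg_iter, ITER_SOURCE, NULL, 0, 0);
    }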
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1134a493d225..e103bb3693c0 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -832,7 +832,7 @@ do_sku:
 		alc_setup_gpio(codec, 0x02);
 		break;
 	case 7:
-		alc_setup_gpio(codec, 0x03);
+		alc_setup_gpio(codec, 0x04);
 		break;
 	case 5:
 	default:
@@ -9432,10 +9432,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8abb, "HP ZBook Firefly 14 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8ad1, "HP EliteBook 840 14 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8ad2, "HP EliteBook 860 16 inch G9 Notebook PC", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b42, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b43, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b44, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b45, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b46, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b47, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
 	SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
 	SND_PCI_QUIRK(0x103c, 0x8b7a, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b7d, "HP", ALC236_FIXUP_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b87, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b8a, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b8b, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b8d, "HP", ALC236_FIXUP_HP_GPIO_LED),
diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c
index 1c3d4887aa30..a642c3067ec5 100644
--- a/sound/soc/sof/intel/hda-dai.c
+++ b/sound/soc/sof/intel/hda-dai.c
@@ -216,6 +216,10 @@ static int hda_link_dma_hw_params(struct snd_pcm_substream *substream,
 	sdev = snd_soc_component_get_drvdata(cpu_dai->component);
 	bus = sof_to_bus(sdev);
 
+	hlink = snd_hdac_ext_bus_get_hlink_by_name(bus, codec_dai->component->name);
+	if (!hlink)
+		return -EINVAL;
+
 	hext_stream = snd_soc_dai_get_dma_data(cpu_dai, substream);
 	if (!hext_stream) {
 		hext_stream = hda_link_stream_assign(bus, substream);
@@ -225,10 +229,6 @@ static int hda_link_dma_hw_params(struct snd_pcm_substream *substream,
 		snd_soc_dai_set_dma_data(cpu_dai, substream, (void *)hext_stream);
 	}
 
-	hlink = snd_hdac_ext_bus_get_hlink_by_name(bus, codec_dai->component->name);
-	if (!hlink)
-		return -EINVAL;
-
 	/* set the hdac_stream in the codec dai */
 	snd_soc_dai_set_stream(codec_dai, hdac_stream(hext_stream), substream->stream);
diff --git a/sound/soc/sof/ops.h b/sound/soc/sof/ops.h
index c52752250565..3b3f3cf7af38 100644
--- a/sound/soc/sof/ops.h
+++ b/sound/soc/sof/ops.h
@@ -357,7 +357,7 @@ static inline u64 snd_sof_dsp_read64(struct snd_sof_dev *sdev, u32 bar,
 }
 
 static inline void snd_sof_dsp_update8(struct snd_sof_dev *sdev, u32 bar,
-				       u32 offset, u8 value, u8 mask)
+				       u32 offset, u8 mask, u8 value)
 {
 	u8 reg;
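The ops.h change only swaps the names of the last two parameters so the prototype matches the (mask, value) order that callers and the wider update_bits helpers use; since both are u8, a mismatch is silent at the call site. The read-modify-write such a helper performs reduces to something like this standalone sketch (illustrative, not the SOF implementation):

    #include <stdint.h>

    /* Clear the bits selected by mask, then or-in the new value's bits.
     * With (mask, value) swapped the register is silently corrupted,
     * which is what the parameter rename guards against. */
    static inline uint8_t update8(uint8_t reg, uint8_t mask, uint8_t value)
    {
            return (reg & ~mask) | (value & mask);
    }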
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index c245476fa29d..63c3eaec8d30 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -10,8 +10,10 @@ ret=0
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 
 IP="ip -netns testns"
+IP_PEER="ip -netns peerns"
 
 RTABLE=100
+RTABLE_PEER=101
 GW_IP4=192.51.100.2
 SRC_IP=192.51.100.3
 GW_IP6=2001:db8:1::2
@@ -20,7 +22,9 @@ SRC_IP6=2001:db8:1::3
 DEV_ADDR=192.51.100.1
 DEV_ADDR6=2001:db8:1::1
 DEV=dummy0
-TESTS="fib_rule6 fib_rule4"
+TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"
+
+SELFTEST_PATH=""
 
 log_test()
 {
@@ -52,6 +56,31 @@ log_section()
 	echo "######################################################################"
 }
 
+check_nettest()
+{
+	if which nettest > /dev/null 2>&1; then
+		return 0
+	fi
+
+	# Add the selftest directory to PATH if not already done
+	if [ "${SELFTEST_PATH}" = "" ]; then
+		SELFTEST_PATH="$(dirname $0)"
+		PATH="${PATH}:${SELFTEST_PATH}"
+
+		# Now retry with the new path
+		if which nettest > /dev/null 2>&1; then
+			return 0
+		fi
+
+		if [ "${ret}" -eq 0 ]; then
+			ret="${ksft_skip}"
+		fi
+		echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
+	fi
+
+	return 1
+}
+
 setup()
 {
 	set -e
@@ -72,6 +101,39 @@ cleanup()
 	ip netns del testns
 }
 
+setup_peer()
+{
+	set -e
+
+	ip netns add peerns
+	$IP_PEER link set dev lo up
+
+	ip link add name veth0 netns testns type veth \
+		peer name veth1 netns peerns
+	$IP link set dev veth0 up
+	$IP_PEER link set dev veth1 up
+
+	$IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0
+	$IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1
+
+	$IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad
+	$IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad
+
+	$IP_PEER address add 198.51.100.11/32 dev lo
+	$IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11
+
+	$IP_PEER address add 2001:db8::1:11/128 dev lo
+	$IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11
+
+	set +e
+}
+
+cleanup_peer()
+{
+	$IP link del dev veth0
+	ip netns del peerns
+}
+
 fib_check_iproute_support()
 {
 	ip rule help 2>&1 | grep -q $1
@@ -190,6 +252,37 @@ fib_rule6_test()
 	fi
 }
 
+# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly
+# taken into account when connecting the socket and when sending packets.
+fib_rule6_connect_test()
+{
+	local dsfield
+
+	if ! check_nettest; then
+		echo "SKIP: Could not run test without nettest tool"
+		return
+	fi
+
+	setup_peer
+	$IP -6 rule add dsfield 0x04 table $RTABLE_PEER
+
+	# Combine the base DS Field value (0x04) with all possible ECN values
+	# (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
+	# The ECN bits shouldn't influence the result of the test.
+	for dsfield in 0x04 0x05 0x06 0x07; do
+		nettest -q -6 -B -t 5 -N testns -O peerns -U -D \
+			-Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11
+		log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})"
+
+		nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \
+			-l 2001:db8::1:11 -r 2001:db8::1:11
+		log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
+	done
+
+	$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
+	cleanup_peer
+}
+
 fib_rule4_del()
 {
 	$IP rule del $1
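The 0x04..0x07 loop in these tests encodes one fact about the IPv4/IPv6 traffic-class byte: the upper six bits carry the DSCP, which is what an `ip rule ... dsfield` selector is meant to match, while the low two bits carry ECN and must not influence routing. In C terms (the mask name is illustrative):

    #include <stdint.h>

    #define ECN_MASK 0x03   /* low two bits: Not-ECT/ECT(1)/ECT(0)/CE */

    /* DSCP portion of the DS Field, still in its byte position; all
     * four looped values 0x04..0x07 collapse to 0x04 once ECN is
     * masked, so the fib rule must match every one of them. */
    static inline uint8_t dsfield_dscp(uint8_t dsfield)
    {
            return dsfield & ~ECN_MASK;
    }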
0 "rule4 dsfield udp connect (dsfield ${dsfield})" + + nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" + done + + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER + cleanup_peer +} + run_fibrule_tests() { log_section "IPv4 fib rule" @@ -345,6 +469,8 @@ do case $t in fib_rule6_test|fib_rule6) fib_rule6_test;; fib_rule4_test|fib_rule4) fib_rule4_test;; + fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;; + fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;; help) echo "Test names: $TESTS"; exit 0;; diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 7900fa98eccb..ee9a72982705 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -87,6 +87,7 @@ struct sock_args { int use_setsockopt; int use_freebind; int use_cmsg; + uint8_t dsfield; const char *dev; const char *server_dev; int ifindex; @@ -580,6 +581,36 @@ static int set_reuseaddr(int sd) return rc; } +static int set_dsfield(int sd, int version, int dsfield) +{ + if (!dsfield) + return 0; + + switch (version) { + case AF_INET: + if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IP_TOS)"); + return -1; + } + break; + + case AF_INET6: + if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield, + sizeof(dsfield)) < 0) { + log_err_errno("setsockopt(IPV6_TCLASS)"); + return -1; + } + break; + + default: + log_error("Invalid address family\n"); + return -1; + } + + return 0; +} + static int str_to_uint(const char *str, int min, int max, unsigned int *value) { int number; @@ -1317,6 +1348,9 @@ static int msock_init(struct sock_args *args, int server) (char *)&one, sizeof(one)) < 0) log_err_errno("Setting SO_BROADCAST error"); + if (set_dsfield(sd, AF_INET, args->dsfield) != 0) + goto out_err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto out_err; else if (args->use_setsockopt && @@ -1445,6 +1479,9 @@ static int lsock_init(struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1658,6 +1695,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args) if (set_reuseport(sd) != 0) goto err; + if (set_dsfield(sd, args->version, args->dsfield) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1862,7 +1902,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) return client_status; } -#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" +#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" #define OPT_FORCE_BIND_KEY_IFINDEX 1001 #define OPT_NO_BIND_KEY_IFINDEX 1002 @@ -1893,6 +1933,8 @@ static void print_usage(char *prog) " -D|R datagram (D) / raw (R) socket (default stream)\n" " -l addr local address to bind to in server mode\n" " -c addr local address to bind to in client mode\n" + " -Q dsfield DS Field value of the socket (the IP_TOS or\n" + " IPV6_TCLASS socket option)\n" " -x configure XFRM policy on socket\n" "\n" " -d dev bind socket to given device name\n" @@ -1971,6 +2013,13 @@ int main(int argc, char *argv[]) args.has_local_ip = 1; args.client_local_addr_str = optarg; break; + case 'Q': + if (str_to_uint(optarg, 0, 255, &tmp) != 0) { + 
@@ -1317,6 +1348,9 @@ static int msock_init(struct sock_args *args, int server)
 		       (char *)&one, sizeof(one)) < 0)
 		log_err_errno("Setting SO_BROADCAST error");
 
+	if (set_dsfield(sd, AF_INET, args->dsfield) != 0)
+		goto out_err;
+
 	if (args->dev && bind_to_device(sd, args->dev) != 0)
 		goto out_err;
 	else if (args->use_setsockopt &&
@@ -1445,6 +1479,9 @@ static int lsock_init(struct sock_args *args)
 	if (set_reuseport(sd) != 0)
 		goto err;
 
+	if (set_dsfield(sd, args->version, args->dsfield) != 0)
+		goto err;
+
 	if (args->dev && bind_to_device(sd, args->dev) != 0)
 		goto err;
 	else if (args->use_setsockopt &&
@@ -1658,6 +1695,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
 	if (set_reuseport(sd) != 0)
 		goto err;
 
+	if (set_dsfield(sd, args->version, args->dsfield) != 0)
+		goto err;
+
 	if (args->dev && bind_to_device(sd, args->dev) != 0)
 		goto err;
 	else if (args->use_setsockopt &&
@@ -1862,7 +1902,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
 	return client_status;
 }
 
-#define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
+#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
 
 #define OPT_FORCE_BIND_KEY_IFINDEX 1001
 #define OPT_NO_BIND_KEY_IFINDEX 1002
@@ -1893,6 +1933,8 @@ static void print_usage(char *prog)
 	"    -D|R          datagram (D) / raw (R) socket (default stream)\n"
 	"    -l addr       local address to bind to in server mode\n"
 	"    -c addr       local address to bind to in client mode\n"
+	"    -Q dsfield    DS Field value of the socket (the IP_TOS or\n"
+	"                  IPV6_TCLASS socket option)\n"
 	"    -x            configure XFRM policy on socket\n"
 	"\n"
 	"    -d dev        bind socket to given device name\n"
@@ -1971,6 +2013,13 @@ int main(int argc, char *argv[])
 			args.has_local_ip = 1;
 			args.client_local_addr_str = optarg;
 			break;
+		case 'Q':
+			if (str_to_uint(optarg, 0, 255, &tmp) != 0) {
+				fprintf(stderr, "Invalid DS Field\n");
+				return 1;
+			}
+			args.dsfield = tmp;
+			break;
 		case 'p':
 			if (str_to_uint(optarg, 1, 65535, &tmp) != 0) {
 				fprintf(stderr, "Invalid port\n");