diff options
128 files changed, 1640 insertions, 596 deletions
diff --git a/Documentation/driver-api/firmware/other_interfaces.rst b/Documentation/driver-api/firmware/other_interfaces.rst index b81794e0cfbb..06ac89adaafb 100644 --- a/Documentation/driver-api/firmware/other_interfaces.rst +++ b/Documentation/driver-api/firmware/other_interfaces.rst @@ -13,6 +13,12 @@ EDD Interfaces .. kernel-doc:: drivers/firmware/edd.c :internal: +Generic System Framebuffers Interface +------------------------------------- + +.. kernel-doc:: drivers/firmware/sysfb.c + :export: + Intel Stratix10 SoC Service Layer --------------------------------- Some features of the Intel Stratix10 SoC require a level of privilege diff --git a/MAINTAINERS b/MAINTAINERS index fe5daf141501..66bffb24a348 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4976,6 +4976,7 @@ Q: http://patchwork.kernel.org/project/linux-clk/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git F: Documentation/devicetree/bindings/clock/ F: drivers/clk/ +F: include/dt-bindings/clock/ F: include/linux/clk-pr* F: include/linux/clk/ F: include/linux/of_clk.h @@ -14397,9 +14398,8 @@ F: Documentation/devicetree/bindings/sound/nxp,tfa989x.yaml F: sound/soc/codecs/tfa989x.c NXP-NCI NFC DRIVER -R: Charles Gorand <charles.gorand@effinnov.com> L: linux-nfc@lists.01.org (subscribers-only) -S: Supported +S: Orphan F: Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml F: drivers/nfc/nxp-nci diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c index e07782b1fbb6..43177c20ce4f 100644 --- a/drivers/acpi/acpi_video.c +++ b/drivers/acpi/acpi_video.c @@ -73,6 +73,7 @@ module_param(device_id_scheme, bool, 0444); static int only_lcd = -1; module_param(only_lcd, int, 0444); +static bool has_backlight; static int register_count; static DEFINE_MUTEX(register_count_mutex); static DEFINE_MUTEX(video_list_lock); @@ -1222,6 +1223,9 @@ acpi_video_bus_get_one_device(struct acpi_device *device, acpi_video_device_bind(video, data); acpi_video_device_find_cap(data); + if (data->cap._BCM && data->cap._BCL) + has_backlight = true; + mutex_lock(&video->device_list_lock); list_add_tail(&data->entry, &video->video_device_list); mutex_unlock(&video->device_list_lock); @@ -2249,6 +2253,7 @@ void acpi_video_unregister(void) if (register_count) { acpi_bus_unregister_driver(&acpi_video_bus); register_count = 0; + has_backlight = false; } mutex_unlock(®ister_count_mutex); } @@ -2270,13 +2275,7 @@ void acpi_video_unregister_backlight(void) bool acpi_video_handles_brightness_key_presses(void) { - bool have_video_busses; - - mutex_lock(&video_list_lock); - have_video_busses = !list_empty(&video_bus_head); - mutex_unlock(&video_list_lock); - - return have_video_busses && + return has_backlight && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS); } EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses); diff --git a/drivers/ata/pata_cs5535.c b/drivers/ata/pata_cs5535.c index 6725931f3c35..c2c3238ff84b 100644 --- a/drivers/ata/pata_cs5535.c +++ b/drivers/ata/pata_cs5535.c @@ -90,7 +90,7 @@ static void cs5535_set_piomode(struct ata_port *ap, struct ata_device *adev) static const u16 pio_cmd_timings[5] = { 0xF7F4, 0x53F3, 0x13F1, 0x5131, 0x1131 }; - u32 reg, dummy; + u32 reg, __maybe_unused dummy; struct ata_device *pair = ata_dev_pair(adev); int mode = adev->pio_mode - XFER_PIO_0; @@ -129,7 +129,7 @@ static void cs5535_set_dmamode(struct ata_port *ap, struct ata_device *adev) static const u32 mwdma_timings[3] = { 0x7F0FFFF3, 0x7F035352, 0x7F024241 }; - u32 reg, dummy; + u32 reg, __maybe_unused dummy; int mode = adev->dma_mode; rdmsr(ATAC_CH0D0_DMA + 2 * adev->devno, reg, dummy); diff --git a/drivers/clk/stm32/reset-stm32.c b/drivers/clk/stm32/reset-stm32.c index 040870130e4b..e89381528af9 100644 --- a/drivers/clk/stm32/reset-stm32.c +++ b/drivers/clk/stm32/reset-stm32.c @@ -111,6 +111,7 @@ int stm32_rcc_reset_init(struct device *dev, const struct of_device_id *match, if (!reset_data) return -ENOMEM; + spin_lock_init(&reset_data->lock); reset_data->membase = base; reset_data->rcdev.owner = THIS_MODULE; reset_data->rcdev.ops = &stm32_reset_ops; diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c index 9dba52fbee99..7d79a8744f9a 100644 --- a/drivers/crypto/ccp/sp-platform.c +++ b/drivers/crypto/ccp/sp-platform.c @@ -85,17 +85,9 @@ static int sp_get_irqs(struct sp_device *sp) struct sp_platform *sp_platform = sp->dev_specific; struct device *dev = sp->dev; struct platform_device *pdev = to_platform_device(dev); - unsigned int i, count; int ret; - for (i = 0, count = 0; i < pdev->num_resources; i++) { - struct resource *res = &pdev->resource[i]; - - if (resource_type(res) == IORESOURCE_IRQ) - count++; - } - - sp_platform->irq_count = count; + sp_platform->irq_count = platform_irq_count(pdev); ret = platform_get_irq(pdev, 0); if (ret < 0) { @@ -104,7 +96,7 @@ static int sp_get_irqs(struct sp_device *sp) } sp->psp_irq = ret; - if (count == 1) { + if (sp_platform->irq_count == 1) { sp->ccp_irq = ret; } else { ret = platform_get_irq(pdev, 1); diff --git a/drivers/firmware/sysfb.c b/drivers/firmware/sysfb.c index 2bfbb05f7d89..1f276f108cc9 100644 --- a/drivers/firmware/sysfb.c +++ b/drivers/firmware/sysfb.c @@ -34,21 +34,59 @@ #include <linux/screen_info.h> #include <linux/sysfb.h> +static struct platform_device *pd; +static DEFINE_MUTEX(disable_lock); +static bool disabled; + +static bool sysfb_unregister(void) +{ + if (IS_ERR_OR_NULL(pd)) + return false; + + platform_device_unregister(pd); + pd = NULL; + + return true; +} + +/** + * sysfb_disable() - disable the Generic System Framebuffers support + * + * This disables the registration of system framebuffer devices that match the + * generic drivers that make use of the system framebuffer set up by firmware. + * + * It also unregisters a device if this was already registered by sysfb_init(). + * + * Context: The function can sleep. A @disable_lock mutex is acquired to serialize + * against sysfb_init(), that registers a system framebuffer device. + */ +void sysfb_disable(void) +{ + mutex_lock(&disable_lock); + sysfb_unregister(); + disabled = true; + mutex_unlock(&disable_lock); +} +EXPORT_SYMBOL_GPL(sysfb_disable); + static __init int sysfb_init(void) { struct screen_info *si = &screen_info; struct simplefb_platform_data mode; - struct platform_device *pd; const char *name; bool compatible; - int ret; + int ret = 0; + + mutex_lock(&disable_lock); + if (disabled) + goto unlock_mutex; /* try to create a simple-framebuffer device */ compatible = sysfb_parse_mode(si, &mode); if (compatible) { - ret = sysfb_create_simplefb(si, &mode); - if (!ret) - return 0; + pd = sysfb_create_simplefb(si, &mode); + if (!IS_ERR(pd)) + goto unlock_mutex; } /* if the FB is incompatible, create a legacy framebuffer device */ @@ -60,8 +98,10 @@ static __init int sysfb_init(void) name = "platform-framebuffer"; pd = platform_device_alloc(name, 0); - if (!pd) - return -ENOMEM; + if (!pd) { + ret = -ENOMEM; + goto unlock_mutex; + } sysfb_apply_efi_quirks(pd); @@ -73,9 +113,11 @@ static __init int sysfb_init(void) if (ret) goto err; - return 0; + goto unlock_mutex; err: platform_device_put(pd); +unlock_mutex: + mutex_unlock(&disable_lock); return ret; } diff --git a/drivers/firmware/sysfb_simplefb.c b/drivers/firmware/sysfb_simplefb.c index bda8712bfd8c..a353e27f83f5 100644 --- a/drivers/firmware/sysfb_simplefb.c +++ b/drivers/firmware/sysfb_simplefb.c @@ -57,8 +57,8 @@ __init bool sysfb_parse_mode(const struct screen_info *si, return false; } -__init int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +__init struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { struct platform_device *pd; struct resource res; @@ -76,7 +76,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si, base |= (u64)si->ext_lfb_base << 32; if (!base || (u64)(resource_size_t)base != base) { printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } /* @@ -93,7 +93,7 @@ __init int sysfb_create_simplefb(const struct screen_info *si, length = mode->height * mode->stride; if (length > size) { printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n"); - return -EINVAL; + return ERR_PTR(-EINVAL); } length = PAGE_ALIGN(length); @@ -104,11 +104,11 @@ __init int sysfb_create_simplefb(const struct screen_info *si, res.start = base; res.end = res.start + length - 1; if (res.end <= res.start) - return -EINVAL; + return ERR_PTR(-EINVAL); pd = platform_device_alloc("simple-framebuffer", 0); if (!pd) - return -ENOMEM; + return ERR_PTR(-ENOMEM); sysfb_apply_efi_quirks(pd); @@ -124,10 +124,10 @@ __init int sysfb_create_simplefb(const struct screen_info *si, if (ret) goto err_put_device; - return 0; + return pd; err_put_device: platform_device_put(pd); - return ret; + return ERR_PTR(ret); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 1f8161cd507f..3b1c675aba34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -714,7 +714,8 @@ int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct amdgpu_device *adev, { bool all_hub = false; - if (adev->family == AMDGPU_FAMILY_AI) + if (adev->family == AMDGPU_FAMILY_AI || + adev->family == AMDGPU_FAMILY_RV) all_hub = true; return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 625424f3082b..58df107e3beb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5164,7 +5164,7 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, */ amdgpu_unregister_gpu_instance(tmp_adev); - drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true); + drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true); /* disable ras on ALL IPs */ if (!need_emergency_restart && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index b4cf8717f554..89011bae7588 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -320,6 +320,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) /* Disable vblank IRQs aggressively for power-saving */ + /* XXX: can this be enabled for DC? */ adev_to_drm(adev)->vblank_disable_immediate = true; r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 39b425d83bb1..9dd2e0601ea8 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -4259,9 +4259,6 @@ static int amdgpu_dm_initialize_drm_device(struct amdgpu_device *adev) } } - /* Disable vblank IRQs aggressively for power-saving. */ - adev_to_drm(adev)->vblank_disable_immediate = true; - /* loops over all connectors on the board */ for (i = 0; i < link_cnt; i++) { struct dc_link *link = NULL; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index ab4c5ab28e4d..321af109d484 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -933,8 +933,9 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_PERSISTENCE: if (args->size) ret = -EINVAL; - ret = proto_context_set_persistence(fpriv->dev_priv, pc, - args->value); + else + ret = proto_context_set_persistence(fpriv->dev_priv, pc, + args->value); break; case I915_CONTEXT_PARAM_PROTECTED_CONTENT: diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c index 3e5d6057b3ef..1674b0c5802b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c @@ -35,12 +35,12 @@ bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj) if (obj->cache_dirty) return false; - if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) - return true; - if (IS_DGFX(i915)) return false; + if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) + return true; + /* Currently in use by HW (display engine)? Keep flushed. */ return i915_gem_object_is_framebuffer(obj); } diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c index 90b0ce5051af..1041b5340465 100644 --- a/drivers/gpu/drm/i915/i915_driver.c +++ b/drivers/gpu/drm/i915/i915_driver.c @@ -530,6 +530,7 @@ mask_err: static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *root_pdev; int ret; if (i915_inject_probe_failure(dev_priv)) @@ -641,6 +642,15 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) intel_bw_init_hw(dev_priv); + /* + * FIXME: Temporary hammer to avoid freezing the machine on our DGFX + * This should be totally removed when we handle the pci states properly + * on runtime PM and on s2idle cases. + */ + root_pdev = pcie_find_root_port(pdev); + if (root_pdev) + pci_d3cold_disable(root_pdev); + return 0; err_msi: @@ -664,11 +674,16 @@ err_perf: static void i915_driver_hw_remove(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); + struct pci_dev *root_pdev; i915_perf_fini(dev_priv); if (pdev->msi_enabled) pci_disable_msi(pdev); + + root_pdev = pcie_find_root_port(pdev); + if (root_pdev) + pci_d3cold_enable(root_pdev); } /** @@ -1193,14 +1208,6 @@ static int i915_drm_suspend_late(struct drm_device *dev, bool hibernation) goto out; } - /* - * FIXME: Temporary hammer to avoid freezing the machine on our DGFX - * This should be totally removed when we handle the pci states properly - * on runtime PM and on s2idle cases. - */ - if (suspend_to_idle(dev_priv)) - pci_d3cold_disable(pdev); - pci_disable_device(pdev); /* * During hibernation on some platforms the BIOS may try to access @@ -1365,8 +1372,6 @@ static int i915_drm_resume_early(struct drm_device *dev) pci_set_master(pdev); - pci_d3cold_enable(pdev); - disable_rpm_wakeref_asserts(&dev_priv->runtime_pm); ret = vlv_resume_prepare(dev_priv, false); @@ -1543,7 +1548,6 @@ static int intel_runtime_suspend(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); int ret; if (drm_WARN_ON_ONCE(&dev_priv->drm, !HAS_RUNTIME_PM(dev_priv))) @@ -1589,12 +1593,6 @@ static int intel_runtime_suspend(struct device *kdev) drm_err(&dev_priv->drm, "Unclaimed access detected prior to suspending\n"); - /* - * FIXME: Temporary hammer to avoid freezing the machine on our DGFX - * This should be totally removed when we handle the pci states properly - * on runtime PM and on s2idle cases. - */ - pci_d3cold_disable(pdev); rpm->suspended = true; /* @@ -1633,7 +1631,6 @@ static int intel_runtime_resume(struct device *kdev) { struct drm_i915_private *dev_priv = kdev_to_i915(kdev); struct intel_runtime_pm *rpm = &dev_priv->runtime_pm; - struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev); int ret; if (drm_WARN_ON_ONCE(&dev_priv->drm, !HAS_RUNTIME_PM(dev_priv))) @@ -1646,7 +1643,6 @@ static int intel_runtime_resume(struct device *kdev) intel_opregion_notify_adapter(dev_priv, PCI_D0); rpm->suspended = false; - pci_d3cold_enable(pdev); if (intel_uncore_unclaimed_mmio(&dev_priv->uncore)) drm_dbg(&dev_priv->drm, "Unclaimed access during suspend, bios?\n"); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 3a462e327e0e..a1b8c4592943 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1251,12 +1251,13 @@ static void dpu_encoder_vblank_callback(struct drm_encoder *drm_enc, DPU_ATRACE_BEGIN("encoder_vblank_callback"); dpu_enc = to_dpu_encoder_virt(drm_enc); + atomic_inc(&phy_enc->vsync_cnt); + spin_lock_irqsave(&dpu_enc->enc_spinlock, lock_flags); if (dpu_enc->crtc) dpu_crtc_vblank_callback(dpu_enc->crtc); spin_unlock_irqrestore(&dpu_enc->enc_spinlock, lock_flags); - atomic_inc(&phy_enc->vsync_cnt); DPU_ATRACE_END("encoder_vblank_callback"); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 59da348ff339..0ec809ab06e7 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -252,11 +252,6 @@ static int dpu_encoder_phys_wb_atomic_check( DPU_DEBUG("[atomic_check:%d, \"%s\",%d,%d]\n", phys_enc->wb_idx, mode->name, mode->hdisplay, mode->vdisplay); - if (!conn_state->writeback_job || !conn_state->writeback_job->fb) - return 0; - - fb = conn_state->writeback_job->fb; - if (!conn_state || !conn_state->connector) { DPU_ERROR("invalid connector state\n"); return -EINVAL; @@ -267,6 +262,11 @@ static int dpu_encoder_phys_wb_atomic_check( return -EINVAL; } + if (!conn_state->writeback_job || !conn_state->writeback_job->fb) + return 0; + + fb = conn_state->writeback_job->fb; + DPU_DEBUG("[fb_id:%u][fb:%u,%u]\n", fb->base.id, fb->width, fb->height); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index ec26855079c1..239c8e3f2fbd 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -316,6 +316,8 @@ static void dp_display_unbind(struct device *dev, struct device *master, dp_power_client_deinit(dp->power); dp_aux_unregister(dp->aux); + dp->drm_dev = NULL; + dp->aux->drm_dev = NULL; priv->dp[dp->id] = NULL; } diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 3c3a0cfade36..c9e4aeb14f4a 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -928,7 +928,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, INT_MAX, GFP_KERNEL); } if (submit->fence_id < 0) { - ret = submit->fence_id = 0; + ret = submit->fence_id; submit->fence_id = 0; } diff --git a/drivers/gpu/drm/vc4/vc4_perfmon.c b/drivers/gpu/drm/vc4/vc4_perfmon.c index c7f5adb6bcf8..79a74184d732 100644 --- a/drivers/gpu/drm/vc4/vc4_perfmon.c +++ b/drivers/gpu/drm/vc4/vc4_perfmon.c @@ -17,13 +17,16 @@ void vc4_perfmon_get(struct vc4_perfmon *perfmon) { - struct vc4_dev *vc4 = perfmon->dev; + struct vc4_dev *vc4; + if (!perfmon) + return; + + vc4 = perfmon->dev; if (WARN_ON_ONCE(vc4->is_vc5)) return; - if (perfmon) - refcount_inc(&perfmon->refcnt); + refcount_inc(&perfmon->refcnt); } void vc4_perfmon_put(struct vc4_perfmon *perfmon) diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1c107d6d03b9..b985e0d9bc05 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1252,8 +1252,10 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, return ERR_CAST(cm_id_priv); err = cm_init_listen(cm_id_priv, service_id, 0); - if (err) + if (err) { + ib_destroy_cm_id(&cm_id_priv->id); return ERR_PTR(err); + } spin_lock_irq(&cm_id_priv->lock); listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h index 8def88cfa300..db9ef3e1eb97 100644 --- a/drivers/infiniband/hw/qedr/qedr.h +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -418,6 +418,7 @@ struct qedr_qp { u32 sq_psn; u32 qkey; u32 dest_qp_num; + u8 timeout; /* Relevant to qps created from kernel space only (ULPs) */ u8 prev_wqe_size; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index f0f43b6db89e..03ed7c0fae50 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2613,6 +2613,8 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 1 << max_t(int, attr->timeout - 8, 0); else qp_params.ack_timeout = 0; + + qp->timeout = attr->timeout; } if (attr_mask & IB_QP_RETRY_CNT) { @@ -2772,7 +2774,7 @@ int qedr_query_qp(struct ib_qp *ibqp, rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]); rdma_ah_set_port_num(&qp_attr->ah_attr, 1); rdma_ah_set_sl(&qp_attr->ah_attr, 0); - qp_attr->timeout = params.timeout; + qp_attr->timeout = qp->timeout; qp_attr->rnr_retry = params.rnr_retry; qp_attr->retry_cnt = params.retry_cnt; qp_attr->min_rnr_timer = params.min_rnr_nak_timer; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 9526ccbedafb..80c9f7134e9b 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1001,12 +1001,13 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) static int validate_raid_redundancy(struct raid_set *rs) { unsigned int i, rebuild_cnt = 0; - unsigned int rebuilds_per_group = 0, copies; + unsigned int rebuilds_per_group = 0, copies, raid_disks; unsigned int group_size, last_group_start; - for (i = 0; i < rs->md.raid_disks; i++) - if (!test_bit(In_sync, &rs->dev[i].rdev.flags) || - !rs->dev[i].rdev.sb_page) + for (i = 0; i < rs->raid_disks; i++) + if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) && + ((!test_bit(In_sync, &rs->dev[i].rdev.flags) || + !rs->dev[i].rdev.sb_page))) rebuild_cnt++; switch (rs->md.level) { @@ -1046,8 +1047,9 @@ static int validate_raid_redundancy(struct raid_set *rs) * A A B B C * C D D E E */ + raid_disks = min(rs->raid_disks, rs->md.raid_disks); if (__is_raid10_near(rs->md.new_layout)) { - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1070,10 +1072,10 @@ static int validate_raid_redundancy(struct raid_set *rs) * results in the need to treat the last (potentially larger) * set differently. */ - group_size = (rs->md.raid_disks / copies); - last_group_start = (rs->md.raid_disks / group_size) - 1; + group_size = (raid_disks / copies); + last_group_start = (raid_disks / group_size) - 1; last_group_start *= group_size; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < raid_disks; i++) { if (!(i % copies) && !(i > last_group_start)) rebuilds_per_group = 0; if ((!rs->dev[i].rdev.sb_page || @@ -1588,7 +1590,7 @@ static sector_t __rdev_sectors(struct raid_set *rs) { int i; - for (i = 0; i < rs->md.raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { struct md_rdev *rdev = &rs->dev[i].rdev; if (!test_bit(Journal, &rdev->flags) && @@ -3766,13 +3768,13 @@ static int raid_iterate_devices(struct dm_target *ti, unsigned int i; int r = 0; - for (i = 0; !r && i < rs->md.raid_disks; i++) - if (rs->dev[i].data_dev) - r = fn(ti, - rs->dev[i].data_dev, - 0, /* No offset on data devs */ - rs->md.dev_sectors, - data); + for (i = 0; !r && i < rs->raid_disks; i++) { + if (rs->dev[i].data_dev) { + r = fn(ti, rs->dev[i].data_dev, + 0, /* No offset on data devs */ + rs->md.dev_sectors, data); + } + } return r; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 5d09256d7f81..20e53b167f81 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7933,7 +7933,7 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) int err = 0; int number = rdev->raid_disk; struct md_rdev __rcu **rdevp; - struct disk_info *p = conf->disks + number; + struct disk_info *p; struct md_rdev *tmp; print_raid5_conf(conf); @@ -7952,6 +7952,9 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) log_exit(conf); return 0; } + if (unlikely(number >= conf->pool_size)) + return 0; + p = conf->disks + number; if (rdev == rcu_access_pointer(p->rdev)) rdevp = &p->rdev; else if (rdev == rcu_access_pointer(p->replacement)) @@ -8062,6 +8065,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) */ if (rdev->saved_raid_disk >= 0 && rdev->saved_raid_disk >= first && + rdev->saved_raid_disk <= last && conf->disks[rdev->saved_raid_disk].rdev == NULL) first = rdev->saved_raid_disk; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a86b1f71762e..d7fb33c078e8 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2228,7 +2228,8 @@ void bond_3ad_unbind_slave(struct slave *slave) temp_aggregator->num_of_ports--; if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; - ad_clear_agg(temp_aggregator); + if (temp_aggregator->num_of_ports == 0) + ad_clear_agg(temp_aggregator); if (select_new_active_agg) { slave_info(bond->dev, slave->dev, "Removing an active aggregator\n"); /* select new active aggregator */ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 303c8d32d451..007d43e46dcb 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1302,12 +1302,12 @@ int bond_alb_initialize(struct bonding *bond, int rlb_enabled) return res; if (rlb_enabled) { - bond->alb_info.rlb_enabled = 1; res = rlb_initialize(bond); if (res) { tlb_deinitialize(bond); return res; } + bond->alb_info.rlb_enabled = 1; } else { bond->alb_info.rlb_enabled = 0; } diff --git a/drivers/net/caif/caif_virtio.c b/drivers/net/caif/caif_virtio.c index 5458f57177a0..0b0f234b0b50 100644 --- a/drivers/net/caif/caif_virtio.c +++ b/drivers/net/caif/caif_virtio.c @@ -722,13 +722,21 @@ static int cfv_probe(struct virtio_device *vdev) /* Carrier is off until netdevice is opened */ netif_carrier_off(netdev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + /* register Netdev */ - err = register_netdev(netdev); + err = register_netdevice(netdev); if (err) { + rtnl_unlock(); dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err); goto err; } + virtio_device_ready(vdev); + + rtnl_unlock(); + debugfs_init(cfv); return 0; diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 87e81c636339..be0edfa093d0 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -878,6 +878,11 @@ static void bcm_sf2_sw_mac_link_up(struct dsa_switch *ds, int port, if (duplex == DUPLEX_FULL) reg |= DUPLX_MODE; + if (tx_pause) + reg |= TXFLOW_CNTL; + if (rx_pause) + reg |= RXFLOW_CNTL; + core_writel(priv, reg, offset); } diff --git a/drivers/net/dsa/hirschmann/hellcreek_ptp.c b/drivers/net/dsa/hirschmann/hellcreek_ptp.c index 2572c6087bb5..b28baab6d56a 100644 --- a/drivers/net/dsa/hirschmann/hellcreek_ptp.c +++ b/drivers/net/dsa/hirschmann/hellcreek_ptp.c @@ -300,6 +300,7 @@ static int hellcreek_led_setup(struct hellcreek *hellcreek) const char *label, *state; int ret = -EINVAL; + of_node_get(hellcreek->dev->of_node); leds = of_find_node_by_name(hellcreek->dev->of_node, "leds"); if (!leds) { dev_err(hellcreek->dev, "No LEDs specified in device tree!\n"); diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 570d0204b7be..9c27b9b0128d 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1886,6 +1886,8 @@ static void vsc9959_psfp_sgi_table_del(struct ocelot *ocelot, static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index, struct felix_stream_filter_counters *counters) { + mutex_lock(&ocelot->stats_lock); + ocelot_rmw(ocelot, SYS_STAT_CFG_STAT_VIEW(index), SYS_STAT_CFG_STAT_VIEW_M, SYS_STAT_CFG); @@ -1900,6 +1902,8 @@ static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index, SYS_STAT_CFG_STAT_VIEW(index) | SYS_STAT_CFG_STAT_CLEAR_SHOT(0x10), SYS_STAT_CFG); + + mutex_unlock(&ocelot->stats_lock); } static int vsc9959_psfp_filter_add(struct ocelot *ocelot, int port, diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h index cc51149790ff..3d5d39a52fe6 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ -52,7 +52,7 @@ #define CN93_SDP_EPF_RINFO_SRN(val) ((val) & 0xFF) #define CN93_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF) -#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) && 0xFF) +#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0xFF) /* SDP Function select */ #define CN93_SDP_FUNC_SEL_EPF_BIT_POS 8 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 9dbb573d53ea..0d8a0068e4ca 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4415,6 +4415,8 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, return 0; err_nexthop_neigh_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_remove(mlxsw_sp, nh); return err; } @@ -6740,6 +6742,7 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { struct net_device *dev = rt->fib6_nh->fib_nh_dev; + int err; nh->nhgi = nh_grp->nhgi; nh->nh_weight = rt->fib6_nh->fib_nh_weight; @@ -6755,7 +6758,16 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, return 0; nh->ifindex = dev->ifindex; - return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev); + if (err) + goto err_nexthop_type_init; + + return 0; + +err_nexthop_type_init: + list_del(&nh->router_list_node); + mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + return err; } static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c index 3429660cd2e5..5edc8b7176c8 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.c @@ -396,6 +396,9 @@ static int sparx5_handle_port_mdb_add(struct net_device *dev, u32 mact_entry; int res, err; + if (!sparx5_netdevice_check(dev)) + return -EOPNOTSUPP; + if (netif_is_bridge_master(v->obj.orig_dev)) { sparx5_mact_learn(spx5, PGID_CPU, v->addr, v->vid); return 0; @@ -466,6 +469,9 @@ static int sparx5_handle_port_mdb_del(struct net_device *dev, u32 mact_entry, res, pgid_entry[3]; int err; + if (!sparx5_netdevice_check(dev)) + return -EOPNOTSUPP; + if (netif_is_bridge_master(v->obj.orig_dev)) { sparx5_mact_forget(spx5, v->addr, v->vid); return 0; diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c index a0654e88444c..0329caf63279 100644 --- a/drivers/net/ethernet/smsc/epic100.c +++ b/drivers/net/ethernet/smsc/epic100.c @@ -1515,14 +1515,14 @@ static void epic_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct epic_private *ep = netdev_priv(dev); + unregister_netdev(dev); dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring, ep->tx_ring_dma); dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring, ep->rx_ring_dma); - unregister_netdev(dev); pci_iounmap(pdev, ep->ioaddr); - pci_release_regions(pdev); free_netdev(dev); + pci_release_regions(pdev); pci_disable_device(pdev); /* pci_power_off(pdev, -1); */ } diff --git a/drivers/net/phy/ax88796b.c b/drivers/net/phy/ax88796b.c index 457896337505..0f1e617a26c9 100644 --- a/drivers/net/phy/ax88796b.c +++ b/drivers/net/phy/ax88796b.c @@ -88,8 +88,10 @@ static void asix_ax88772a_link_change_notify(struct phy_device *phydev) /* Reset PHY, otherwise MII_LPA will provide outdated information. * This issue is reproducible only with some link partner PHYs */ - if (phydev->state == PHY_NOLINK && phydev->drv->soft_reset) - phydev->drv->soft_reset(phydev); + if (phydev->state == PHY_NOLINK) { + phy_init_hw(phydev); + phy_start_aneg(phydev); + } } static struct phy_driver asix_driver[] = { diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index e6ad3a494d32..8549e0e356c9 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -229,9 +229,7 @@ static int dp83822_config_intr(struct phy_device *phydev) if (misr_status < 0) return misr_status; - misr_status |= (DP83822_RX_ERR_HF_INT_EN | - DP83822_FALSE_CARRIER_HF_INT_EN | - DP83822_LINK_STAT_INT_EN | + misr_status |= (DP83822_LINK_STAT_INT_EN | DP83822_ENERGY_DET_INT_EN | DP83822_LINK_QUAL_INT_EN); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index ef62f357b76d..8d3ee3a6495b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -31,6 +31,7 @@ #include <linux/io.h> #include <linux/uaccess.h> #include <linux/atomic.h> +#include <linux/suspend.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/sock.h> @@ -976,6 +977,28 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) struct phy_driver *drv = phydev->drv; irqreturn_t ret; + /* Wakeup interrupts may occur during a system sleep transition. + * Postpone handling until the PHY has resumed. + */ + if (IS_ENABLED(CONFIG_PM_SLEEP) && phydev->irq_suspended) { + struct net_device *netdev = phydev->attached_dev; + + if (netdev) { + struct device *parent = netdev->dev.parent; + + if (netdev->wol_enabled) + pm_system_wakeup(); + else if (device_may_wakeup(&netdev->dev)) + pm_wakeup_dev_event(&netdev->dev, 0, true); + else if (parent && device_may_wakeup(parent)) + pm_wakeup_dev_event(parent, 0, true); + } + + phydev->irq_rerun = 1; + disable_irq_nosync(irq); + return IRQ_HANDLED; + } + mutex_lock(&phydev->lock); ret = drv->handle_interrupt(phydev); mutex_unlock(&phydev->lock); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 431a8719c635..46acddd865a7 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -278,6 +278,15 @@ static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) if (phydev->mac_managed_pm) return 0; + /* Wakeup interrupts may occur during the system sleep transition when + * the PHY is inaccessible. Set flag to postpone handling until the PHY + * has resumed. Wait for concurrent interrupt handler to complete. + */ + if (phy_interrupt_is_valid(phydev)) { + phydev->irq_suspended = 1; + synchronize_irq(phydev->irq); + } + /* We must stop the state machine manually, otherwise it stops out of * control, possibly with the phydev->lock held. Upon resume, netdev * may call phy routines that try to grab the same lock, and that may @@ -315,6 +324,20 @@ static __maybe_unused int mdio_bus_phy_resume(struct device *dev) if (ret < 0) return ret; no_resume: + if (phy_interrupt_is_valid(phydev)) { + phydev->irq_suspended = 0; + synchronize_irq(phydev->irq); + + /* Rerun interrupts which were postponed by phy_interrupt() + * because they occurred during the system sleep transition. + */ + if (phydev->irq_rerun) { + phydev->irq_rerun = 0; + enable_irq(phydev->irq); + irq_wake_thread(phydev->irq, phydev); + } + } + if (phydev->attached_dev && phydev->adjust_link) phy_start_machine(phydev); diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 9a5d5a10560f..e7b0e12cc75b 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2516,7 +2516,7 @@ static int sfp_probe(struct platform_device *pdev) platform_set_drvdata(pdev, sfp); - err = devm_add_action(sfp->dev, sfp_cleanup, sfp); + err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp); if (err < 0) return err; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 87a635aac008..259b2b84b2b3 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -273,6 +273,12 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile, } } +static void tun_napi_enable(struct tun_file *tfile) +{ + if (tfile->napi_enabled) + napi_enable(&tfile->napi); +} + static void tun_napi_disable(struct tun_file *tfile) { if (tfile->napi_enabled) @@ -634,7 +640,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun = rtnl_dereference(tfile->tun); if (tun && clean) { - tun_napi_disable(tfile); + if (!tfile->detached) + tun_napi_disable(tfile); tun_napi_del(tfile); } @@ -653,8 +660,10 @@ static void __tun_detach(struct tun_file *tfile, bool clean) if (clean) { RCU_INIT_POINTER(tfile->tun, NULL); sock_put(&tfile->sk); - } else + } else { tun_disable_queue(tun, tfile); + tun_napi_disable(tfile); + } synchronize_net(); tun_flow_delete_by_queue(tun, tun->numqueues + 1); @@ -727,6 +736,7 @@ static void tun_detach_all(struct net_device *dev) sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_napi_del(tfile); tun_enable_queue(tfile); tun_queue_purge(tfile); xdp_rxq_info_unreg(&tfile->xdp_rxq); @@ -807,6 +817,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file, if (tfile->detached) { tun_enable_queue(tfile); + tun_napi_enable(tfile); } else { sock_hold(&tfile->sk); tun_napi_init(tun, tfile, napi, napi_frags); diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index 2c81236c6c7c..45d3cc5cc355 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -126,8 +126,7 @@ AX_MEDIUM_RE) #define AX88772_MEDIUM_DEFAULT \ - (AX_MEDIUM_FD | AX_MEDIUM_RFC | \ - AX_MEDIUM_TFC | AX_MEDIUM_PS | \ + (AX_MEDIUM_FD | AX_MEDIUM_PS | \ AX_MEDIUM_AC | AX_MEDIUM_RE) /* AX88772 & AX88178 RX_CTL values */ diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 632fa6c1d5e3..b4a1b7abcfc9 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -431,6 +431,7 @@ void asix_adjust_link(struct net_device *netdev) asix_write_medium_mode(dev, mode, 0); phy_print_status(phydev); + usbnet_link_change(dev, phydev->link, 0); } int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 4704ed6f00ef..ac2d400d1d6c 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1472,6 +1472,42 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) * are bundled into this buffer and where we can find an array of * per-packet metadata (which contains elements encoded into u16). */ + + /* SKB contents for current firmware: + * <packet 1> <padding> + * ... + * <packet N> <padding> + * <per-packet metadata entry 1> <dummy header> + * ... + * <per-packet metadata entry N> <dummy header> + * <padding2> <rx_hdr> + * + * where: + * <packet N> contains pkt_len bytes: + * 2 bytes of IP alignment pseudo header + * packet received + * <per-packet metadata entry N> contains 4 bytes: + * pkt_len and fields AX_RXHDR_* + * <padding> 0-7 bytes to terminate at + * 8 bytes boundary (64-bit). + * <padding2> 4 bytes to make rx_hdr terminate at + * 8 bytes boundary (64-bit) + * <dummy-header> contains 4 bytes: + * pkt_len=0 and AX_RXHDR_DROP_ERR + * <rx-hdr> contains 4 bytes: + * pkt_cnt and hdr_off (offset of + * <per-packet metadata entry 1>) + * + * pkt_cnt is number of entrys in the per-packet metadata. + * In current firmware there is 2 entrys per packet. + * The first points to the packet and the + * second is a dummy header. + * This was done probably to align fields in 64-bit and + * maintain compatibility with old firmware. + * This code assumes that <dummy header> and <padding2> are + * optional. + */ + if (skb->len < 4) return 0; skb_trim(skb, skb->len - 4); @@ -1485,51 +1521,66 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* Make sure that the bounds of the metadata array are inside the SKB * (and in front of the counter at the end). */ - if (pkt_cnt * 2 + hdr_off > skb->len) + if (pkt_cnt * 4 + hdr_off > skb->len) return 0; pkt_hdr = (u32 *)(skb->data + hdr_off); /* Packets must not overlap the metadata array */ skb_trim(skb, hdr_off); - for (; ; pkt_cnt--, pkt_hdr++) { + for (; pkt_cnt > 0; pkt_cnt--, pkt_hdr++) { + u16 pkt_len_plus_padd; u16 pkt_len; le32_to_cpus(pkt_hdr); pkt_len = (*pkt_hdr >> 16) & 0x1fff; + pkt_len_plus_padd = (pkt_len + 7) & 0xfff8; - if (pkt_len > skb->len) + /* Skip dummy header used for alignment + */ + if (pkt_len == 0) + continue; + + if (pkt_len_plus_padd > skb->len) return 0; /* Check CRC or runt packet */ - if (((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) == 0) && - pkt_len >= 2 + ETH_HLEN) { - bool last = (pkt_cnt == 0); - - if (last) { - ax_skb = skb; - } else { - ax_skb = skb_clone(skb, GFP_ATOMIC); - if (!ax_skb) - return 0; - } - ax_skb->len = pkt_len; - /* Skip IP alignment pseudo header */ - skb_pull(ax_skb, 2); - skb_set_tail_pointer(ax_skb, ax_skb->len); - ax_skb->truesize = pkt_len + sizeof(struct sk_buff); - ax88179_rx_checksum(ax_skb, pkt_hdr); + if ((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) || + pkt_len < 2 + ETH_HLEN) { + dev->net->stats.rx_errors++; + skb_pull(skb, pkt_len_plus_padd); + continue; + } - if (last) - return 1; + /* last packet */ + if (pkt_len_plus_padd == skb->len) { + skb_trim(skb, pkt_len); - usbnet_skb_return(dev, ax_skb); + /* Skip IP alignment pseudo header */ + skb_pull(skb, 2); + + skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd); + ax88179_rx_checksum(skb, pkt_hdr); + return 1; } - /* Trim this packet away from the SKB */ - if (!skb_pull(skb, (pkt_len + 7) & 0xFFF8)) + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (!ax_skb) return 0; + skb_trim(ax_skb, pkt_len); + + /* Skip IP alignment pseudo header */ + skb_pull(ax_skb, 2); + + skb->truesize = pkt_len_plus_padd + + SKB_DATA_ALIGN(sizeof(struct sk_buff)); + ax88179_rx_checksum(ax_skb, pkt_hdr); + usbnet_skb_return(dev, ax_skb); + + skb_pull(skb, pkt_len_plus_padd); } + + return 0; } static struct sk_buff * diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 1cb6dab3e2d0..e2135ab87a6e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -2004,7 +2004,7 @@ static int __usbnet_read_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (size) { - buf = kmalloc(size, GFP_KERNEL); + buf = kmalloc(size, GFP_NOIO); if (!buf) goto out; } @@ -2036,7 +2036,7 @@ static int __usbnet_write_cmd(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); if (data) { - buf = kmemdup(data, size, GFP_KERNEL); + buf = kmemdup(data, size, GFP_NOIO); if (!buf) goto out; } else { diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 969a67970e71..356cf8dd4164 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3642,14 +3642,20 @@ static int virtnet_probe(struct virtio_device *vdev) if (vi->has_rss || vi->has_rss_hash_report) virtnet_init_default_rss(vi); - err = register_netdev(dev); + /* serialize netdev register + virtio_device_ready() with ndo_open() */ + rtnl_lock(); + + err = register_netdevice(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); + rtnl_unlock(); goto free_failover; } virtio_device_ready(vdev); + rtnl_unlock(); + err = virtnet_cpu_notif_add(vi); if (err) { pr_debug("virtio_net: registering cpu notifier failed\n"); diff --git a/drivers/nfc/nfcmrvl/i2c.c b/drivers/nfc/nfcmrvl/i2c.c index ceef81d93ac9..01329b91d59d 100644 --- a/drivers/nfc/nfcmrvl/i2c.c +++ b/drivers/nfc/nfcmrvl/i2c.c @@ -167,9 +167,9 @@ static int nfcmrvl_i2c_parse_dt(struct device_node *node, pdata->irq_polarity = IRQF_TRIGGER_RISING; ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; diff --git a/drivers/nfc/nfcmrvl/spi.c b/drivers/nfc/nfcmrvl/spi.c index a38e2fcdfd39..ad3359a4942c 100644 --- a/drivers/nfc/nfcmrvl/spi.c +++ b/drivers/nfc/nfcmrvl/spi.c @@ -115,9 +115,9 @@ static int nfcmrvl_spi_parse_dt(struct device_node *node, } ret = irq_of_parse_and_map(node, 0); - if (ret < 0) { - pr_err("Unable to get irq, error: %d\n", ret); - return ret; + if (!ret) { + pr_err("Unable to get irq\n"); + return -EINVAL; } pdata->irq = ret; diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c index 7e451c10985d..ae2ba08d8ac3 100644 --- a/drivers/nfc/nxp-nci/i2c.c +++ b/drivers/nfc/nxp-nci/i2c.c @@ -122,7 +122,9 @@ static int nxp_nci_i2c_fw_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN); r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len); - if (r != frame_len) { + if (r < 0) { + goto fw_read_exit_free_skb; + } else if (r != frame_len) { nfc_err(&client->dev, "Invalid frame length: %u (expected %zu)\n", r, frame_len); @@ -162,8 +164,13 @@ static int nxp_nci_i2c_nci_read(struct nxp_nci_i2c_phy *phy, skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE); + if (!header.plen) + return 0; + r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen); - if (r != header.plen) { + if (r < 0) { + goto nci_read_exit_free_skb; + } else if (r != header.plen) { nfc_err(&client->dev, "Invalid frame payload length: %u (expected %u)\n", r, header.plen); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b3d9c29aba1e..ec6ac298d8de 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4595,6 +4595,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); + if (ctrl->ops->stop_ctrl) + ctrl->ops->stop_ctrl(ctrl); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 0da94b233fed..5558f8812157 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -502,6 +502,7 @@ struct nvme_ctrl_ops { void (*free_ctrl)(struct nvme_ctrl *ctrl); void (*submit_async_event)(struct nvme_ctrl *ctrl); void (*delete_ctrl)(struct nvme_ctrl *ctrl); + void (*stop_ctrl)(struct nvme_ctrl *ctrl); int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); void (*print_device_info)(struct nvme_ctrl *ctrl); }; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d7b24ee17285..e7af2234e53b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3469,8 +3469,11 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1cc1, 0x33f8), /* ADATA IM2P33F8ABR1 1 TB */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */ - .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_IGNORE_DEV_SUBNQN, }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index f2a5e1ea508a..46c2dcf72f7e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1048,6 +1048,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, } } +static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + + cancel_work_sync(&ctrl->err_work); + cancel_delayed_work_sync(&ctrl->reconnect_work); +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -2252,9 +2260,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&ctrl->err_work); - cancel_delayed_work_sync(&ctrl->reconnect_work); - nvme_rdma_teardown_io_queues(ctrl, shutdown); nvme_stop_admin_queue(&ctrl->ctrl); if (shutdown) @@ -2304,6 +2309,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .submit_async_event = nvme_rdma_submit_async_event, .delete_ctrl = nvme_rdma_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_rdma_stop_ctrl, }; /* diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index bb67538d241b..7a9e6ffa2342 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1180,8 +1180,7 @@ done: } else if (ret < 0) { dev_err(queue->ctrl->ctrl.device, "failed to send request %d\n", ret); - if (ret != -EPIPE && ret != -ECONNRESET) - nvme_tcp_fail_request(queue->request); + nvme_tcp_fail_request(queue->request); nvme_tcp_done_send_req(queue); } return ret; @@ -2194,9 +2193,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) { - cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); - cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); - nvme_tcp_teardown_io_queues(ctrl, shutdown); nvme_stop_admin_queue(ctrl); if (shutdown) @@ -2236,6 +2232,12 @@ out_fail: nvme_tcp_reconnect_or_remove(ctrl); } +static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) +{ + cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work); + cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); +} + static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); @@ -2557,6 +2559,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .submit_async_event = nvme_tcp_submit_async_event, .delete_ctrl = nvme_tcp_delete_ctrl, .get_address = nvmf_get_address, + .stop_ctrl = nvme_tcp_stop_ctrl, }; static bool diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index e44b2988759e..ff77c3d2354f 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -773,11 +773,31 @@ static ssize_t nvmet_passthru_io_timeout_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_passthru_, io_timeout); +static ssize_t nvmet_passthru_clear_ids_show(struct config_item *item, + char *page) +{ + return sprintf(page, "%u\n", to_subsys(item->ci_parent)->clear_ids); +} + +static ssize_t nvmet_passthru_clear_ids_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item->ci_parent); + unsigned int clear_ids; + + if (kstrtouint(page, 0, &clear_ids)) + return -EINVAL; + subsys->clear_ids = clear_ids; + return count; +} +CONFIGFS_ATTR(nvmet_passthru_, clear_ids); + static struct configfs_attribute *nvmet_passthru_attrs[] = { &nvmet_passthru_attr_device_path, &nvmet_passthru_attr_enable, &nvmet_passthru_attr_admin_timeout, &nvmet_passthru_attr_io_timeout, + &nvmet_passthru_attr_clear_ids, NULL, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 90e75324dae0..c27660a660d9 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1374,6 +1374,12 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, ctrl->port = req->port; ctrl->ops = req->ops; +#ifdef CONFIG_NVME_TARGET_PASSTHRU + /* By default, set loop targets to clear IDS by default */ + if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP) + subsys->clear_ids = 1; +#endif + INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); INIT_LIST_HEAD(&ctrl->async_events); INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 69818752a33a..2b3e5719f24e 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -249,6 +249,7 @@ struct nvmet_subsys { struct config_group passthru_group; unsigned int admin_timeout; unsigned int io_timeout; + unsigned int clear_ids; #endif /* CONFIG_NVME_TARGET_PASSTHRU */ #ifdef CONFIG_BLK_DEV_ZONED diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index b1f7efab3918..6f39a29828b1 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -30,6 +30,53 @@ void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl) ctrl->cap &= ~(1ULL << 43); } +static u16 nvmet_passthru_override_id_descs(struct nvmet_req *req) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + u16 status = NVME_SC_SUCCESS; + int pos, len; + bool csi_seen = false; + void *data; + u8 csi; + + if (!ctrl->subsys->clear_ids) + return status; + + data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); + if (!data) + return NVME_SC_INTERNAL; + + status = nvmet_copy_from_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE); + if (status) + goto out_free; + + for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) { + struct nvme_ns_id_desc *cur = data + pos; + + if (cur->nidl == 0) + break; + if (cur->nidt == NVME_NIDT_CSI) { + memcpy(&csi, cur + 1, NVME_NIDT_CSI_LEN); + csi_seen = true; + break; + } + len = sizeof(struct nvme_ns_id_desc) + cur->nidl; + } + + memset(data, 0, NVME_IDENTIFY_DATA_SIZE); + if (csi_seen) { + struct nvme_ns_id_desc *cur = data; + + cur->nidt = NVME_NIDT_CSI; + cur->nidl = NVME_NIDT_CSI_LEN; + memcpy(cur + 1, &csi, NVME_NIDT_CSI_LEN); + } + status = nvmet_copy_to_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE); +out_free: + kfree(data); + return status; +} + static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -152,6 +199,11 @@ static u16 nvmet_passthru_override_id_ns(struct nvmet_req *req) */ id->mc = 0; + if (req->sq->ctrl->subsys->clear_ids) { + memset(id->nguid, 0, NVME_NIDT_NGUID_LEN); + memset(id->eui64, 0, NVME_NIDT_EUI64_LEN); + } + status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); out_free: @@ -176,6 +228,9 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) case NVME_ID_CNS_NS: nvmet_passthru_override_id_ns(req); break; + case NVME_ID_CNS_NS_DESC_LIST: + nvmet_passthru_override_id_descs(req); + break; } } else if (status < 0) status = NVME_SC_INTERNAL; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 2793554e622e..0a9542599ad1 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -405,7 +405,7 @@ err: return NVME_SC_INTERNAL; } -static void nvmet_tcp_send_ddgst(struct ahash_request *hash, +static void nvmet_tcp_calc_ddgst(struct ahash_request *hash, struct nvmet_tcp_cmd *cmd) { ahash_request_set_crypt(hash, cmd->req.sg, @@ -413,23 +413,6 @@ static void nvmet_tcp_send_ddgst(struct ahash_request *hash, crypto_ahash_digest(hash); } -static void nvmet_tcp_recv_ddgst(struct ahash_request *hash, - struct nvmet_tcp_cmd *cmd) -{ - struct scatterlist sg; - struct kvec *iov; - int i; - - crypto_ahash_init(hash); - for (i = 0, iov = cmd->iov; i < cmd->nr_mapped; i++, iov++) { - sg_init_one(&sg, iov->iov_base, iov->iov_len); - ahash_request_set_crypt(hash, &sg, NULL, iov->iov_len); - crypto_ahash_update(hash); - } - ahash_request_set_crypt(hash, NULL, (void *)&cmd->exp_ddgst, 0); - crypto_ahash_final(hash); -} - static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_data_pdu *pdu = cmd->data_pdu; @@ -454,7 +437,7 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) if (queue->data_digest) { pdu->hdr.flags |= NVME_TCP_F_DDGST; - nvmet_tcp_send_ddgst(queue->snd_hash, cmd); + nvmet_tcp_calc_ddgst(queue->snd_hash, cmd); } if (cmd->queue->hdr_digest) { @@ -1137,7 +1120,7 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) { struct nvmet_tcp_queue *queue = cmd->queue; - nvmet_tcp_recv_ddgst(queue->rcv_hash, cmd); + nvmet_tcp_calc_ddgst(queue->rcv_hash, cmd); queue->offset = 0; queue->left = NVME_TCP_DIGEST_LENGTH; queue->rcv_state = NVMET_TCP_RECV_DDGST; diff --git a/drivers/platform/mellanox/nvsw-sn2201.c b/drivers/platform/mellanox/nvsw-sn2201.c index 2923daf63b75..7b9c107c17ce 100644 --- a/drivers/platform/mellanox/nvsw-sn2201.c +++ b/drivers/platform/mellanox/nvsw-sn2201.c @@ -890,6 +890,7 @@ nvsw_sn2201_create_static_devices(struct nvsw_sn2201 *nvsw_sn2201, int size) { struct mlxreg_hotplug_device *dev = devs; + int ret; int i; /* Create I2C static devices. */ @@ -901,6 +902,7 @@ nvsw_sn2201_create_static_devices(struct nvsw_sn2201 *nvsw_sn2201, dev->nr, dev->brdinfo->addr); dev->adapter = NULL; + ret = PTR_ERR(dev->client); goto fail_create_static_devices; } } @@ -914,7 +916,7 @@ fail_create_static_devices: dev->client = NULL; dev->adapter = NULL; } - return IS_ERR(dev->client); + return ret; } static void nvsw_sn2201_destroy_static_devices(struct nvsw_sn2201 *nvsw_sn2201, diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index f08ad85683cb..bc4013e950ed 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -945,6 +945,8 @@ config PANASONIC_LAPTOP tristate "Panasonic Laptop Extras" depends on INPUT && ACPI depends on BACKLIGHT_CLASS_DEVICE + depends on ACPI_VIDEO=n || ACPI_VIDEO + depends on SERIO_I8042 || SERIO_I8042 = n select INPUT_SPARSEKMAP help This driver adds support for access to backlight control and hotkeys diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 0d8cb22e30df..bc7020e9df9e 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -89,6 +89,7 @@ enum hp_wmi_event_ids { HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D, HPWMI_PEAKSHIFT_PERIOD = 0x0F, HPWMI_BATTERY_CHARGE_PERIOD = 0x10, + HPWMI_SANITIZATION_MODE = 0x17, }; /* @@ -853,6 +854,8 @@ static void hp_wmi_notify(u32 value, void *context) break; case HPWMI_BATTERY_CHARGE_PERIOD: break; + case HPWMI_SANITIZATION_MODE: + break; default: pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 3ccb7b71dfb1..abd0c81d62c4 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -152,6 +152,10 @@ static bool no_bt_rfkill; module_param(no_bt_rfkill, bool, 0444); MODULE_PARM_DESC(no_bt_rfkill, "No rfkill for bluetooth."); +static bool allow_v4_dytc; +module_param(allow_v4_dytc, bool, 0444); +MODULE_PARM_DESC(allow_v4_dytc, "Enable DYTC version 4 platform-profile support."); + /* * ACPI Helpers */ @@ -871,12 +875,18 @@ static void dytc_profile_refresh(struct ideapad_private *priv) static const struct dmi_system_id ideapad_dytc_v4_allow_table[] = { { /* Ideapad 5 Pro 16ACH6 */ - .ident = "LENOVO 82L5", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), DMI_MATCH(DMI_PRODUCT_NAME, "82L5") } }, + { + /* Ideapad 5 15ITL05 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "IdeaPad 5 15ITL05") + } + }, {} }; @@ -901,13 +911,16 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF; - if (dytc_version < 5) { - if (dytc_version < 4 || !dmi_check_system(ideapad_dytc_v4_allow_table)) { - dev_info(&priv->platform_device->dev, - "DYTC_VERSION is less than 4 or is not allowed: %d\n", - dytc_version); - return -ENODEV; - } + if (dytc_version < 4) { + dev_info(&priv->platform_device->dev, "DYTC_VERSION < 4 is not supported\n"); + return -ENODEV; + } + + if (dytc_version < 5 && + !(allow_v4_dytc || dmi_check_system(ideapad_dytc_v4_allow_table))) { + dev_info(&priv->platform_device->dev, + "DYTC_VERSION 4 support may not work. Pass ideapad_laptop.allow_v4_dytc=Y on the kernel commandline to enable\n"); + return -ENODEV; } priv->dytc = kzalloc(sizeof(*priv->dytc), GFP_KERNEL); diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 40183bda7894..a1fe1e0dcf4a 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -1911,6 +1911,7 @@ static const struct x86_cpu_id intel_pmc_core_ids[] = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &icl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &tgl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &tgl_reg_map), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &tgl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_reg_map), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &tgl_reg_map), {} diff --git a/drivers/platform/x86/panasonic-laptop.c b/drivers/platform/x86/panasonic-laptop.c index 37850d07987d..615e39cbbbf1 100644 --- a/drivers/platform/x86/panasonic-laptop.c +++ b/drivers/platform/x86/panasonic-laptop.c @@ -119,20 +119,22 @@ * - v0.1 start from toshiba_acpi driver written by John Belmonte */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/types.h> +#include <linux/acpi.h> #include <linux/backlight.h> #include <linux/ctype.h> -#include <linux/seq_file.h> -#include <linux/uaccess.h> -#include <linux/slab.h> -#include <linux/acpi.h> +#include <linux/i8042.h> +#include <linux/init.h> #include <linux/input.h> #include <linux/input/sparse-keymap.h> +#include <linux/kernel.h> +#include <linux/module.h> #include <linux/platform_device.h> - +#include <linux/seq_file.h> +#include <linux/serio.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/uaccess.h> +#include <acpi/video.h> MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>"); MODULE_AUTHOR("David Bronaugh <dbronaugh@linuxboxen.org>"); @@ -241,6 +243,42 @@ struct pcc_acpi { struct platform_device *platform; }; +/* + * On some Panasonic models the volume up / down / mute keys send duplicate + * keypress events over the PS/2 kbd interface, filter these out. + */ +static bool panasonic_i8042_filter(unsigned char data, unsigned char str, + struct serio *port) +{ + static bool extended; + + if (str & I8042_STR_AUXDATA) + return false; + + if (data == 0xe0) { + extended = true; + return true; + } else if (extended) { + extended = false; + + switch (data & 0x7f) { + case 0x20: /* e0 20 / e0 a0, Volume Mute press / release */ + case 0x2e: /* e0 2e / e0 ae, Volume Down press / release */ + case 0x30: /* e0 30 / e0 b0, Volume Up press / release */ + return true; + default: + /* + * Report the previously filtered e0 before continuing + * with the next non-filtered byte. + */ + serio_interrupt(port, 0xe0, 0); + return false; + } + } + + return false; +} + /* method access functions */ static int acpi_pcc_write_sset(struct pcc_acpi *pcc, int func, int val) { @@ -762,6 +800,8 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) struct input_dev *hotk_input_dev = pcc->input_dev; int rc; unsigned long long result; + unsigned int key; + unsigned int updown; rc = acpi_evaluate_integer(pcc->handle, METHOD_HKEY_QUERY, NULL, &result); @@ -770,20 +810,27 @@ static void acpi_pcc_generate_keyinput(struct pcc_acpi *pcc) return; } + key = result & 0xf; + updown = result & 0x80; /* 0x80 == key down; 0x00 = key up */ + /* hack: some firmware sends no key down for sleep / hibernate */ - if ((result & 0xf) == 0x7 || (result & 0xf) == 0xa) { - if (result & 0x80) + if (key == 7 || key == 10) { + if (updown) sleep_keydown_seen = 1; if (!sleep_keydown_seen) sparse_keymap_report_event(hotk_input_dev, - result & 0xf, 0x80, false); + key, 0x80, false); } - if ((result & 0xf) == 0x7 || (result & 0xf) == 0x9 || (result & 0xf) == 0xa) { - if (!sparse_keymap_report_event(hotk_input_dev, - result & 0xf, result & 0x80, false)) - pr_err("Unknown hotkey event: 0x%04llx\n", result); - } + /* + * Don't report brightness key-presses if they are also reported + * by the ACPI video bus. + */ + if ((key == 1 || key == 2) && acpi_video_handles_brightness_key_presses()) + return; + + if (!sparse_keymap_report_event(hotk_input_dev, key, updown, false)) + pr_err("Unknown hotkey event: 0x%04llx\n", result); } static void acpi_pcc_hotkey_notify(struct acpi_device *device, u32 event) @@ -997,6 +1044,7 @@ static int acpi_pcc_hotkey_add(struct acpi_device *device) pcc->platform = NULL; } + i8042_install_filter(panasonic_i8042_filter); return 0; out_platform: @@ -1020,6 +1068,8 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device) if (!device || !pcc) return -EINVAL; + i8042_remove_filter(panasonic_i8042_filter); + if (pcc->platform) { device_remove_file(&pcc->platform->dev, &dev_attr_cdpower); platform_device_unregister(pcc->platform); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index e6cb4a14cdd4..a8b383051528 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -4529,6 +4529,7 @@ static void thinkpad_acpi_amd_s2idle_restore(void) iounmap(addr); cleanup_resource: release_resource(res); + kfree(res); } static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = { @@ -10299,21 +10300,15 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_DISABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 0) #define DYTC_ENABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 1) -enum dytc_profile_funcmode { - DYTC_FUNCMODE_NONE = 0, - DYTC_FUNCMODE_MMC, - DYTC_FUNCMODE_PSC, -}; - -static enum dytc_profile_funcmode dytc_profile_available; static enum platform_profile_option dytc_current_profile; static atomic_t dytc_ignore_event = ATOMIC_INIT(0); static DEFINE_MUTEX(dytc_mutex); +static int dytc_capabilities; static bool dytc_mmc_get_available; static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile) { - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { switch (dytcmode) { case DYTC_MODE_MMC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10330,7 +10325,7 @@ static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *p } return 0; } - if (dytc_profile_available == DYTC_FUNCMODE_PSC) { + if (dytc_capabilities & BIT(DYTC_FC_PSC)) { switch (dytcmode) { case DYTC_MODE_PSC_LOWPOWER: *profile = PLATFORM_PROFILE_LOW_POWER; @@ -10352,21 +10347,21 @@ static int convert_profile_to_dytc(enum platform_profile_option profile, int *pe { switch (profile) { case PLATFORM_PROFILE_LOW_POWER: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_LOWPOWER; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_LOWPOWER; break; case PLATFORM_PROFILE_BALANCED: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_BALANCE; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_BALANCE; break; case PLATFORM_PROFILE_PERFORMANCE: - if (dytc_profile_available == DYTC_FUNCMODE_MMC) + if (dytc_capabilities & BIT(DYTC_FC_MMC)) *perfmode = DYTC_MODE_MMC_PERFORM; - else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + else if (dytc_capabilities & BIT(DYTC_FC_PSC)) *perfmode = DYTC_MODE_PSC_PERFORM; break; default: /* Unknown profile */ @@ -10445,7 +10440,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, if (err) goto unlock; - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { if (profile == PLATFORM_PROFILE_BALANCED) { /* * To get back to balanced mode we need to issue a reset command. @@ -10464,7 +10459,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, goto unlock; } } - if (dytc_profile_available == DYTC_FUNCMODE_PSC) { + if (dytc_capabilities & BIT(DYTC_FC_PSC)) { err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output); if (err) goto unlock; @@ -10483,12 +10478,12 @@ static void dytc_profile_refresh(void) int perfmode; mutex_lock(&dytc_mutex); - if (dytc_profile_available == DYTC_FUNCMODE_MMC) { + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { if (dytc_mmc_get_available) err = dytc_command(DYTC_CMD_MMC_GET, &output); else err = dytc_cql_command(DYTC_CMD_GET, &output); - } else if (dytc_profile_available == DYTC_FUNCMODE_PSC) + } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) err = dytc_command(DYTC_CMD_GET, &output); mutex_unlock(&dytc_mutex); @@ -10517,7 +10512,6 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) set_bit(PLATFORM_PROFILE_BALANCED, dytc_profile.choices); set_bit(PLATFORM_PROFILE_PERFORMANCE, dytc_profile.choices); - dytc_profile_available = DYTC_FUNCMODE_NONE; err = dytc_command(DYTC_CMD_QUERY, &output); if (err) return err; @@ -10530,13 +10524,12 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) return -ENODEV; /* Check what capabilities are supported */ - err = dytc_command(DYTC_CMD_FUNC_CAP, &output); + err = dytc_command(DYTC_CMD_FUNC_CAP, &dytc_capabilities); if (err) return err; - if (output & BIT(DYTC_FC_MMC)) { /* MMC MODE */ - dytc_profile_available = DYTC_FUNCMODE_MMC; - + if (dytc_capabilities & BIT(DYTC_FC_MMC)) { /* MMC MODE */ + pr_debug("MMC is supported\n"); /* * Check if MMC_GET functionality available * Version > 6 and return success from MMC_GET command @@ -10547,8 +10540,13 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (!err && ((output & DYTC_ERR_MASK) == DYTC_ERR_SUCCESS)) dytc_mmc_get_available = true; } - } else if (output & BIT(DYTC_FC_PSC)) { /* PSC MODE */ - dytc_profile_available = DYTC_FUNCMODE_PSC; + } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */ + /* Support for this only works on AMD platforms */ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { + dbg_printk(TPACPI_DBG_INIT, "PSC not support on Intel platforms\n"); + return -ENODEV; + } + pr_debug("PSC is supported\n"); } else { dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n"); return -ENODEV; @@ -10574,7 +10572,6 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) static void dytc_profile_exit(void) { - dytc_profile_available = DYTC_FUNCMODE_NONE; platform_profile_remove(); } diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 97e51c34e6cf..161d3b141f0d 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -1136,8 +1136,13 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vcdev->err = -EIO; } virtio_ccw_check_activity(vcdev, activity); - /* Interrupts are disabled here */ +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION + /* + * Paired with virtio_ccw_synchronize_cbs() and interrupts are + * disabled here. + */ read_lock(&vcdev->irq_lock); +#endif for_each_set_bit(i, indicators(vcdev), sizeof(*indicators(vcdev)) * BITS_PER_BYTE) { /* The bit clear must happen before the vring kick. */ @@ -1146,7 +1151,9 @@ static void virtio_ccw_int_handler(struct ccw_device *cdev, vq = virtio_ccw_vq_by_ind(vcdev, i); vring_interrupt(0, vq); } +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION read_unlock(&vcdev->irq_lock); +#endif if (test_bit(0, indicators2(vcdev))) { virtio_config_changed(&vcdev->vdev); clear_bit(0, indicators2(vcdev)); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 7d819fc0395e..eb86afb21aab 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2782,6 +2782,7 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) struct hisi_hba *hisi_hba = shost_priv(shost); struct device *dev = hisi_hba->dev; int ret = sas_slave_configure(sdev); + unsigned int max_sectors; if (ret) return ret; @@ -2799,6 +2800,12 @@ static int slave_configure_v3_hw(struct scsi_device *sdev) } } + /* Set according to IOMMU IOVA caching limit */ + max_sectors = min_t(size_t, queue_max_hw_sectors(sdev->request_queue), + (PAGE_SIZE * 32) >> SECTOR_SHIFT); + + blk_queue_max_hw_sectors(sdev->request_queue, max_sectors); + return 0; } diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 1b6d46b86f81..e85c1d71f4ed 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1962,6 +1962,8 @@ static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_c struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); ndev->event_cbs[idx] = *cb; + if (is_ctrl_vq_idx(mvdev, idx)) + mvdev->cvq.event_cb = *cb; } static void mlx5_cvq_notify(struct vringh *vring) @@ -2174,7 +2176,6 @@ static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features) static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) { struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - struct mlx5_control_vq *cvq = &mvdev->cvq; int err; int i; @@ -2184,16 +2185,6 @@ static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev) goto err_vq; } - if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) { - err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, - MLX5_CVQ_MAX_ENT, false, - (struct vring_desc *)(uintptr_t)cvq->desc_addr, - (struct vring_avail *)(uintptr_t)cvq->driver_addr, - (struct vring_used *)(uintptr_t)cvq->device_addr); - if (err) - goto err_vq; - } - return 0; err_vq: @@ -2466,6 +2457,21 @@ static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) ndev->mvdev.cvq.ready = false; } +static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev) +{ + struct mlx5_control_vq *cvq = &mvdev->cvq; + int err = 0; + + if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) + err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features, + MLX5_CVQ_MAX_ENT, false, + (struct vring_desc *)(uintptr_t)cvq->desc_addr, + (struct vring_avail *)(uintptr_t)cvq->driver_addr, + (struct vring_used *)(uintptr_t)cvq->device_addr); + + return err; +} + static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) { struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); @@ -2478,6 +2484,11 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status) if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { if (status & VIRTIO_CONFIG_S_DRIVER_OK) { + err = setup_cvq_vring(mvdev); + if (err) { + mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n"); + goto err_setup; + } err = setup_driver(mvdev); if (err) { mlx5_vdpa_warn(mvdev, "failed to setup driver\n"); diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c index 776ad7496f53..3bc27de58f46 100644 --- a/drivers/vdpa/vdpa_user/vduse_dev.c +++ b/drivers/vdpa/vdpa_user/vduse_dev.c @@ -1476,16 +1476,12 @@ static char *vduse_devnode(struct device *dev, umode_t *mode) return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev)); } -static void vduse_mgmtdev_release(struct device *dev) -{ -} - -static struct device vduse_mgmtdev = { - .init_name = "vduse", - .release = vduse_mgmtdev_release, +struct vduse_mgmt_dev { + struct vdpa_mgmt_dev mgmt_dev; + struct device dev; }; -static struct vdpa_mgmt_dev mgmt_dev; +static struct vduse_mgmt_dev *vduse_mgmt; static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) { @@ -1510,7 +1506,7 @@ static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name) } set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops); vdev->vdpa.dma_dev = &vdev->vdpa.dev; - vdev->vdpa.mdev = &mgmt_dev; + vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev; return 0; } @@ -1556,34 +1552,52 @@ static struct virtio_device_id id_table[] = { { 0 }, }; -static struct vdpa_mgmt_dev mgmt_dev = { - .device = &vduse_mgmtdev, - .id_table = id_table, - .ops = &vdpa_dev_mgmtdev_ops, -}; +static void vduse_mgmtdev_release(struct device *dev) +{ + struct vduse_mgmt_dev *mgmt_dev; + + mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev); + kfree(mgmt_dev); +} static int vduse_mgmtdev_init(void) { int ret; - ret = device_register(&vduse_mgmtdev); - if (ret) + vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL); + if (!vduse_mgmt) + return -ENOMEM; + + ret = dev_set_name(&vduse_mgmt->dev, "vduse"); + if (ret) { + kfree(vduse_mgmt); return ret; + } - ret = vdpa_mgmtdev_register(&mgmt_dev); + vduse_mgmt->dev.release = vduse_mgmtdev_release; + + ret = device_register(&vduse_mgmt->dev); if (ret) - goto err; + goto dev_reg_err; - return 0; -err: - device_unregister(&vduse_mgmtdev); + vduse_mgmt->mgmt_dev.id_table = id_table; + vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops; + vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev; + ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev); + if (ret) + device_unregister(&vduse_mgmt->dev); + + return ret; + +dev_reg_err: + put_device(&vduse_mgmt->dev); return ret; } static void vduse_mgmtdev_exit(void) { - vdpa_mgmtdev_unregister(&mgmt_dev); - device_unregister(&vduse_mgmtdev); + vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev); + device_unregister(&vduse_mgmt->dev); } static int vduse_init(void) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 5ad2596c6e8a..23dcbfdfa13b 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1209,7 +1209,7 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep) vhost_dev_stop(&v->vdev); vhost_vdpa_free_domain(v); vhost_vdpa_config_put(v); - vhost_dev_cleanup(&v->vdev); + vhost_vdpa_cleanup(v); mutex_unlock(&d->mutex); atomic_dec(&v->opened); diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index afa2863670f3..8afc4538558c 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/major.h> #include <linux/slab.h> +#include <linux/sysfb.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/vt.h> @@ -1752,6 +1753,17 @@ int remove_conflicting_framebuffers(struct apertures_struct *a, do_free = true; } + /* + * If a driver asked to unregister a platform device registered by + * sysfb, then can be assumed that this is a driver for a display + * that is set up by the system firmware and has a generic driver. + * + * Drivers for devices that don't have a generic driver will never + * ask for this, so let's assume that a real driver for the display + * was already probed and prevent sysfb to register devices later. + */ + sysfb_disable(); + mutex_lock(®istration_lock); do_remove_conflicting_framebuffers(a, name, primary); mutex_unlock(®istration_lock); diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index a6dc8b5846fe..e1556d2a355a 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -29,6 +29,19 @@ menuconfig VIRTIO_MENU if VIRTIO_MENU +config VIRTIO_HARDEN_NOTIFICATION + bool "Harden virtio notification" + help + Enable this to harden the device notifications and suppress + those that happen at a time where notifications are illegal. + + Experimental: Note that several drivers still have bugs that + may cause crashes or hangs when correct handling of + notifications is enforced; depending on the subset of + drivers and devices you use, this may or may not work. + + If unsure, say N. + config VIRTIO_PCI tristate "PCI driver for virtio devices" depends on PCI diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 6bace84ae37e..7deeed30d1f3 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -219,6 +219,7 @@ static int virtio_features_ok(struct virtio_device *dev) * */ void virtio_reset_device(struct virtio_device *dev) { +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION /* * The below virtio_synchronize_cbs() guarantees that any * interrupt for this line arriving after @@ -227,6 +228,7 @@ void virtio_reset_device(struct virtio_device *dev) */ virtio_break_device(dev); virtio_synchronize_cbs(dev); +#endif dev->config->reset(dev); } diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index c9bec3813e94..083ff1eb743d 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -62,6 +62,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/pm.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/virtio.h> @@ -556,6 +557,28 @@ static const struct virtio_config_ops virtio_mmio_config_ops = { .synchronize_cbs = vm_synchronize_cbs, }; +#ifdef CONFIG_PM_SLEEP +static int virtio_mmio_freeze(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + return virtio_device_freeze(&vm_dev->vdev); +} + +static int virtio_mmio_restore(struct device *dev) +{ + struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev); + + if (vm_dev->version == 1) + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE); + + return virtio_device_restore(&vm_dev->vdev); +} + +static const struct dev_pm_ops virtio_mmio_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore) +}; +#endif static void virtio_mmio_release_dev(struct device *_d) { @@ -799,6 +822,9 @@ static struct platform_driver virtio_mmio_driver = { .name = "virtio-mmio", .of_match_table = virtio_mmio_match, .acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match), +#ifdef CONFIG_PM_SLEEP + .pm = &virtio_mmio_pm_ops, +#endif }, }; diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index b790f30b2b56..fa2a9445bb18 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -220,8 +220,6 @@ int vp_modern_probe(struct virtio_pci_modern_device *mdev) check_offsets(); - mdev->pci_dev = pci_dev; - /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */ if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f) return -ENODEV; diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 13a7348cedff..643ca779fcc6 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -111,7 +111,12 @@ struct vring_virtqueue { /* Number we've added since last sync. */ unsigned int num_added; - /* Last used index we've seen. */ + /* Last used index we've seen. + * for split ring, it just contains last used index + * for packed ring: + * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index. + * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter. + */ u16 last_used_idx; /* Hint for event idx: already triggered no need to disable. */ @@ -154,9 +159,6 @@ struct vring_virtqueue { /* Driver ring wrap counter. */ bool avail_wrap_counter; - /* Device ring wrap counter. */ - bool used_wrap_counter; - /* Avail used flags. */ u16 avail_used_flags; @@ -933,7 +935,7 @@ static struct virtqueue *vring_create_virtqueue_split( for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) { queue = vring_alloc_queue(vdev, vring_size(num, vring_align), &dma_addr, - GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO); + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); if (queue) break; if (!may_reduce_num) @@ -973,6 +975,15 @@ static struct virtqueue *vring_create_virtqueue_split( /* * Packed ring specific functions - *_packed(). */ +static inline bool packed_used_wrap_counter(u16 last_used_idx) +{ + return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR)); +} + +static inline u16 packed_last_used(u16 last_used_idx) +{ + return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR)); +} static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, struct vring_desc_extra *extra) @@ -1406,8 +1417,14 @@ static inline bool is_used_desc_packed(const struct vring_virtqueue *vq, static inline bool more_used_packed(const struct vring_virtqueue *vq) { - return is_used_desc_packed(vq, vq->last_used_idx, - vq->packed.used_wrap_counter); + u16 last_used; + u16 last_used_idx; + bool used_wrap_counter; + + last_used_idx = READ_ONCE(vq->last_used_idx); + last_used = packed_last_used(last_used_idx); + used_wrap_counter = packed_used_wrap_counter(last_used_idx); + return is_used_desc_packed(vq, last_used, used_wrap_counter); } static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, @@ -1415,7 +1432,8 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, void **ctx) { struct vring_virtqueue *vq = to_vvq(_vq); - u16 last_used, id; + u16 last_used, id, last_used_idx; + bool used_wrap_counter; void *ret; START_USE(vq); @@ -1434,7 +1452,9 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, /* Only get used elements after they have been exposed by host. */ virtio_rmb(vq->weak_barriers); - last_used = vq->last_used_idx; + last_used_idx = READ_ONCE(vq->last_used_idx); + used_wrap_counter = packed_used_wrap_counter(last_used_idx); + last_used = packed_last_used(last_used_idx); id = le16_to_cpu(vq->packed.vring.desc[last_used].id); *len = le32_to_cpu(vq->packed.vring.desc[last_used].len); @@ -1451,12 +1471,15 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, ret = vq->packed.desc_state[id].data; detach_buf_packed(vq, id, ctx); - vq->last_used_idx += vq->packed.desc_state[id].num; - if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) { - vq->last_used_idx -= vq->packed.vring.num; - vq->packed.used_wrap_counter ^= 1; + last_used += vq->packed.desc_state[id].num; + if (unlikely(last_used >= vq->packed.vring.num)) { + last_used -= vq->packed.vring.num; + used_wrap_counter ^= 1; } + last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR)); + WRITE_ONCE(vq->last_used_idx, last_used); + /* * If we expect an interrupt for the next entry, tell host * by writing event index and flush out the write before @@ -1465,9 +1488,7 @@ static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq, if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC) virtio_store_mb(vq->weak_barriers, &vq->packed.vring.driver->off_wrap, - cpu_to_le16(vq->last_used_idx | - (vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR))); + cpu_to_le16(vq->last_used_idx)); LAST_ADD_TIME_INVALID(vq); @@ -1499,9 +1520,7 @@ static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) if (vq->event) { vq->packed.vring.driver->off_wrap = - cpu_to_le16(vq->last_used_idx | - (vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR)); + cpu_to_le16(vq->last_used_idx); /* * We need to update event offset and event wrap * counter first before updating event flags. @@ -1518,8 +1537,7 @@ static unsigned int virtqueue_enable_cb_prepare_packed(struct virtqueue *_vq) } END_USE(vq); - return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter << - VRING_PACKED_EVENT_F_WRAP_CTR); + return vq->last_used_idx; } static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) @@ -1537,7 +1555,7 @@ static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap) static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); - u16 used_idx, wrap_counter; + u16 used_idx, wrap_counter, last_used_idx; u16 bufs; START_USE(vq); @@ -1550,9 +1568,10 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) if (vq->event) { /* TODO: tune this threshold */ bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4; - wrap_counter = vq->packed.used_wrap_counter; + last_used_idx = READ_ONCE(vq->last_used_idx); + wrap_counter = packed_used_wrap_counter(last_used_idx); - used_idx = vq->last_used_idx + bufs; + used_idx = packed_last_used(last_used_idx) + bufs; if (used_idx >= vq->packed.vring.num) { used_idx -= vq->packed.vring.num; wrap_counter ^= 1; @@ -1582,9 +1601,10 @@ static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq) */ virtio_mb(vq->weak_barriers); - if (is_used_desc_packed(vq, - vq->last_used_idx, - vq->packed.used_wrap_counter)) { + last_used_idx = READ_ONCE(vq->last_used_idx); + wrap_counter = packed_used_wrap_counter(last_used_idx); + used_idx = packed_last_used(last_used_idx); + if (is_used_desc_packed(vq, used_idx, wrap_counter)) { END_USE(vq); return false; } @@ -1688,8 +1708,12 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->we_own_ring = true; vq->notify = notify; vq->weak_barriers = weak_barriers; +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION vq->broken = true; - vq->last_used_idx = 0; +#else + vq->broken = false; +#endif + vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR); vq->event_triggered = false; vq->num_added = 0; vq->packed_ring = true; @@ -1720,7 +1744,6 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->packed.next_avail_idx = 0; vq->packed.avail_wrap_counter = 1; - vq->packed.used_wrap_counter = 1; vq->packed.event_flags_shadow = 0; vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL; @@ -2135,9 +2158,13 @@ irqreturn_t vring_interrupt(int irq, void *_vq) } if (unlikely(vq->broken)) { +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION dev_warn_once(&vq->vq.vdev->dev, "virtio vring IRQ raised before DRIVER_OK"); return IRQ_NONE; +#else + return IRQ_HANDLED; +#endif } /* Just a hint for performance: so it's ok that this can be racy! */ @@ -2180,7 +2207,11 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->we_own_ring = false; vq->notify = notify; vq->weak_barriers = weak_barriers; +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION vq->broken = true; +#else + vq->broken = false; +#endif vq->last_used_idx = 0; vq->event_triggered = false; vq->num_added = 0; diff --git a/fs/io_uring.c b/fs/io_uring.c index 5ff2cdb425bc..0d491ad15b66 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1183,6 +1183,7 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollout = 1, .needs_async_setup = 1, + .ioprio = 1, .async_size = sizeof(struct io_async_msghdr), }, [IORING_OP_RECVMSG] = { @@ -1191,6 +1192,7 @@ static const struct io_op_def io_op_defs[] = { .pollin = 1, .buffer_select = 1, .needs_async_setup = 1, + .ioprio = 1, .async_size = sizeof(struct io_async_msghdr), }, [IORING_OP_TIMEOUT] = { @@ -1266,6 +1268,7 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, .pollout = 1, .audit_skip = 1, + .ioprio = 1, }, [IORING_OP_RECV] = { .needs_file = 1, @@ -1273,6 +1276,7 @@ static const struct io_op_def io_op_defs[] = { .pollin = 1, .buffer_select = 1, .audit_skip = 1, + .ioprio = 1, }, [IORING_OP_OPENAT2] = { }, @@ -4314,18 +4318,19 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret < 0)) return ret; } else { + rw = req->async_data; + s = &rw->s; + /* * Safe and required to re-import if we're using provided * buffers, as we dropped the selected one before retry. */ - if (req->flags & REQ_F_BUFFER_SELECT) { + if (io_do_buffer_select(req)) { ret = io_import_iovec(READ, req, &iovec, s, issue_flags); if (unlikely(ret < 0)) return ret; } - rw = req->async_data; - s = &rw->s; /* * We come here from an earlier attempt, restore our state to * match in case it doesn't. It's cheap enough that we don't @@ -6075,12 +6080,12 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *sr = &req->sr_msg; - if (unlikely(sqe->file_index)) + if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); - sr->flags = READ_ONCE(sqe->addr2); + sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) return -EINVAL; sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; @@ -6311,12 +6316,12 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_sr_msg *sr = &req->sr_msg; - if (unlikely(sqe->file_index)) + if (unlikely(sqe->file_index || sqe->addr2)) return -EINVAL; sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); sr->len = READ_ONCE(sqe->len); - sr->flags = READ_ONCE(sqe->addr2); + sr->flags = READ_ONCE(sqe->ioprio); if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) return -EINVAL; sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index e6f4ccc12f49..353f047e783c 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -6490,6 +6490,7 @@ int smb2_write(struct ksmbd_work *work) goto out; } + ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) writethrough = true; @@ -6505,10 +6506,6 @@ int smb2_write(struct ksmbd_work *work) data_buf = (char *)(((char *)&req->hdr.ProtocolId) + le16_to_cpu(req->DataOffset)); - ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags)); - if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH) - writethrough = true; - ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n", fp->filp->f_path.dentry, offset, length); err = ksmbd_vfs_write(work, fp, data_buf, length, &offset, @@ -7703,7 +7700,7 @@ int smb2_ioctl(struct ksmbd_work *work) { struct file_zero_data_information *zero_data; struct ksmbd_file *fp; - loff_t off, len; + loff_t off, len, bfz; if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) { ksmbd_debug(SMB, @@ -7720,19 +7717,26 @@ int smb2_ioctl(struct ksmbd_work *work) zero_data = (struct file_zero_data_information *)&req->Buffer[0]; - fp = ksmbd_lookup_fd_fast(work, id); - if (!fp) { - ret = -ENOENT; + off = le64_to_cpu(zero_data->FileOffset); + bfz = le64_to_cpu(zero_data->BeyondFinalZero); + if (off > bfz) { + ret = -EINVAL; goto out; } - off = le64_to_cpu(zero_data->FileOffset); - len = le64_to_cpu(zero_data->BeyondFinalZero) - off; + len = bfz - off; + if (len) { + fp = ksmbd_lookup_fd_fast(work, id); + if (!fp) { + ret = -ENOENT; + goto out; + } - ret = ksmbd_vfs_zero_data(work, fp, off, len); - ksmbd_fd_put(work, fp); - if (ret < 0) - goto out; + ret = ksmbd_vfs_zero_data(work, fp, off, len); + ksmbd_fd_put(work, fp); + if (ret < 0) + goto out; + } break; } case FSCTL_QUERY_ALLOCATED_RANGES: @@ -7806,14 +7810,24 @@ int smb2_ioctl(struct ksmbd_work *work) src_off = le64_to_cpu(dup_ext->SourceFileOffset); dst_off = le64_to_cpu(dup_ext->TargetFileOffset); length = le64_to_cpu(dup_ext->ByteCount); - cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp, - dst_off, length, 0); + /* + * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE + * should fall back to vfs_copy_file_range(). This could be + * beneficial when re-exporting nfs/smb mount, but note that + * this can result in partial copy that returns an error status. + * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented, + * fall back to vfs_copy_file_range(), should be avoided when + * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set. + */ + cloned = vfs_clone_file_range(fp_in->filp, src_off, + fp_out->filp, dst_off, length, 0); if (cloned == -EXDEV || cloned == -EOPNOTSUPP) { ret = -EOPNOTSUPP; goto dup_ext_out; } else if (cloned != length) { cloned = vfs_copy_file_range(fp_in->filp, src_off, - fp_out->filp, dst_off, length, 0); + fp_out->filp, dst_off, + length, 0); if (cloned != length) { if (cloned < 0) ret = cloned; diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index d035e060c2f0..35b55ee94fe5 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -5,16 +5,6 @@ * * Author(s): Long Li <longli@microsoft.com>, * Hyunchul Lee <hyc.lee@gmail.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. */ #define SUBMOD_NAME "smb_direct" diff --git a/fs/ksmbd/transport_tcp.c b/fs/ksmbd/transport_tcp.c index 8fef9de787d3..143bba4e4db8 100644 --- a/fs/ksmbd/transport_tcp.c +++ b/fs/ksmbd/transport_tcp.c @@ -230,7 +230,7 @@ static int ksmbd_kthread_fn(void *p) break; } ret = kernel_accept(iface->ksmbd_socket, &client_sk, - O_NONBLOCK); + SOCK_NONBLOCK); mutex_unlock(&iface->sock_release_lock); if (ret) { if (ret == -EAGAIN) diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index dcdd07c6efff..05efcdf7a4a7 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1015,7 +1015,9 @@ int ksmbd_vfs_zero_data(struct ksmbd_work *work, struct ksmbd_file *fp, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, len); - return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len); + return vfs_fallocate(fp->filp, + FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE, + off, len); } int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, @@ -1046,7 +1048,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, *out_count = 0; end = start + length; while (start < end && *out_count < in_count) { - extent_start = f->f_op->llseek(f, start, SEEK_DATA); + extent_start = vfs_llseek(f, start, SEEK_DATA); if (extent_start < 0) { if (extent_start != -ENXIO) ret = (int)extent_start; @@ -1056,7 +1058,7 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, if (extent_start >= end) break; - extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE); + extent_end = vfs_llseek(f, extent_start, SEEK_HOLE); if (extent_end < 0) { if (extent_end != -ENXIO) ret = (int)extent_end; @@ -1777,6 +1779,10 @@ int ksmbd_vfs_copy_file_ranges(struct ksmbd_work *work, ret = vfs_copy_file_range(src_fp->filp, src_off, dst_fp->filp, dst_off, len, 0); + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src_fp->filp, src_off, + dst_fp->filp, dst_off, + len, 0); if (ret < 0) return ret; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 840e3af63a6f..b764213bcc55 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -577,6 +577,7 @@ out_err: ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, u64 dst_pos, u64 count) { + ssize_t ret; /* * Limit copy to 4MB to prevent indefinitely blocking an nfsd @@ -587,7 +588,12 @@ ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst, * limit like this and pipeline multiple COPY requests. */ count = min_t(u64, count, 1 << 22); - return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0); + + if (ret == -EOPNOTSUPP || ret == -EXDEV) + ret = generic_copy_file_range(src, src_pos, dst, dst_pos, + count, 0); + return ret; } __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index c2255b440df9..b08ce0d821a7 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1513,8 +1513,15 @@ static int fanotify_test_fid(struct dentry *dentry) return 0; } -static int fanotify_events_supported(struct path *path, __u64 mask) +static int fanotify_events_supported(struct fsnotify_group *group, + struct path *path, __u64 mask, + unsigned int flags) { + unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ + bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || + (mask & FAN_RENAME); + /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of @@ -1526,6 +1533,16 @@ static int fanotify_events_supported(struct path *path, __u64 mask) if (mask & FANOTIFY_PERM_EVENTS && path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM) return -EINVAL; + + /* + * We shouldn't have allowed setting dirent events and the directory + * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode, + * but because we always allowed it, error only when using new APIs. + */ + if (strict_dir_events && mark_type == FAN_MARK_INODE && + !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) + return -ENOTDIR; + return 0; } @@ -1672,7 +1689,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, goto fput_and_out; if (flags & FAN_MARK_ADD) { - ret = fanotify_events_supported(&path, mask); + ret = fanotify_events_supported(group, &path, mask, flags); if (ret) goto path_put_and_out; } @@ -1695,19 +1712,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, else mnt = path.mnt; - /* - * FAN_RENAME is not allowed on non-dir (for now). - * We shouldn't have allowed setting any dirent events in mask of - * non-dir, but because we always allowed it, error only if group - * was initialized with the new flag FAN_REPORT_TARGET_FID. - */ - ret = -ENOTDIR; - if (inode && !S_ISDIR(inode->i_mode) && - ((mask & FAN_RENAME) || - ((mask & FANOTIFY_DIRENT_EVENTS) && - FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID)))) - goto path_put_and_out; - /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ if (mnt || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; diff --git a/fs/read_write.c b/fs/read_write.c index b1b1cdfee9d3..e0777eefd846 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -1397,28 +1397,6 @@ ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, } EXPORT_SYMBOL(generic_copy_file_range); -static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, - struct file *file_out, loff_t pos_out, - size_t len, unsigned int flags) -{ - /* - * Although we now allow filesystems to handle cross sb copy, passing - * a file of the wrong filesystem type to filesystem driver can result - * in an attempt to dereference the wrong type of ->private_data, so - * avoid doing that until we really have a good reason. NFS defines - * several different file_system_type structures, but they all end up - * using the same ->copy_file_range() function pointer. - */ - if (file_out->f_op->copy_file_range && - file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) - return file_out->f_op->copy_file_range(file_in, pos_in, - file_out, pos_out, - len, flags); - - return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); -} - /* * Performs necessary checks before doing a file copy * @@ -1440,6 +1418,24 @@ static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, if (ret) return ret; + /* + * We allow some filesystems to handle cross sb copy, but passing + * a file of the wrong filesystem type to filesystem driver can result + * in an attempt to dereference the wrong type of ->private_data, so + * avoid doing that until we really have a good reason. + * + * nfs and cifs define several different file_system_type structures + * and several different sets of file_operations, but they all end up + * using the same ->copy_file_range() function pointer. + */ + if (file_out->f_op->copy_file_range) { + if (file_in->f_op->copy_file_range != + file_out->f_op->copy_file_range) + return -EXDEV; + } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { + return -EXDEV; + } + /* Don't touch certain kinds of inodes */ if (IS_IMMUTABLE(inode_out)) return -EPERM; @@ -1505,26 +1501,41 @@ ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, file_start_write(file_out); /* - * Try cloning first, this is supported by more file systems, and - * more efficient if both clone and copy are supported (e.g. NFS). + * Cloning is supported by more file systems, so we implement copy on + * same sb using clone, but for filesystems where both clone and copy + * are supported (e.g. nfs,cifs), we only call the copy method. */ + if (file_out->f_op->copy_file_range) { + ret = file_out->f_op->copy_file_range(file_in, pos_in, + file_out, pos_out, + len, flags); + goto done; + } + if (file_in->f_op->remap_file_range && file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { - loff_t cloned; - - cloned = file_in->f_op->remap_file_range(file_in, pos_in, + ret = file_in->f_op->remap_file_range(file_in, pos_in, file_out, pos_out, min_t(loff_t, MAX_RW_COUNT, len), REMAP_FILE_CAN_SHORTEN); - if (cloned > 0) { - ret = cloned; + if (ret > 0) goto done; - } } - ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, - flags); - WARN_ON_ONCE(ret == -EOPNOTSUPP); + /* + * We can get here for same sb copy of filesystems that do not implement + * ->copy_file_range() in case filesystem does not support clone or in + * case filesystem supports clone but rejected the clone request (e.g. + * because it was not block aligned). + * + * In both cases, fall back to kernel copy so we are able to maintain a + * consistent story about which filesystems support copy_file_range() + * and which filesystems do not, that will allow userspace tools to + * make consistent desicions w.r.t using copy_file_range(). + */ + ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); + done: if (ret > 0) { fsnotify_access(file_in); diff --git a/include/linux/dim.h b/include/linux/dim.h index b698266d0035..6c5733981563 100644 --- a/include/linux/dim.h +++ b/include/linux/dim.h @@ -21,7 +21,7 @@ * We consider 10% difference as significant. */ #define IS_SIGNIFICANT_DIFF(val, ref) \ - (((100UL * abs((val) - (ref))) / (ref)) > 10) + ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10)) /* * Calculate the gap between two values. diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index edc28555814c..e517dbcf74ed 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -111,6 +111,10 @@ FANOTIFY_PERM_EVENTS | \ FAN_Q_OVERFLOW | FAN_ONDIR) +/* Events and flags relevant only for directories */ +#define FANOTIFY_DIRONLY_EVENT_BITS (FANOTIFY_DIRENT_EVENTS | \ + FAN_EVENT_ON_CHILD | FAN_ONDIR) + #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \ FANOTIFY_EVENT_FLAGS) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f615a66c89e9..2563d30736e9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1671,7 +1671,7 @@ enum netdev_priv_flags { IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, - IFF_TX_SKB_NO_LINEAR = 1<<31, + IFF_TX_SKB_NO_LINEAR = BIT_ULL(31), IFF_CHANGE_PROTO_DOWN = BIT_ULL(32), }; diff --git a/include/linux/phy.h b/include/linux/phy.h index 508f1149665b..b09f7d36cff2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -572,6 +572,10 @@ struct macsec_ops; * @mdix_ctrl: User setting of crossover * @pma_extable: Cached value of PMA/PMD Extended Abilities Register * @interrupts: Flag interrupts have been enabled + * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt + * handling shall be postponed until PHY has resumed + * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, + * requiring a rerun of the interrupt handler after resume * @interface: enum phy_interface_t value * @skb: Netlink message for cable diagnostics * @nest: Netlink nest used for cable diagnostics @@ -626,6 +630,8 @@ struct phy_device { /* Interrupts are enabled */ unsigned interrupts:1; + unsigned irq_suspended:1; + unsigned irq_rerun:1; enum phy_state state; diff --git a/include/linux/sysfb.h b/include/linux/sysfb.h index b0dcfa26d07b..8ba8b5be5567 100644 --- a/include/linux/sysfb.h +++ b/include/linux/sysfb.h @@ -55,6 +55,18 @@ struct efifb_dmi_info { int flags; }; +#ifdef CONFIG_SYSFB + +void sysfb_disable(void); + +#else /* CONFIG_SYSFB */ + +static inline void sysfb_disable(void) +{ +} + +#endif /* CONFIG_SYSFB */ + #ifdef CONFIG_EFI extern struct efifb_dmi_info efifb_dmi_list[]; @@ -72,8 +84,8 @@ static inline void sysfb_apply_efi_quirks(struct platform_device *pd) bool sysfb_parse_mode(const struct screen_info *si, struct simplefb_platform_data *mode); -int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode); +struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode); #else /* CONFIG_SYSFB_SIMPLE */ @@ -83,10 +95,10 @@ static inline bool sysfb_parse_mode(const struct screen_info *si, return false; } -static inline int sysfb_create_simplefb(const struct screen_info *si, - const struct simplefb_platform_data *mode) +static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si, + const struct simplefb_platform_data *mode) { - return -EINVAL; + return ERR_PTR(-EINVAL); } #endif /* CONFIG_SYSFB_SIMPLE */ diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 49c7c32815f1..b47c2e7ed0ee 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -257,6 +257,7 @@ void virtio_device_ready(struct virtio_device *dev) WARN_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); +#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION /* * The virtio_synchronize_cbs() makes sure vring_interrupt() * will see the driver specific setup if it sees vq->broken @@ -264,6 +265,7 @@ void virtio_device_ready(struct virtio_device *dev) */ virtio_synchronize_cbs(dev); __virtio_unbreak_device(dev); +#endif /* * The transport should ensure the visibility of vq->broken * before setting DRIVER_OK. See the comments for the transport diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 279ae0fff7ad..5c4e5a96a984 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1338,24 +1338,28 @@ void nft_unregister_flowtable_type(struct nf_flowtable_type *type); /** * struct nft_traceinfo - nft tracing information and state * + * @trace: other struct members are initialised + * @nf_trace: copy of skb->nf_trace before rule evaluation + * @type: event type (enum nft_trace_types) + * @skbid: hash of skb to be used as trace id + * @packet_dumped: packet headers sent in a previous traceinfo message * @pkt: pktinfo currently processed * @basechain: base chain currently processed * @chain: chain currently processed * @rule: rule that was evaluated * @verdict: verdict given by rule - * @type: event type (enum nft_trace_types) - * @packet_dumped: packet headers sent in a previous traceinfo message - * @trace: other struct members are initialised */ struct nft_traceinfo { + bool trace; + bool nf_trace; + bool packet_dumped; + enum nft_trace_types type:8; + u32 skbid; const struct nft_pktinfo *pkt; const struct nft_base_chain *basechain; const struct nft_chain *chain; const struct nft_rule_dp *rule; const struct nft_verdict *verdict; - enum nft_trace_types type; - bool packet_dumped; - bool trace; }; void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index f1972154a594..0980678d502d 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1444,11 +1444,11 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) #define AMD_FMT_MOD_PIPE_MASK 0x7 #define AMD_FMT_MOD_SET(field, value) \ - ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT) + ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT) #define AMD_FMT_MOD_GET(field, value) \ (((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK) #define AMD_FMT_MOD_CLEAR(field) \ - (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) + (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT)) #if defined(__cplusplus) } diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 53e7dae92e42..f10b59d6693e 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -244,7 +244,7 @@ enum io_uring_op { #define IORING_ASYNC_CANCEL_ANY (1U << 2) /* - * send/sendmsg and recv/recvmsg flags (sqe->addr2) + * send/sendmsg and recv/recvmsg flags (sqe->ioprio) * * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send * or receive and arm poll if that yields an diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 921963589904..dfe19bf13f4c 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -2,16 +2,17 @@ #ifndef _UAPI_MPTCP_H #define _UAPI_MPTCP_H +#ifndef __KERNEL__ +#include <netinet/in.h> /* for sockaddr_in and sockaddr_in6 */ +#include <sys/socket.h> /* for struct sockaddr */ +#endif + #include <linux/const.h> #include <linux/types.h> #include <linux/in.h> /* for sockaddr_in */ #include <linux/in6.h> /* for sockaddr_in6 */ #include <linux/socket.h> /* for sockaddr_storage and sa_family */ -#ifndef __KERNEL__ -#include <sys/socket.h> /* for struct sockaddr */ -#endif - #define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) #define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) #define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 58a11f859ac7..30049580cd62 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -526,7 +526,6 @@ void __init tick_nohz_full_setup(cpumask_var_t cpumask) cpumask_copy(tick_nohz_full_mask, cpumask); tick_nohz_full_running = true; } -EXPORT_SYMBOL_GPL(tick_nohz_full_setup); static int tick_nohz_cpu_down(unsigned int cpu) { diff --git a/lib/sbitmap.c b/lib/sbitmap.c index ae4fd4de9ebe..29eb0484215a 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -528,7 +528,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, sbitmap_deferred_clear(map); if (map->word == (1UL << (map_depth - 1)) - 1) - continue; + goto next; nr = find_first_zero_bit(&map->word, map_depth); if (nr + nr_tags <= map_depth) { @@ -539,6 +539,8 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, get_mask = ((1UL << map_tags) - 1) << nr; do { val = READ_ONCE(map->word); + if ((val & ~get_mask) != val) + goto next; ret = atomic_long_cmpxchg(ptr, val, get_mask | val); } while (ret != val); get_mask = (get_mask & ~ret) >> nr; @@ -549,6 +551,7 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, return get_mask; } } +next: /* Jump to next index. */ if (++index >= sb->map_nr) index = 0; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 4fd882686b04..ff4779036649 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1012,9 +1012,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, return okfn(net, sk, skb); ops = nf_hook_entries_get_hook_ops(e); - for (i = 0; i < e->num_hook_entries && - ops[i]->priority <= NF_BR_PRI_BRNF; i++) - ; + for (i = 0; i < e->num_hook_entries; i++) { + /* These hooks have already been called */ + if (ops[i]->priority < NF_BR_PRI_BRNF) + continue; + + /* These hooks have not been called yet, run them. */ + if (ops[i]->priority > NF_BR_PRI_BRNF) + break; + + /* take a closer look at NF_BR_PRI_BRNF. */ + if (ops[i]->hook == br_nf_pre_routing) { + /* This hook diverted the skb to this function, + * hooks after this have not been run yet. + */ + i++; + break; + } + } nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 6b2dc7b2b612..cc1caab4a654 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -410,7 +410,7 @@ int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst, u32 mtu = dst_mtu(encap_dst) - headroom; if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) || - (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu)) + (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu)) return 0; skb_dst_update_pmtu_no_confirm(skb, mtu); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fe8f23b95d32..da5a3c44c4fb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1964,7 +1964,10 @@ process: struct sock *nsk; sk = req->rsk_listener; - drop_reason = tcp_inbound_md5_hash(sk, skb, + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + drop_reason = SKB_DROP_REASON_XFRM_POLICY; + else + drop_reason = tcp_inbound_md5_hash(sk, skb, &iph->saddr, &iph->daddr, AF_INET, dif, sdif); if (unlikely(drop_reason)) { @@ -2016,6 +2019,7 @@ process: } goto discard_and_relse; } + nf_reset_ct(skb); if (nsk == sk) { reqsk_put(req); tcp_v4_restore_cb(skb); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1b1932502e9e..49cc6587dd77 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1109,10 +1109,6 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, goto out; } - if (net->ipv6.devconf_all->disable_policy || - idev->cnf.disable_policy) - f6i->dst_nopolicy = true; - neigh_parms_data_state_setall(idev->nd_parms); ifa->addr = *cfg->pfx; @@ -5172,9 +5168,9 @@ next: fillargs->event = RTM_GETMULTICAST; /* multicast address */ - for (ifmca = rcu_dereference(idev->mc_list); + for (ifmca = rtnl_dereference(idev->mc_list); ifmca; - ifmca = rcu_dereference(ifmca->next), ip_idx++) { + ifmca = rtnl_dereference(ifmca->next), ip_idx++) { if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, fillargs); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d25dc83bac62..828355710c57 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4569,8 +4569,15 @@ struct fib6_info *addrconf_f6i_alloc(struct net *net, } f6i = ip6_route_info_create(&cfg, gfp_flags, NULL); - if (!IS_ERR(f6i)) + if (!IS_ERR(f6i)) { f6i->dst_nocount = true; + + if (!anycast && + (net->ipv6.devconf_all->disable_policy || + idev->cnf.disable_policy)) + f6i->dst_nopolicy = true; + } + return f6i; } diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 6de01185cc68..d43c50a7310d 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -406,7 +406,6 @@ int __net_init seg6_hmac_net_init(struct net *net) return rhashtable_init(&sdata->hmac_infos, &rht_params); } -EXPORT_SYMBOL(seg6_hmac_net_init); void seg6_hmac_exit(void) { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c0b138c20992..6bcd5e419a08 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -323,8 +323,6 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) : NULL; - rcu_read_lock(); - ca = min(t->prl_count, cmax); if (!kp) { @@ -341,7 +339,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u } } - c = 0; + rcu_read_lock(); for_each_prl_rcu(t->prl) { if (c >= cmax) break; @@ -353,7 +351,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u if (kprl.addr != htonl(INADDR_ANY)) break; } -out: + rcu_read_unlock(); len = sizeof(*kp) * c; @@ -362,7 +360,7 @@ out: ret = -EFAULT; kfree(kp); - +out: return ret; } diff --git a/net/mptcp/options.c b/net/mptcp/options.c index be3b918a6d15..aead331866a0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -765,6 +765,7 @@ static noinline bool mptcp_established_options_rst(struct sock *sk, struct sk_bu opts->suboptions |= OPTION_MPTCP_RST; opts->reset_transient = subflow->reset_transient; opts->reset_reason = subflow->reset_reason; + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX); return true; } @@ -788,6 +789,7 @@ static bool mptcp_established_options_fastclose(struct sock *sk, opts->rcvr_key = msk->remote_key; pr_debug("FASTCLOSE key=%llu", opts->rcvr_key); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); return true; } @@ -809,6 +811,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk, opts->fail_seq = subflow->map_seq; pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); return true; } @@ -833,13 +836,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); } /* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */ if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) { *size += opt_size; remaining -= opt_size; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX); } return true; } @@ -966,7 +967,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, goto reset; subflow->mp_capable = 0; pr_fallback(msk); - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); return false; } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 59a85220edc9..45e2a48397b9 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -299,23 +299,21 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - struct sock *s = (struct sock *)msk; pr_debug("fail_seq=%llu", fail_seq); if (!READ_ONCE(msk->allow_infinite_fallback)) return; - if (!READ_ONCE(subflow->mp_fail_response_expect)) { + if (!subflow->fail_tout) { pr_debug("send MP_FAIL response and infinite map"); subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); subflow->send_infinite_map = 1; - } else if (!sock_flag(sk, SOCK_DEAD)) { + tcp_send_ack(sk); + } else { pr_debug("MP_FAIL response received"); - - sk_stop_timer(s, &s->sk_timer); + WRITE_ONCE(subflow->fail_tout, 0); } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 17e13396024a..e475212f2618 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -500,7 +500,7 @@ static void mptcp_set_timeout(struct sock *sk) __mptcp_set_timeout(sk, tout); } -static bool tcp_can_send_ack(const struct sock *ssk) +static inline bool tcp_can_send_ack(const struct sock *ssk) { return !((1 << inet_sk_state_load(ssk)) & (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); @@ -1245,7 +1245,7 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); mptcp_subflow_ctx(ssk)->send_infinite_map = 0; pr_fallback(msk); - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); } static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, @@ -2175,21 +2175,6 @@ static void mptcp_retransmit_timer(struct timer_list *t) sock_put(sk); } -static struct mptcp_subflow_context * -mp_fail_response_expect_subflow(struct mptcp_sock *msk) -{ - struct mptcp_subflow_context *subflow, *ret = NULL; - - mptcp_for_each_subflow(msk, subflow) { - if (READ_ONCE(subflow->mp_fail_response_expect)) { - ret = subflow; - break; - } - } - - return ret; -} - static void mptcp_timeout_timer(struct timer_list *t) { struct sock *sk = from_timer(sk, t, sk_timer); @@ -2346,6 +2331,11 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, kfree_rcu(subflow, rcu); } else { /* otherwise tcp will dispose of the ssk and subflow ctx */ + if (ssk->sk_state == TCP_LISTEN) { + tcp_set_state(ssk, TCP_CLOSE); + mptcp_subflow_queue_clean(ssk); + inet_csk_listen_stop(ssk); + } __tcp_close(ssk, 0); /* close acquired an extra ref */ @@ -2518,27 +2508,50 @@ reset_timer: mptcp_reset_timer(sk); } +/* schedule the timeout timer for the relevant event: either close timeout + * or mp_fail timeout. The close timeout takes precedence on the mp_fail one + */ +void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout) +{ + struct sock *sk = (struct sock *)msk; + unsigned long timeout, close_timeout; + + if (!fail_tout && !sock_flag(sk, SOCK_DEAD)) + return; + + close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN; + + /* the close timeout takes precedence on the fail one, and here at least one of + * them is active + */ + timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout; + + sk_reset_timer(sk, &sk->sk_timer, timeout); +} + static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) { - struct mptcp_subflow_context *subflow; - struct sock *ssk; + struct sock *ssk = msk->first; bool slow; - subflow = mp_fail_response_expect_subflow(msk); - if (subflow) { - pr_debug("MP_FAIL doesn't respond, reset the subflow"); + if (!ssk) + return; - ssk = mptcp_subflow_tcp_sock(subflow); - slow = lock_sock_fast(ssk); - mptcp_subflow_reset(ssk); - unlock_sock_fast(ssk, slow); - } + pr_debug("MP_FAIL doesn't respond, reset the subflow"); + + slow = lock_sock_fast(ssk); + mptcp_subflow_reset(ssk); + WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0); + unlock_sock_fast(ssk, slow); + + mptcp_reset_timeout(msk, 0); } static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); struct sock *sk = &msk->sk.icsk_inet.sk; + unsigned long fail_tout; int state; lock_sock(sk); @@ -2575,7 +2588,9 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) __mptcp_retrans(sk); - mptcp_mp_fail_no_response(msk); + fail_tout = msk->first ? READ_ONCE(mptcp_subflow_ctx(msk->first)->fail_tout) : 0; + if (fail_tout && time_after(jiffies, fail_tout)) + mptcp_mp_fail_no_response(msk); unlock: release_sock(sk); @@ -2822,6 +2837,7 @@ static void __mptcp_destroy_sock(struct sock *sk) static void mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk = mptcp_sk(sk); bool do_cancel_work = false; lock_sock(sk); @@ -2840,10 +2856,16 @@ static void mptcp_close(struct sock *sk, long timeout) cleanup: /* orphan all the subflows */ inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; - mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + mptcp_for_each_subflow(msk, subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); bool slow = lock_sock_fast_nested(ssk); + /* since the close timeout takes precedence on the fail one, + * cancel the latter + */ + if (ssk == msk->first) + subflow->fail_tout = 0; + sock_orphan(ssk); unlock_sock_fast(ssk, slow); } @@ -2852,13 +2874,13 @@ cleanup: sock_hold(sk); pr_debug("msk=%p state=%d", sk, sk->sk_state); if (mptcp_sk(sk)->token) - mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); + mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL); if (sk->sk_state == TCP_CLOSE) { __mptcp_destroy_sock(sk); do_cancel_work = true; } else { - sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN); + mptcp_reset_timeout(msk, 0); } release_sock(sk); if (do_cancel_work) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 200f89f6d62f..c14d70c036d0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -306,6 +306,7 @@ struct mptcp_sock { u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; + struct mptcp_sock *dl_next; }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) @@ -468,7 +469,6 @@ struct mptcp_subflow_context { local_id_valid : 1, /* local_id is correctly initialized */ valid_csum_seen : 1; /* at least one csum validated */ enum mptcp_data_avail data_avail; - bool mp_fail_response_expect; u32 remote_nonce; u64 thmac; u32 local_nonce; @@ -482,6 +482,7 @@ struct mptcp_subflow_context { u8 stale_count; long delegated_status; + unsigned long fail_tout; ); @@ -608,6 +609,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); +void mptcp_subflow_queue_clean(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); @@ -662,6 +664,7 @@ void mptcp_get_options(const struct sk_buff *skb, void mptcp_finish_connect(struct sock *sk); void __mptcp_set_connected(struct sock *sk); +void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout); static inline bool mptcp_is_fully_established(struct sock *sk) { return inet_sk_state_load(sk) == TCP_ESTABLISHED && @@ -926,12 +929,25 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk) set_bit(MPTCP_FALLBACK_DONE, &msk->flags); } -static inline void mptcp_do_fallback(struct sock *sk) +static inline void mptcp_do_fallback(struct sock *ssk) { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct mptcp_sock *msk = mptcp_sk(subflow->conn); + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct sock *sk = subflow->conn; + struct mptcp_sock *msk; + msk = mptcp_sk(sk); __mptcp_do_fallback(msk); + if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { + gfp_t saved_allocation = ssk->sk_allocation; + + /* we are in a atomic (BH) scope, override ssk default for data + * fin allocation + */ + ssk->sk_allocation = GFP_ATOMIC; + ssk->sk_shutdown |= SEND_SHUTDOWN; + tcp_shutdown(ssk, SEND_SHUTDOWN); + ssk->sk_allocation = saved_allocation; + } } #define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 8841e8cd9ad8..63e8892ec807 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -843,7 +843,8 @@ enum mapping_status { MAPPING_INVALID, MAPPING_EMPTY, MAPPING_DATA_FIN, - MAPPING_DUMMY + MAPPING_DUMMY, + MAPPING_BAD_CSUM }; static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) @@ -958,11 +959,7 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff * subflow->map_data_csum); if (unlikely(csum)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); - if (subflow->mp_join || subflow->valid_csum_seen) { - subflow->send_mp_fail = 1; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX); - } - return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + return MAPPING_BAD_CSUM; } subflow->valid_csum_seen = 1; @@ -974,7 +971,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool csum_reqd = READ_ONCE(msk->csum_enabled); - struct sock *sk = (struct sock *)msk; struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@ -1016,9 +1012,6 @@ static enum mapping_status get_mapping_status(struct sock *ssk, pr_debug("infinite mapping received"); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX); subflow->map_data_len = 0; - if (!sock_flag(ssk, SOCK_DEAD)) - sk_stop_timer(sk, &sk->sk_timer); - return MAPPING_INVALID; } @@ -1165,6 +1158,33 @@ static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) return !subflow->fully_established; } +static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + unsigned long fail_tout; + + /* greceful failure can happen only on the MPC subflow */ + if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) + return; + + /* since the close timeout take precedence on the fail one, + * no need to start the latter when the first is already set + */ + if (sock_flag((struct sock *)msk, SOCK_DEAD)) + return; + + /* we don't need extreme accuracy here, use a zero fail_tout as special + * value meaning no fail timeout at all; + */ + fail_tout = jiffies + TCP_RTO_MAX; + if (!fail_tout) + fail_tout = 1; + WRITE_ONCE(subflow->fail_tout, fail_tout); + tcp_send_ack(ssk); + + mptcp_reset_timeout(msk, subflow->fail_tout); +} + static bool subflow_check_data_avail(struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -1184,10 +1204,8 @@ static bool subflow_check_data_avail(struct sock *ssk) status = get_mapping_status(ssk, msk); trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue)); - if (unlikely(status == MAPPING_INVALID)) - goto fallback; - - if (unlikely(status == MAPPING_DUMMY)) + if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY || + status == MAPPING_BAD_CSUM)) goto fallback; if (status != MAPPING_OK) @@ -1229,22 +1247,17 @@ no_data: fallback: if (!__mptcp_check_fallback(msk)) { /* RFC 8684 section 3.7. */ - if (subflow->send_mp_fail) { + if (status == MAPPING_BAD_CSUM && + (subflow->mp_join || subflow->valid_csum_seen)) { + subflow->send_mp_fail = 1; + if (!READ_ONCE(msk->allow_infinite_fallback)) { - ssk->sk_err = EBADMSG; - tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; - tcp_send_active_reset(ssk, GFP_ATOMIC); - while ((skb = skb_peek(&ssk->sk_receive_queue))) - sk_eat_skb(ssk, skb); - } else if (!sock_flag(ssk, SOCK_DEAD)) { - WRITE_ONCE(subflow->mp_fail_response_expect, true); - sk_reset_timer((struct sock *)msk, - &((struct sock *)msk)->sk_timer, - jiffies + TCP_RTO_MAX); + goto reset; } - WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); + mptcp_subflow_fail(msk, ssk); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); return true; } @@ -1252,16 +1265,20 @@ fallback: /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ - ssk->sk_err = EBADMSG; - tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMPTCP; + +reset: + ssk->sk_err = EBADMSG; + tcp_set_state(ssk, TCP_CLOSE); + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); tcp_send_active_reset(ssk, GFP_ATOMIC); WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA); return false; } - __mptcp_do_fallback(msk); + mptcp_do_fallback(ssk); } skb = skb_peek(&ssk->sk_receive_queue); @@ -1706,6 +1723,58 @@ static void subflow_state_change(struct sock *sk) } } +void mptcp_subflow_queue_clean(struct sock *listener_ssk) +{ + struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue; + struct mptcp_sock *msk, *next, *head = NULL; + struct request_sock *req; + + /* build a list of all unaccepted mptcp sockets */ + spin_lock_bh(&queue->rskq_lock); + for (req = queue->rskq_accept_head; req; req = req->dl_next) { + struct mptcp_subflow_context *subflow; + struct sock *ssk = req->sk; + struct mptcp_sock *msk; + + if (!sk_is_mptcp(ssk)) + continue; + + subflow = mptcp_subflow_ctx(ssk); + if (!subflow || !subflow->conn) + continue; + + /* skip if already in list */ + msk = mptcp_sk(subflow->conn); + if (msk->dl_next || msk == head) + continue; + + msk->dl_next = head; + head = msk; + } + spin_unlock_bh(&queue->rskq_lock); + if (!head) + return; + + /* can't acquire the msk socket lock under the subflow one, + * or will cause ABBA deadlock + */ + release_sock(listener_ssk); + + for (msk = head; msk; msk = next) { + struct sock *sk = (struct sock *)msk; + bool slow; + + slow = lock_sock_fast_nested(sk); + next = msk->dl_next; + msk->first = NULL; + msk->dl_next = NULL; + unlock_sock_fast(sk, slow); + } + + /* we are still under the listener msk socket lock */ + lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING); +} + static int subflow_ulp_init(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 78814417d753..80713febfac6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -1803,7 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; - if (np && of_get_property(np, "mlx,multi-host", NULL)) + if (np && (of_get_property(np, "mellanox,multi-host", NULL) || + of_get_property(np, "mlx,multi-host", NULL))) ndp->mlx_multi_host = true; } diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 53f40e473855..3ddce24ac76d 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -25,9 +25,7 @@ static noinline void __nft_trace_packet(struct nft_traceinfo *info, const struct nft_chain *chain, enum nft_trace_types type) { - const struct nft_pktinfo *pkt = info->pkt; - - if (!info->trace || !pkt->skb->nf_trace) + if (!info->trace || !info->nf_trace) return; info->chain = chain; @@ -42,11 +40,24 @@ static inline void nft_trace_packet(struct nft_traceinfo *info, enum nft_trace_types type) { if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + info->nf_trace = pkt->skb->nf_trace; info->rule = rule; __nft_trace_packet(info, chain, type); } } +static inline void nft_trace_copy_nftrace(struct nft_traceinfo *info) +{ + if (static_branch_unlikely(&nft_trace_enabled)) { + const struct nft_pktinfo *pkt = info->pkt; + + if (info->trace) + info->nf_trace = pkt->skb->nf_trace; + } +} + static void nft_bitwise_fast_eval(const struct nft_expr *expr, struct nft_regs *regs) { @@ -85,6 +96,7 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, const struct nft_chain *chain, const struct nft_regs *regs) { + const struct nft_pktinfo *pkt = info->pkt; enum nft_trace_types type; switch (regs->verdict.code) { @@ -92,8 +104,13 @@ static noinline void __nft_trace_verdict(struct nft_traceinfo *info, case NFT_RETURN: type = NFT_TRACETYPE_RETURN; break; + case NF_STOLEN: + type = NFT_TRACETYPE_RULE; + /* can't access skb->nf_trace; use copy */ + break; default: type = NFT_TRACETYPE_RULE; + info->nf_trace = pkt->skb->nf_trace; break; } @@ -254,6 +271,7 @@ next_rule: switch (regs.verdict.code) { case NFT_BREAK: regs.verdict.code = NFT_CONTINUE; + nft_trace_copy_nftrace(&info); continue; case NFT_CONTINUE: nft_trace_packet(&info, chain, rule, diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index 5041725423c2..1163ba9c1401 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -7,7 +7,7 @@ #include <linux/module.h> #include <linux/static_key.h> #include <linux/hash.h> -#include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/if_vlan.h> #include <linux/init.h> #include <linux/skbuff.h> @@ -25,22 +25,6 @@ DEFINE_STATIC_KEY_FALSE(nft_trace_enabled); EXPORT_SYMBOL_GPL(nft_trace_enabled); -static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb) -{ - __be32 id; - - /* using skb address as ID results in a limited number of - * values (and quick reuse). - * - * So we attempt to use as many skb members that will not - * change while skb is with netfilter. - */ - id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb), - skb->skb_iif); - - return nla_put_be32(nlskb, NFTA_TRACE_ID, id); -} - static int trace_fill_header(struct sk_buff *nlskb, u16 type, const struct sk_buff *skb, int off, unsigned int len) @@ -186,6 +170,7 @@ void nft_trace_notify(struct nft_traceinfo *info) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned int size; + u32 mark = 0; u16 event; if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE)) @@ -229,7 +214,7 @@ void nft_trace_notify(struct nft_traceinfo *info) if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type))) goto nla_put_failure; - if (trace_fill_id(skb, pkt->skb)) + if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid)) goto nla_put_failure; if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) @@ -249,16 +234,24 @@ void nft_trace_notify(struct nft_traceinfo *info) case NFT_TRACETYPE_RULE: if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict)) goto nla_put_failure; + + /* pkt->skb undefined iff NF_STOLEN, disable dump */ + if (info->verdict->code == NF_STOLEN) + info->packet_dumped = true; + else + mark = pkt->skb->mark; + break; case NFT_TRACETYPE_POLICY: + mark = pkt->skb->mark; + if (nla_put_be32(skb, NFTA_TRACE_POLICY, htonl(info->basechain->policy))) goto nla_put_failure; break; } - if (pkt->skb->mark && - nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark))) + if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark))) goto nla_put_failure; if (!info->packet_dumped) { @@ -283,9 +276,20 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, const struct nft_verdict *verdict, const struct nft_chain *chain) { + static siphash_key_t trace_key __read_mostly; + struct sk_buff *skb = pkt->skb; + info->basechain = nft_base_chain(chain); info->trace = true; + info->nf_trace = pkt->skb->nf_trace; info->packet_dumped = false; info->pkt = pkt; info->verdict = verdict; + + net_get_random_once(&trace_key, sizeof(trace_key)); + + info->skbid = (u32)siphash_3u32(hash32_ptr(skb), + skb_get_hash(skb), + skb->skb_iif, + &trace_key); } diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index df40314de21f..76de6c8d9865 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -143,6 +143,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key, /* Another cpu may race to insert the element with the same key */ if (prev) { nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); he = prev; } @@ -152,6 +153,7 @@ out: err2: nft_set_elem_destroy(set, he, true); + atomic_dec(&set->nelems); err1: return false; } diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index b3138fc2e552..f06ddbed3fed 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -31,89 +31,89 @@ static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); sk->sk_timer.function = rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; - add_timer(&sk->sk_timer); + sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires); } void rose_start_t1timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t2timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_t3timer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_hbtimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->timer); + sk_stop_timer(sk, &rose->timer); rose->timer.function = rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; - add_timer(&rose->timer); + sk_reset_timer(sk, &rose->timer, rose->timer.expires); } void rose_start_idletimer(struct sock *sk) { struct rose_sock *rose = rose_sk(sk); - del_timer(&rose->idletimer); + sk_stop_timer(sk, &rose->idletimer); if (rose->idle > 0) { rose->idletimer.function = rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; - add_timer(&rose->idletimer); + sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires); } } void rose_stop_heartbeat(struct sock *sk) { - del_timer(&sk->sk_timer); + sk_stop_timer(sk, &sk->sk_timer); } void rose_stop_timer(struct sock *sk) { - del_timer(&rose_sk(sk)->timer); + sk_stop_timer(sk, &rose_sk(sk)->timer); } void rose_stop_idletimer(struct sock *sk) { - del_timer(&rose_sk(sk)->idletimer); + sk_stop_timer(sk, &rose_sk(sk)->idletimer); } static void rose_heartbeat_expiry(struct timer_list *t) @@ -130,6 +130,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { bh_unlock_sock(sk); rose_destroy_socket(sk); + sock_put(sk); return; } break; @@ -152,6 +153,7 @@ static void rose_heartbeat_expiry(struct timer_list *t) rose_start_heartbeat(sk); bh_unlock_sock(sk); + sock_put(sk); } static void rose_timer_expiry(struct timer_list *t) @@ -181,6 +183,7 @@ static void rose_timer_expiry(struct timer_list *t) break; } bh_unlock_sock(sk); + sock_put(sk); } static void rose_idletimer_expiry(struct timer_list *t) @@ -205,4 +208,5 @@ static void rose_idletimer_expiry(struct timer_list *t) sock_set_flag(sk, SOCK_DEAD); } bh_unlock_sock(sk); + sock_put(sk); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index da9733da9868..817065aa2833 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -588,7 +588,8 @@ static int tcf_idr_release_unsafe(struct tc_action *p) } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, - const struct tc_action_ops *ops) + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; @@ -604,20 +605,25 @@ static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; + ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); - if (ret == ACT_P_DELETED) { + if (ret == ACT_P_DELETED) module_put(ops->owner); - n_i++; - } else if (ret < 0) { - mutex_unlock(&idrinfo->lock); - goto nla_put_failure; - } + else if (ret < 0) + break; + n_i++; } mutex_unlock(&idrinfo->lock); + if (ret < 0) { + if (n_i) + NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); + else + goto nla_put_failure; + } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) @@ -638,7 +644,7 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(idrinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { diff --git a/net/socket.c b/net/socket.c index 2bc8773d9dc5..96300cdc0625 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2149,10 +2149,13 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, struct sockaddr __user *addr, int __user *addr_len) { + struct sockaddr_storage address; + struct msghdr msg = { + /* Save some cycles and don't copy the address if not needed */ + .msg_name = addr ? (struct sockaddr *)&address : NULL, + }; struct socket *sock; struct iovec iov; - struct msghdr msg; - struct sockaddr_storage address; int err, err2; int fput_needed; @@ -2163,14 +2166,6 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags, if (!sock) goto out; - msg.msg_control = NULL; - msg.msg_controllen = 0; - /* Save some cycles and don't copy the address if not needed */ - msg.msg_name = addr ? (struct sockaddr *)&address : NULL; - /* We assume all kernel code knows the size of sockaddr_storage */ - msg.msg_namelen = 0; - msg.msg_iocb = NULL; - msg.msg_flags = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, flags); @@ -2375,6 +2370,7 @@ int __copy_msghdr_from_user(struct msghdr *kmsg, return -EFAULT; kmsg->msg_control_is_user = true; + kmsg->msg_get_inq = 0; kmsg->msg_control_user = msg.msg_control; kmsg->msg_controllen = msg.msg_controllen; kmsg->msg_flags = msg.msg_flags; diff --git a/net/tipc/node.c b/net/tipc/node.c index 6ef95ce565bd..b48d97cbbe29 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -472,8 +472,8 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, bool preliminary) { struct tipc_net *tn = net_generic(net, tipc_net_id); + struct tipc_link *l, *snd_l = tipc_bc_sndlink(net); struct tipc_node *n, *temp_node; - struct tipc_link *l; unsigned long intv; int bearer_id; int i; @@ -488,6 +488,16 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u8 *peer_id, goto exit; /* A preliminary node becomes "real" now, refresh its data */ tipc_node_write_lock(n); + if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link refresh failed, no memory\n"); + tipc_node_write_unlock_fast(n); + tipc_node_put(n); + n = NULL; + goto exit; + } n->preliminary = false; n->addr = addr; hlist_del_rcu(&n->hash); @@ -567,7 +577,16 @@ update: n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; n->active_links[1] = INVALID_BEARER_ID; - n->bc_entry.link = NULL; + if (!preliminary && + !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX, + tipc_link_min_win(snd_l), tipc_link_max_win(snd_l), + n->capabilities, &n->bc_entry.inputq1, + &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) { + pr_warn("Broadcast rcv link creation failed, no memory\n"); + kfree(n); + n = NULL; + goto exit; + } tipc_node_get(n); timer_setup(&n->timer, tipc_node_timeout, 0); /* Start a slow timer anyway, crypto needs it */ @@ -1155,7 +1174,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, bool *respond, bool *dupl_addr) { struct tipc_node *n; - struct tipc_link *l, *snd_l; + struct tipc_link *l; struct tipc_link_entry *le; bool addr_match = false; bool sign_match = false; @@ -1175,22 +1194,6 @@ void tipc_node_check_dest(struct net *net, u32 addr, return; tipc_node_write_lock(n); - if (unlikely(!n->bc_entry.link)) { - snd_l = tipc_bc_sndlink(net); - if (!tipc_link_bc_create(net, tipc_own_addr(net), - addr, peer_id, U16_MAX, - tipc_link_min_win(snd_l), - tipc_link_max_win(snd_l), - n->capabilities, - &n->bc_entry.inputq1, - &n->bc_entry.namedq, snd_l, - &n->bc_entry.link)) { - pr_warn("Broadcast rcv link creation failed, no mem\n"); - tipc_node_write_unlock_fast(n); - tipc_node_put(n); - return; - } - } le = &n->links[b->identity]; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 17f8c523e33b..43509c7e90fc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -502,6 +502,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, sock_init_data(sock, sk); tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { + sk_free(sk); pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index af293ea1542c..e172d89e92e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -4,6 +4,7 @@ * Tests for sockmap/sockhash holding kTLS sockets. */ +#include <netinet/tcp.h> #include "test_progs.h" #define MAX_TEST_NAME 80 @@ -92,9 +93,78 @@ close_srv: close(srv); } +static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map) +{ + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + struct sockaddr_in6 *v6; + struct sockaddr_in *v4; + int err, s, zero = 0; + + switch (family) { + case AF_INET: + v4 = (struct sockaddr_in *)&addr; + v4->sin_family = AF_INET; + break; + case AF_INET6: + v6 = (struct sockaddr_in6 *)&addr; + v6->sin6_family = AF_INET6; + break; + default: + PRINT_FAIL("unsupported socket family %d", family); + return; + } + + s = socket(family, SOCK_STREAM, 0); + if (!ASSERT_GE(s, 0, "socket")) + return; + + err = bind(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "bind")) + goto close; + + err = getsockname(s, (struct sockaddr *)&addr, &len); + if (!ASSERT_OK(err, "getsockname")) + goto close; + + err = connect(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "connect")) + goto close; + + /* save sk->sk_prot and set it to tls_prots */ + err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto close; + + /* sockmap update should not affect saved sk_prot */ + err = bpf_map_update_elem(map, &zero, &s, BPF_ANY); + if (!ASSERT_ERR(err, "sockmap update elem")) + goto close; + + /* call sk->sk_prot->setsockopt to dispatch to saved sk_prot */ + err = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero)); + ASSERT_OK(err, "setsockopt(TCP_NODELAY)"); + +close: + close(s); +} + +static const char *fmt_test_name(const char *subtest_name, int family, + enum bpf_map_type map_type) +{ + const char *map_type_str = BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"; + const char *family_str = AF_INET ? "IPv4" : "IPv6"; + static char test_name[MAX_TEST_NAME]; + + snprintf(test_name, MAX_TEST_NAME, + "sockmap_ktls %s %s %s", + subtest_name, family_str, map_type_str); + + return test_name; +} + static void run_tests(int family, enum bpf_map_type map_type) { - char test_name[MAX_TEST_NAME]; int map; map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); @@ -103,14 +173,10 @@ static void run_tests(int family, enum bpf_map_type map_type) return; } - snprintf(test_name, MAX_TEST_NAME, - "sockmap_ktls disconnect_after_delete %s %s", - family == AF_INET ? "IPv4" : "IPv6", - map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"); - if (!test__start_subtest(test_name)) - return; - - test_sockmap_ktls_disconnect_after_delete(family, map); + if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type))) + test_sockmap_ktls_disconnect_after_delete(family, map); + if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type))) + test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map); close(map); } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7ea54af55490..ddad703ace34 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -54,7 +54,7 @@ TEST_GEN_FILES += ipsec TEST_GEN_FILES += ioam6_parser TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile index 8a69c91fcca0..8ccaf8732eb2 100644 --- a/tools/testing/selftests/net/bpf/Makefile +++ b/tools/testing/selftests/net/bpf/Makefile @@ -2,7 +2,7 @@ CLANG ?= clang CCINCLUDE += -I../../bpf -CCINCLUDE += -I../../../lib +CCINCLUDE += -I../../../../lib CCINCLUDE += -I../../../../../usr/include/ TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 9dd43d7d957b..515859a5168b 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -61,6 +61,39 @@ chk_msk_nr() __chk_nr "grep -c token:" $* } +wait_msk_nr() +{ + local condition="grep -c token:" + local expected=$1 + local timeout=20 + local msg nr + local max=0 + local i=0 + + shift 1 + msg=$* + + while [ $i -lt $timeout ]; do + nr=$(ss -inmHMN $ns | $condition) + [ $nr == $expected ] && break; + [ $nr -gt $max ] && max=$nr + i=$((i + 1)) + sleep 1 + done + + printf "%-50s" "$msg" + if [ $i -ge $timeout ]; then + echo "[ fail ] timeout while expecting $expected max $max last $nr" + ret=$test_cnt + elif [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + else + echo "[ ok ]" + fi + test_cnt=$((test_cnt+1)) +} + chk_msk_fallback_nr() { __chk_nr "grep -c fallback" $* @@ -146,7 +179,7 @@ ip -n $ns link set dev lo up echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" @@ -155,7 +188,7 @@ chk_msk_listen 10000 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " @@ -167,13 +200,13 @@ flush_pids echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" @@ -184,7 +217,7 @@ for I in `seq 1 $NR_CLIENTS`; do echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p $((I+10001)) -l -w 10 \ + ./mptcp_connect -p $((I+10001)) -l -w 20 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done wait_local_port_listen $ns $((NR_CLIENTS + 10001)) @@ -193,12 +226,11 @@ for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p $((I+10001)) -w 10 \ + ./mptcp_connect -p $((I+10001)) -w 20 \ -t ${timeout_poll} 127.0.0.1 >/dev/null & done -sleep 1.5 -chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" flush_pids exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 8628aa61b763..e2ea6c126c99 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -265,7 +265,7 @@ static void sock_test_tcpulp(int sock, int proto, unsigned int line) static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 29f75e2a1116..8672d898f8cd 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -88,7 +88,7 @@ static void xgetaddrinfo(const char *node, const char *service, static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index ac9a4d9c1764..ae61f39556ca 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -136,7 +136,7 @@ static void xgetaddrinfo(const char *node, const char *service, static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c new file mode 100644 index 000000000000..fa83918b62d1 --- /dev/null +++ b/tools/testing/selftests/net/tun.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "../kselftest_harness.h" + +static int tun_attach(int fd, char *dev) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_ATTACH_QUEUE; + + return ioctl(fd, TUNSETQUEUE, (void *) &ifr); +} + +static int tun_detach(int fd, char *dev) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_DETACH_QUEUE; + + return ioctl(fd, TUNSETQUEUE, (void *) &ifr); +} + +static int tun_alloc(char *dev) +{ + struct ifreq ifr; + int fd, err; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + fprintf(stderr, "can't open tun: %s\n", strerror(errno)); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE; + + err = ioctl(fd, TUNSETIFF, (void *) &ifr); + if (err < 0) { + fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno)); + close(fd); + return err; + } + strcpy(dev, ifr.ifr_name); + return fd; +} + +static int tun_delete(char *dev) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg ifm; + unsigned char data[64]; + } req; + struct rtattr *rta; + int ret, rtnl; + + rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); + if (rtnl < 0) { + fprintf(stderr, "can't open rtnl: %s\n", strerror(errno)); + return 1; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm))); + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_type = RTM_DELLINK; + + req.ifm.ifi_family = AF_UNSPEC; + + rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); + rta->rta_type = IFLA_IFNAME; + rta->rta_len = RTA_LENGTH(IFNAMSIZ); + req.nh.nlmsg_len += rta->rta_len; + memcpy(RTA_DATA(rta), dev, IFNAMSIZ); + + ret = send(rtnl, &req, req.nh.nlmsg_len, 0); + if (ret < 0) + fprintf(stderr, "can't send: %s\n", strerror(errno)); + ret = (unsigned int)ret != req.nh.nlmsg_len; + + close(rtnl); + return ret; +} + +FIXTURE(tun) +{ + char ifname[IFNAMSIZ]; + int fd, fd2; +}; + +FIXTURE_SETUP(tun) +{ + memset(self->ifname, 0, sizeof(self->ifname)); + + self->fd = tun_alloc(self->ifname); + ASSERT_GE(self->fd, 0); + + self->fd2 = tun_alloc(self->ifname); + ASSERT_GE(self->fd2, 0); +} + +FIXTURE_TEARDOWN(tun) +{ + if (self->fd >= 0) + close(self->fd); + if (self->fd2 >= 0) + close(self->fd2); +} + +TEST_F(tun, delete_detach_close) { + EXPECT_EQ(tun_delete(self->ifname), 0); + EXPECT_EQ(tun_detach(self->fd, self->ifname), -1); + EXPECT_EQ(errno, 22); +} + +TEST_F(tun, detach_delete_close) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, detach_close_delete) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + close(self->fd); + self->fd = -1; + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, reattach_delete_close) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, reattach_close_delete) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); + close(self->fd); + self->fd = -1; + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 80b5d352702e..dc932fd65363 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -120,7 +120,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" - run_tcp "${ipv4_args}" + run_tcp "${ipv6_args}" run_udp "${ipv6_args}" } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index b24494c6f546..c652e8c1157d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -609,5 +609,82 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "7f52", + "name": "Try to flush action which is referenced by filter", + "category": [ + "actions", + "gact" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress", + "$TC actions add action pass index 1", + "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 1" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "1", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "total acts 1.*action order [0-9]*: gact action pass.*index 1 ref 2 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress", + [ + "sleep 1; $TC actions flush action gact", + 0, + 1 + ] + ] + }, + { + "id": "ae1e", + "name": "Try to flush actions when last one is referenced by filter", + "category": [ + "actions", + "gact" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress", + [ + "$TC actions add action pass index 1", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 2", + "$TC actions add action drop index 3", + "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 3" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "total acts 1.*action order [0-9]*: gact action drop.*index 3 ref 2 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress", + [ + "sleep 1; $TC actions flush action gact", + 0, + 1 + ] + ] } ] |