From 747bda7bb5b1644a06734900326847a5d353c448 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:29:00 +0200 Subject: fbdev/deferred-io: Provide get_page hook in struct fb_deferred_io Add a callback for drivers to provide framebuffer pages to fbdev's deferred-I/O helpers. Implementations need to acquire a reference on the page before returning it. Returning NULL generates a SIGBUS signal. This will be useful for DRM's fbdev emulation with GEM-shmem buffer objects. v2: - fix typo in commit message (Javier) Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-8-tzimmermann@suse.de --- include/linux/fb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/fb.h b/include/linux/fb.h index 811e47f9d1c3..5358edbb9c0b 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -225,6 +225,7 @@ struct fb_deferred_io { struct mutex lock; /* mutex that protects the pageref list */ struct list_head pagereflist; /* list of pagerefs for touched pages */ /* callback */ + struct page *(*get_page)(struct fb_info *info, unsigned long offset); void (*deferred_io)(struct fb_info *info, struct list_head *pagelist); }; #endif -- cgit v1.2.3 From 150f431a08317e0e0363a7f9147b6246d3b40ba6 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:29:01 +0200 Subject: drm/fbdev: Add fbdev-shmem Add an fbdev emulation for SHMEM-based memory managers. The code is similar to fbdev-generic, but does not require an additional shadow buffer for mmap(). Fbdev-shmem operates directly on the buffer object's SHMEM pages. Fbdev's deferred-I/O mechanism updates the hardware state on write operations. The memory pages of GEM SHMEM cannot be detected by fbdefio. Therefore fbdev-shmem implements the .get_page() hook in struct fb_deferred_io. The fbdefio helpers call this hook to retrieve the page directly from fbdev-shmem instead of trying to detect it internally. v3: - clarify on get_page mechanism in commit description (Javier) v2: - use drm_driver_legacy_fb_format() (Geert) Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-9-tzimmermann@suse.de --- drivers/gpu/drm/Makefile | 1 + drivers/gpu/drm/drm_fbdev_shmem.c | 316 ++++++++++++++++++++++++++++++++++++++ include/drm/drm_fbdev_shmem.h | 15 ++ 3 files changed, 332 insertions(+) create mode 100644 drivers/gpu/drm/drm_fbdev_shmem.c create mode 100644 include/drm/drm_fbdev_shmem.h (limited to 'include') diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index f9ca4f8fa6c5..fc793c6dd299 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -108,6 +108,7 @@ drm_dma_helper-$(CONFIG_DRM_KMS_HELPER) += drm_fb_dma_helper.o obj-$(CONFIG_DRM_GEM_DMA_HELPER) += drm_dma_helper.o drm_shmem_helper-y := drm_gem_shmem_helper.o +drm_shmem_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_shmem.o obj-$(CONFIG_DRM_GEM_SHMEM_HELPER) += drm_shmem_helper.o drm_suballoc_helper-y := drm_suballoc.o diff --git a/drivers/gpu/drm/drm_fbdev_shmem.c b/drivers/gpu/drm/drm_fbdev_shmem.c new file mode 100644 index 000000000000..a85a8a8e2eb8 --- /dev/null +++ b/drivers/gpu/drm/drm_fbdev_shmem.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: MIT + +#include + +#include +#include +#include +#include +#include +#include + +#include + +/* + * struct fb_ops + */ + +static int drm_fbdev_shmem_fb_open(struct fb_info *info, int user) +{ + struct drm_fb_helper *fb_helper = info->par; + + /* No need to take a ref for fbcon because it unbinds on unregister */ + if (user && !try_module_get(fb_helper->dev->driver->fops->owner)) + return -ENODEV; + + return 0; +} + +static int drm_fbdev_shmem_fb_release(struct fb_info *info, int user) +{ + struct drm_fb_helper *fb_helper = info->par; + + if (user) + module_put(fb_helper->dev->driver->fops->owner); + + return 0; +} + +FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(drm_fbdev_shmem, + drm_fb_helper_damage_range, + drm_fb_helper_damage_area); + +static int drm_fbdev_shmem_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_framebuffer *fb = fb_helper->fb; + struct drm_gem_object *obj = drm_gem_fb_get_obj(fb, 0); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + + if (shmem->map_wc) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + return fb_deferred_io_mmap(info, vma); +} + +static void drm_fbdev_shmem_fb_destroy(struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + + if (!fb_helper->dev) + return; + + drm_fb_helper_fini(fb_helper); + + drm_client_buffer_vunmap(fb_helper->buffer); + drm_client_framebuffer_delete(fb_helper->buffer); + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); +} + +static const struct fb_ops drm_fbdev_shmem_fb_ops = { + .owner = THIS_MODULE, + .fb_open = drm_fbdev_shmem_fb_open, + .fb_release = drm_fbdev_shmem_fb_release, + __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_shmem), + DRM_FB_HELPER_DEFAULT_OPS, + __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_shmem), + .fb_mmap = drm_fbdev_shmem_fb_mmap, + .fb_destroy = drm_fbdev_shmem_fb_destroy, +}; + +static struct page *drm_fbdev_shmem_get_page(struct fb_info *info, unsigned long offset) +{ + struct drm_fb_helper *fb_helper = info->par; + struct drm_framebuffer *fb = fb_helper->fb; + struct drm_gem_object *obj = drm_gem_fb_get_obj(fb, 0); + struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); + unsigned int i = offset >> PAGE_SHIFT; + struct page *page; + + if (fb_WARN_ON_ONCE(info, offset > obj->size)) + return NULL; + + page = shmem->pages[i]; // protected by active vmap + if (page) + get_page(page); + fb_WARN_ON_ONCE(info, !page); + + return page; +} + +/* + * struct drm_fb_helper + */ + +static int drm_fbdev_shmem_helper_fb_probe(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_client_dev *client = &fb_helper->client; + struct drm_device *dev = fb_helper->dev; + struct drm_client_buffer *buffer; + struct drm_gem_shmem_object *shmem; + struct drm_framebuffer *fb; + struct fb_info *info; + u32 format; + struct iosys_map map; + int ret; + + drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n", + sizes->surface_width, sizes->surface_height, + sizes->surface_bpp); + + format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, sizes->surface_depth); + buffer = drm_client_framebuffer_create(client, sizes->surface_width, + sizes->surface_height, format); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + shmem = to_drm_gem_shmem_obj(buffer->gem); + + fb = buffer->fb; + + ret = drm_client_buffer_vmap(buffer, &map); + if (ret) { + goto err_drm_client_buffer_delete; + } else if (drm_WARN_ON(dev, map.is_iomem)) { + ret = -ENODEV; /* I/O memory not supported; use generic emulation */ + goto err_drm_client_buffer_delete; + } + + fb_helper->buffer = buffer; + fb_helper->fb = fb; + + info = drm_fb_helper_alloc_info(fb_helper); + if (IS_ERR(info)) { + ret = PTR_ERR(info); + goto err_drm_client_buffer_vunmap; + } + + drm_fb_helper_fill_info(info, fb_helper, sizes); + + info->fbops = &drm_fbdev_shmem_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB; /* system memory */ + if (!shmem->map_wc) + info->flags |= FBINFO_READS_FAST; /* signal caching */ + info->screen_size = sizes->surface_height * fb->pitches[0]; + info->screen_buffer = map.vaddr; + info->fix.smem_len = info->screen_size; + + /* deferred I/O */ + fb_helper->fbdefio.delay = HZ / 20; + fb_helper->fbdefio.get_page = drm_fbdev_shmem_get_page; + fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + info->fbdefio = &fb_helper->fbdefio; + ret = fb_deferred_io_init(info); + if (ret) + goto err_drm_fb_helper_release_info; + + return 0; + +err_drm_fb_helper_release_info: + drm_fb_helper_release_info(fb_helper); +err_drm_client_buffer_vunmap: + fb_helper->fb = NULL; + fb_helper->buffer = NULL; + drm_client_buffer_vunmap(buffer); +err_drm_client_buffer_delete: + drm_client_framebuffer_delete(buffer); + return ret; +} + +static int drm_fbdev_shmem_helper_fb_dirty(struct drm_fb_helper *helper, + struct drm_clip_rect *clip) +{ + struct drm_device *dev = helper->dev; + int ret; + + /* Call damage handlers only if necessary */ + if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) + return 0; + + if (helper->fb->funcs->dirty) { + ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); + if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) + return ret; + } + + return 0; +} + +static const struct drm_fb_helper_funcs drm_fbdev_shmem_helper_funcs = { + .fb_probe = drm_fbdev_shmem_helper_fb_probe, + .fb_dirty = drm_fbdev_shmem_helper_fb_dirty, +}; + +/* + * struct drm_client_funcs + */ + +static void drm_fbdev_shmem_client_unregister(struct drm_client_dev *client) +{ + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + + if (fb_helper->info) { + drm_fb_helper_unregister_info(fb_helper); + } else { + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); + } +} + +static int drm_fbdev_shmem_client_restore(struct drm_client_dev *client) +{ + drm_fb_helper_lastclose(client->dev); + + return 0; +} + +static int drm_fbdev_shmem_client_hotplug(struct drm_client_dev *client) +{ + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + struct drm_device *dev = client->dev; + int ret; + + if (dev->fb_helper) + return drm_fb_helper_hotplug_event(dev->fb_helper); + + ret = drm_fb_helper_init(dev, fb_helper); + if (ret) + goto err_drm_err; + + if (!drm_drv_uses_atomic_modeset(dev)) + drm_helper_disable_unused_functions(dev); + + ret = drm_fb_helper_initial_config(fb_helper); + if (ret) + goto err_drm_fb_helper_fini; + + return 0; + +err_drm_fb_helper_fini: + drm_fb_helper_fini(fb_helper); +err_drm_err: + drm_err(dev, "fbdev-shmem: Failed to setup emulation (ret=%d)\n", ret); + return ret; +} + +static const struct drm_client_funcs drm_fbdev_shmem_client_funcs = { + .owner = THIS_MODULE, + .unregister = drm_fbdev_shmem_client_unregister, + .restore = drm_fbdev_shmem_client_restore, + .hotplug = drm_fbdev_shmem_client_hotplug, +}; + +/** + * drm_fbdev_shmem_setup() - Setup fbdev emulation for GEM SHMEM helpers + * @dev: DRM device + * @preferred_bpp: Preferred bits per pixel for the device. + * 32 is used if this is zero. + * + * This function sets up fbdev emulation for GEM DMA drivers that support + * dumb buffers with a virtual address and that can be mmap'ed. + * drm_fbdev_shmem_setup() shall be called after the DRM driver registered + * the new DRM device with drm_dev_register(). + * + * Restore, hotplug events and teardown are all taken care of. Drivers that do + * suspend/resume need to call drm_fb_helper_set_suspend_unlocked() themselves. + * Simple drivers might use drm_mode_config_helper_suspend(). + * + * This function is safe to call even when there are no connectors present. + * Setup will be retried on the next hotplug event. + * + * The fbdev is destroyed by drm_dev_unregister(). + */ +void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp) +{ + struct drm_fb_helper *fb_helper; + int ret; + + drm_WARN(dev, !dev->registered, "Device has not been registered.\n"); + drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n"); + + fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); + if (!fb_helper) + return; + drm_fb_helper_prepare(dev, fb_helper, preferred_bpp, &drm_fbdev_shmem_helper_funcs); + + ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_shmem_client_funcs); + if (ret) { + drm_err(dev, "Failed to register client: %d\n", ret); + goto err_drm_client_init; + } + + drm_client_register(&fb_helper->client); + + return; + +err_drm_client_init: + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); +} +EXPORT_SYMBOL(drm_fbdev_shmem_setup); diff --git a/include/drm/drm_fbdev_shmem.h b/include/drm/drm_fbdev_shmem.h new file mode 100644 index 000000000000..fb43cadd1950 --- /dev/null +++ b/include/drm/drm_fbdev_shmem.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_FBDEV_SHMEM_H +#define DRM_FBDEV_SHMEM_H + +struct drm_device; + +#ifdef CONFIG_DRM_FBDEV_EMULATION +void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp); +#else +static inline void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp) +{ } +#endif + +#endif -- cgit v1.2.3 From aae4682e5d66c1e1dc181fa341652e037237f144 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:29:35 +0200 Subject: drm/fbdev-generic: Convert to fbdev-ttm Only TTM-based drivers use fbdev-generic. Rename it to fbdev-ttm and change the symbol infix from _generic_ to _ttm_. Link the source file into TTM helpers, so that it is only build if TTM-based drivers have been selected. Select DRM_TTM_HELPER for loongson. Signed-off-by: Thomas Zimmermann Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-43-tzimmermann@suse.de --- Documentation/gpu/drm-kms-helpers.rst | 2 +- drivers/gpu/drm/Makefile | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 +- drivers/gpu/drm/drm_fbdev_generic.c | 349 ------------------------ drivers/gpu/drm/drm_fbdev_ttm.c | 349 ++++++++++++++++++++++++ drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c | 4 +- drivers/gpu/drm/loongson/Kconfig | 1 + drivers/gpu/drm/loongson/lsdc_drv.c | 4 +- drivers/gpu/drm/nouveau/nouveau_drm.c | 6 +- drivers/gpu/drm/qxl/qxl_drv.c | 4 +- drivers/gpu/drm/tiny/bochs.c | 4 +- drivers/gpu/drm/vboxvideo/vbox_drv.c | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 4 +- include/drm/drm_fbdev_generic.h | 15 - include/drm/drm_fbdev_ttm.h | 15 + 15 files changed, 386 insertions(+), 386 deletions(-) delete mode 100644 drivers/gpu/drm/drm_fbdev_generic.c create mode 100644 drivers/gpu/drm/drm_fbdev_ttm.c delete mode 100644 include/drm/drm_fbdev_generic.h create mode 100644 include/drm/drm_fbdev_ttm.h (limited to 'include') diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index 59cfe8a7a8ba..e46ab9b670ac 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -116,7 +116,7 @@ fbdev Helper Functions Reference .. kernel-doc:: drivers/gpu/drm/drm_fb_helper.c :export: -.. kernel-doc:: drivers/gpu/drm/drm_fbdev_generic.c +.. kernel-doc:: drivers/gpu/drm/drm_fbdev_ttm.c :export: format Helper Functions Reference diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index fc793c6dd299..68cc9258ffc4 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -118,6 +118,7 @@ drm_vram_helper-y := drm_gem_vram_helper.o obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o drm_ttm_helper-y := drm_gem_ttm_helper.o +drm_ttm_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fbdev_ttm.o obj-$(CONFIG_DRM_TTM_HELPER) += drm_ttm_helper.o # @@ -143,9 +144,7 @@ drm_kms_helper-y := \ drm_self_refresh_helper.o \ drm_simple_kms_helper.o drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o -drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += \ - drm_fbdev_generic.o \ - drm_fb_helper.o +drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o # diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ea14f1c8f430..4f76aa606ac9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include @@ -2318,9 +2318,9 @@ retry_init: !list_empty(&adev_to_drm(adev)->mode_config.connector_list)) { /* select 8 bpp console on low vram cards */ if (adev->gmc.real_vram_size <= (32*1024*1024)) - drm_fbdev_generic_setup(adev_to_drm(adev), 8); + drm_fbdev_ttm_setup(adev_to_drm(adev), 8); else - drm_fbdev_generic_setup(adev_to_drm(adev), 32); + drm_fbdev_ttm_setup(adev_to_drm(adev), 32); } ret = amdgpu_debugfs_init(adev); diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c deleted file mode 100644 index 97e579c33d84..000000000000 --- a/drivers/gpu/drm/drm_fbdev_generic.c +++ /dev/null @@ -1,349 +0,0 @@ -// SPDX-License-Identifier: MIT - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include - -/* @user: 1=userspace, 0=fbcon */ -static int drm_fbdev_generic_fb_open(struct fb_info *info, int user) -{ - struct drm_fb_helper *fb_helper = info->par; - - /* No need to take a ref for fbcon because it unbinds on unregister */ - if (user && !try_module_get(fb_helper->dev->driver->fops->owner)) - return -ENODEV; - - return 0; -} - -static int drm_fbdev_generic_fb_release(struct fb_info *info, int user) -{ - struct drm_fb_helper *fb_helper = info->par; - - if (user) - module_put(fb_helper->dev->driver->fops->owner); - - return 0; -} - -FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(drm_fbdev_generic, - drm_fb_helper_damage_range, - drm_fb_helper_damage_area); - -static void drm_fbdev_generic_fb_destroy(struct fb_info *info) -{ - struct drm_fb_helper *fb_helper = info->par; - void *shadow = info->screen_buffer; - - if (!fb_helper->dev) - return; - - fb_deferred_io_cleanup(info); - drm_fb_helper_fini(fb_helper); - vfree(shadow); - drm_client_framebuffer_delete(fb_helper->buffer); - - drm_client_release(&fb_helper->client); - drm_fb_helper_unprepare(fb_helper); - kfree(fb_helper); -} - -static const struct fb_ops drm_fbdev_generic_fb_ops = { - .owner = THIS_MODULE, - .fb_open = drm_fbdev_generic_fb_open, - .fb_release = drm_fbdev_generic_fb_release, - FB_DEFAULT_DEFERRED_OPS(drm_fbdev_generic), - DRM_FB_HELPER_DEFAULT_OPS, - .fb_destroy = drm_fbdev_generic_fb_destroy, -}; - -/* - * This function uses the client API to create a framebuffer backed by a dumb buffer. - */ -static int drm_fbdev_generic_helper_fb_probe(struct drm_fb_helper *fb_helper, - struct drm_fb_helper_surface_size *sizes) -{ - struct drm_client_dev *client = &fb_helper->client; - struct drm_device *dev = fb_helper->dev; - struct drm_client_buffer *buffer; - struct fb_info *info; - size_t screen_size; - void *screen_buffer; - u32 format; - int ret; - - drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n", - sizes->surface_width, sizes->surface_height, - sizes->surface_bpp); - - format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); - buffer = drm_client_framebuffer_create(client, sizes->surface_width, - sizes->surface_height, format); - if (IS_ERR(buffer)) - return PTR_ERR(buffer); - - fb_helper->buffer = buffer; - fb_helper->fb = buffer->fb; - - screen_size = buffer->gem->size; - screen_buffer = vzalloc(screen_size); - if (!screen_buffer) { - ret = -ENOMEM; - goto err_drm_client_framebuffer_delete; - } - - info = drm_fb_helper_alloc_info(fb_helper); - if (IS_ERR(info)) { - ret = PTR_ERR(info); - goto err_vfree; - } - - drm_fb_helper_fill_info(info, fb_helper, sizes); - - info->fbops = &drm_fbdev_generic_fb_ops; - - /* screen */ - info->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST; - info->screen_buffer = screen_buffer; - info->fix.smem_len = screen_size; - - /* deferred I/O */ - fb_helper->fbdefio.delay = HZ / 20; - fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; - - info->fbdefio = &fb_helper->fbdefio; - ret = fb_deferred_io_init(info); - if (ret) - goto err_drm_fb_helper_release_info; - - return 0; - -err_drm_fb_helper_release_info: - drm_fb_helper_release_info(fb_helper); -err_vfree: - vfree(screen_buffer); -err_drm_client_framebuffer_delete: - fb_helper->fb = NULL; - fb_helper->buffer = NULL; - drm_client_framebuffer_delete(buffer); - return ret; -} - -static void drm_fbdev_generic_damage_blit_real(struct drm_fb_helper *fb_helper, - struct drm_clip_rect *clip, - struct iosys_map *dst) -{ - struct drm_framebuffer *fb = fb_helper->fb; - size_t offset = clip->y1 * fb->pitches[0]; - size_t len = clip->x2 - clip->x1; - unsigned int y; - void *src; - - switch (drm_format_info_bpp(fb->format, 0)) { - case 1: - offset += clip->x1 / 8; - len = DIV_ROUND_UP(len + clip->x1 % 8, 8); - break; - case 2: - offset += clip->x1 / 4; - len = DIV_ROUND_UP(len + clip->x1 % 4, 4); - break; - case 4: - offset += clip->x1 / 2; - len = DIV_ROUND_UP(len + clip->x1 % 2, 2); - break; - default: - offset += clip->x1 * fb->format->cpp[0]; - len *= fb->format->cpp[0]; - break; - } - - src = fb_helper->info->screen_buffer + offset; - iosys_map_incr(dst, offset); /* go to first pixel within clip rect */ - - for (y = clip->y1; y < clip->y2; y++) { - iosys_map_memcpy_to(dst, 0, src, len); - iosys_map_incr(dst, fb->pitches[0]); - src += fb->pitches[0]; - } -} - -static int drm_fbdev_generic_damage_blit(struct drm_fb_helper *fb_helper, - struct drm_clip_rect *clip) -{ - struct drm_client_buffer *buffer = fb_helper->buffer; - struct iosys_map map, dst; - int ret; - - /* - * We have to pin the client buffer to its current location while - * flushing the shadow buffer. In the general case, concurrent - * modesetting operations could try to move the buffer and would - * fail. The modeset has to be serialized by acquiring the reservation - * object of the underlying BO here. - * - * For fbdev emulation, we only have to protect against fbdev modeset - * operations. Nothing else will involve the client buffer's BO. So it - * is sufficient to acquire struct drm_fb_helper.lock here. - */ - mutex_lock(&fb_helper->lock); - - ret = drm_client_buffer_vmap_local(buffer, &map); - if (ret) - goto out; - - dst = map; - drm_fbdev_generic_damage_blit_real(fb_helper, clip, &dst); - - drm_client_buffer_vunmap_local(buffer); - -out: - mutex_unlock(&fb_helper->lock); - - return ret; -} - -static int drm_fbdev_generic_helper_fb_dirty(struct drm_fb_helper *helper, - struct drm_clip_rect *clip) -{ - struct drm_device *dev = helper->dev; - int ret; - - /* Call damage handlers only if necessary */ - if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) - return 0; - - ret = drm_fbdev_generic_damage_blit(helper, clip); - if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret)) - return ret; - - if (helper->fb->funcs->dirty) { - ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); - if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) - return ret; - } - - return 0; -} - -static const struct drm_fb_helper_funcs drm_fbdev_generic_helper_funcs = { - .fb_probe = drm_fbdev_generic_helper_fb_probe, - .fb_dirty = drm_fbdev_generic_helper_fb_dirty, -}; - -static void drm_fbdev_generic_client_unregister(struct drm_client_dev *client) -{ - struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); - - if (fb_helper->info) { - drm_fb_helper_unregister_info(fb_helper); - } else { - drm_client_release(&fb_helper->client); - drm_fb_helper_unprepare(fb_helper); - kfree(fb_helper); - } -} - -static int drm_fbdev_generic_client_restore(struct drm_client_dev *client) -{ - drm_fb_helper_lastclose(client->dev); - - return 0; -} - -static int drm_fbdev_generic_client_hotplug(struct drm_client_dev *client) -{ - struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); - struct drm_device *dev = client->dev; - int ret; - - if (dev->fb_helper) - return drm_fb_helper_hotplug_event(dev->fb_helper); - - ret = drm_fb_helper_init(dev, fb_helper); - if (ret) - goto err_drm_err; - - if (!drm_drv_uses_atomic_modeset(dev)) - drm_helper_disable_unused_functions(dev); - - ret = drm_fb_helper_initial_config(fb_helper); - if (ret) - goto err_drm_fb_helper_fini; - - return 0; - -err_drm_fb_helper_fini: - drm_fb_helper_fini(fb_helper); -err_drm_err: - drm_err(dev, "fbdev: Failed to setup generic emulation (ret=%d)\n", ret); - return ret; -} - -static const struct drm_client_funcs drm_fbdev_generic_client_funcs = { - .owner = THIS_MODULE, - .unregister = drm_fbdev_generic_client_unregister, - .restore = drm_fbdev_generic_client_restore, - .hotplug = drm_fbdev_generic_client_hotplug, -}; - -/** - * drm_fbdev_generic_setup() - Setup generic fbdev emulation - * @dev: DRM device - * @preferred_bpp: Preferred bits per pixel for the device. - * - * This function sets up generic fbdev emulation for drivers that supports - * dumb buffers with a virtual address and that can be mmap'ed. - * drm_fbdev_generic_setup() shall be called after the DRM driver registered - * the new DRM device with drm_dev_register(). - * - * Restore, hotplug events and teardown are all taken care of. Drivers that do - * suspend/resume need to call drm_fb_helper_set_suspend_unlocked() themselves. - * Simple drivers might use drm_mode_config_helper_suspend(). - * - * In order to provide fixed mmap-able memory ranges, generic fbdev emulation - * uses a shadow buffer in system memory. The implementation blits the shadow - * fbdev buffer onto the real buffer in regular intervals. - * - * This function is safe to call even when there are no connectors present. - * Setup will be retried on the next hotplug event. - * - * The fbdev is destroyed by drm_dev_unregister(). - */ -void drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) -{ - struct drm_fb_helper *fb_helper; - int ret; - - drm_WARN(dev, !dev->registered, "Device has not been registered.\n"); - drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n"); - - fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); - if (!fb_helper) - return; - drm_fb_helper_prepare(dev, fb_helper, preferred_bpp, &drm_fbdev_generic_helper_funcs); - - ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_generic_client_funcs); - if (ret) { - drm_err(dev, "Failed to register client: %d\n", ret); - goto err_drm_client_init; - } - - drm_client_register(&fb_helper->client); - - return; - -err_drm_client_init: - drm_fb_helper_unprepare(fb_helper); - kfree(fb_helper); - return; -} -EXPORT_SYMBOL(drm_fbdev_generic_setup); diff --git a/drivers/gpu/drm/drm_fbdev_ttm.c b/drivers/gpu/drm/drm_fbdev_ttm.c new file mode 100644 index 000000000000..bb7898cd7dc6 --- /dev/null +++ b/drivers/gpu/drm/drm_fbdev_ttm.c @@ -0,0 +1,349 @@ +// SPDX-License-Identifier: MIT + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +/* @user: 1=userspace, 0=fbcon */ +static int drm_fbdev_ttm_fb_open(struct fb_info *info, int user) +{ + struct drm_fb_helper *fb_helper = info->par; + + /* No need to take a ref for fbcon because it unbinds on unregister */ + if (user && !try_module_get(fb_helper->dev->driver->fops->owner)) + return -ENODEV; + + return 0; +} + +static int drm_fbdev_ttm_fb_release(struct fb_info *info, int user) +{ + struct drm_fb_helper *fb_helper = info->par; + + if (user) + module_put(fb_helper->dev->driver->fops->owner); + + return 0; +} + +FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(drm_fbdev_ttm, + drm_fb_helper_damage_range, + drm_fb_helper_damage_area); + +static void drm_fbdev_ttm_fb_destroy(struct fb_info *info) +{ + struct drm_fb_helper *fb_helper = info->par; + void *shadow = info->screen_buffer; + + if (!fb_helper->dev) + return; + + fb_deferred_io_cleanup(info); + drm_fb_helper_fini(fb_helper); + vfree(shadow); + drm_client_framebuffer_delete(fb_helper->buffer); + + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); +} + +static const struct fb_ops drm_fbdev_ttm_fb_ops = { + .owner = THIS_MODULE, + .fb_open = drm_fbdev_ttm_fb_open, + .fb_release = drm_fbdev_ttm_fb_release, + FB_DEFAULT_DEFERRED_OPS(drm_fbdev_ttm), + DRM_FB_HELPER_DEFAULT_OPS, + .fb_destroy = drm_fbdev_ttm_fb_destroy, +}; + +/* + * This function uses the client API to create a framebuffer backed by a dumb buffer. + */ +static int drm_fbdev_ttm_helper_fb_probe(struct drm_fb_helper *fb_helper, + struct drm_fb_helper_surface_size *sizes) +{ + struct drm_client_dev *client = &fb_helper->client; + struct drm_device *dev = fb_helper->dev; + struct drm_client_buffer *buffer; + struct fb_info *info; + size_t screen_size; + void *screen_buffer; + u32 format; + int ret; + + drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n", + sizes->surface_width, sizes->surface_height, + sizes->surface_bpp); + + format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth); + buffer = drm_client_framebuffer_create(client, sizes->surface_width, + sizes->surface_height, format); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + + fb_helper->buffer = buffer; + fb_helper->fb = buffer->fb; + + screen_size = buffer->gem->size; + screen_buffer = vzalloc(screen_size); + if (!screen_buffer) { + ret = -ENOMEM; + goto err_drm_client_framebuffer_delete; + } + + info = drm_fb_helper_alloc_info(fb_helper); + if (IS_ERR(info)) { + ret = PTR_ERR(info); + goto err_vfree; + } + + drm_fb_helper_fill_info(info, fb_helper, sizes); + + info->fbops = &drm_fbdev_ttm_fb_ops; + + /* screen */ + info->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST; + info->screen_buffer = screen_buffer; + info->fix.smem_len = screen_size; + + /* deferred I/O */ + fb_helper->fbdefio.delay = HZ / 20; + fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; + + info->fbdefio = &fb_helper->fbdefio; + ret = fb_deferred_io_init(info); + if (ret) + goto err_drm_fb_helper_release_info; + + return 0; + +err_drm_fb_helper_release_info: + drm_fb_helper_release_info(fb_helper); +err_vfree: + vfree(screen_buffer); +err_drm_client_framebuffer_delete: + fb_helper->fb = NULL; + fb_helper->buffer = NULL; + drm_client_framebuffer_delete(buffer); + return ret; +} + +static void drm_fbdev_ttm_damage_blit_real(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip, + struct iosys_map *dst) +{ + struct drm_framebuffer *fb = fb_helper->fb; + size_t offset = clip->y1 * fb->pitches[0]; + size_t len = clip->x2 - clip->x1; + unsigned int y; + void *src; + + switch (drm_format_info_bpp(fb->format, 0)) { + case 1: + offset += clip->x1 / 8; + len = DIV_ROUND_UP(len + clip->x1 % 8, 8); + break; + case 2: + offset += clip->x1 / 4; + len = DIV_ROUND_UP(len + clip->x1 % 4, 4); + break; + case 4: + offset += clip->x1 / 2; + len = DIV_ROUND_UP(len + clip->x1 % 2, 2); + break; + default: + offset += clip->x1 * fb->format->cpp[0]; + len *= fb->format->cpp[0]; + break; + } + + src = fb_helper->info->screen_buffer + offset; + iosys_map_incr(dst, offset); /* go to first pixel within clip rect */ + + for (y = clip->y1; y < clip->y2; y++) { + iosys_map_memcpy_to(dst, 0, src, len); + iosys_map_incr(dst, fb->pitches[0]); + src += fb->pitches[0]; + } +} + +static int drm_fbdev_ttm_damage_blit(struct drm_fb_helper *fb_helper, + struct drm_clip_rect *clip) +{ + struct drm_client_buffer *buffer = fb_helper->buffer; + struct iosys_map map, dst; + int ret; + + /* + * We have to pin the client buffer to its current location while + * flushing the shadow buffer. In the general case, concurrent + * modesetting operations could try to move the buffer and would + * fail. The modeset has to be serialized by acquiring the reservation + * object of the underlying BO here. + * + * For fbdev emulation, we only have to protect against fbdev modeset + * operations. Nothing else will involve the client buffer's BO. So it + * is sufficient to acquire struct drm_fb_helper.lock here. + */ + mutex_lock(&fb_helper->lock); + + ret = drm_client_buffer_vmap_local(buffer, &map); + if (ret) + goto out; + + dst = map; + drm_fbdev_ttm_damage_blit_real(fb_helper, clip, &dst); + + drm_client_buffer_vunmap_local(buffer); + +out: + mutex_unlock(&fb_helper->lock); + + return ret; +} + +static int drm_fbdev_ttm_helper_fb_dirty(struct drm_fb_helper *helper, + struct drm_clip_rect *clip) +{ + struct drm_device *dev = helper->dev; + int ret; + + /* Call damage handlers only if necessary */ + if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) + return 0; + + ret = drm_fbdev_ttm_damage_blit(helper, clip); + if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret)) + return ret; + + if (helper->fb->funcs->dirty) { + ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); + if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) + return ret; + } + + return 0; +} + +static const struct drm_fb_helper_funcs drm_fbdev_ttm_helper_funcs = { + .fb_probe = drm_fbdev_ttm_helper_fb_probe, + .fb_dirty = drm_fbdev_ttm_helper_fb_dirty, +}; + +static void drm_fbdev_ttm_client_unregister(struct drm_client_dev *client) +{ + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + + if (fb_helper->info) { + drm_fb_helper_unregister_info(fb_helper); + } else { + drm_client_release(&fb_helper->client); + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); + } +} + +static int drm_fbdev_ttm_client_restore(struct drm_client_dev *client) +{ + drm_fb_helper_lastclose(client->dev); + + return 0; +} + +static int drm_fbdev_ttm_client_hotplug(struct drm_client_dev *client) +{ + struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); + struct drm_device *dev = client->dev; + int ret; + + if (dev->fb_helper) + return drm_fb_helper_hotplug_event(dev->fb_helper); + + ret = drm_fb_helper_init(dev, fb_helper); + if (ret) + goto err_drm_err; + + if (!drm_drv_uses_atomic_modeset(dev)) + drm_helper_disable_unused_functions(dev); + + ret = drm_fb_helper_initial_config(fb_helper); + if (ret) + goto err_drm_fb_helper_fini; + + return 0; + +err_drm_fb_helper_fini: + drm_fb_helper_fini(fb_helper); +err_drm_err: + drm_err(dev, "fbdev: Failed to setup emulation (ret=%d)\n", ret); + return ret; +} + +static const struct drm_client_funcs drm_fbdev_ttm_client_funcs = { + .owner = THIS_MODULE, + .unregister = drm_fbdev_ttm_client_unregister, + .restore = drm_fbdev_ttm_client_restore, + .hotplug = drm_fbdev_ttm_client_hotplug, +}; + +/** + * drm_fbdev_ttm_setup() - Setup fbdev emulation for TTM-based drivers + * @dev: DRM device + * @preferred_bpp: Preferred bits per pixel for the device. + * + * This function sets up fbdev emulation for TTM-based drivers that support + * dumb buffers with a virtual address and that can be mmap'ed. + * drm_fbdev_ttm_setup() shall be called after the DRM driver registered + * the new DRM device with drm_dev_register(). + * + * Restore, hotplug events and teardown are all taken care of. Drivers that do + * suspend/resume need to call drm_fb_helper_set_suspend_unlocked() themselves. + * Simple drivers might use drm_mode_config_helper_suspend(). + * + * In order to provide fixed mmap-able memory ranges, fbdev emulation + * uses a shadow buffer in system memory. The implementation blits the shadow + * fbdev buffer onto the real buffer in regular intervals. + * + * This function is safe to call even when there are no connectors present. + * Setup will be retried on the next hotplug event. + * + * The fbdev is destroyed by drm_dev_unregister(). + */ +void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp) +{ + struct drm_fb_helper *fb_helper; + int ret; + + drm_WARN(dev, !dev->registered, "Device has not been registered.\n"); + drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n"); + + fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); + if (!fb_helper) + return; + drm_fb_helper_prepare(dev, fb_helper, preferred_bpp, &drm_fbdev_ttm_helper_funcs); + + ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_ttm_client_funcs); + if (ret) { + drm_err(dev, "Failed to register client: %d\n", ret); + goto err_drm_client_init; + } + + drm_client_register(&fb_helper->client); + + return; + +err_drm_client_init: + drm_fb_helper_unprepare(fb_helper); + kfree(fb_helper); + return; +} +EXPORT_SYMBOL(drm_fbdev_ttm_setup); diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index 57c21ec452b7..9f9b19ea0587 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -339,7 +339,7 @@ static int hibmc_pci_probe(struct pci_dev *pdev, goto err_unload; } - drm_fbdev_generic_setup(dev, 32); + drm_fbdev_ttm_setup(dev, 32); return 0; diff --git a/drivers/gpu/drm/loongson/Kconfig b/drivers/gpu/drm/loongson/Kconfig index 8e59753e532d..9ed463a76ae2 100644 --- a/drivers/gpu/drm/loongson/Kconfig +++ b/drivers/gpu/drm/loongson/Kconfig @@ -6,6 +6,7 @@ config DRM_LOONGSON depends on LOONGARCH || MIPS || COMPILE_TEST select DRM_KMS_HELPER select DRM_TTM + select DRM_TTM_HELPER select I2C select I2C_ALGOBIT help diff --git a/drivers/gpu/drm/loongson/lsdc_drv.c b/drivers/gpu/drm/loongson/lsdc_drv.c index d8ff60b46abe..adc7344d2f80 100644 --- a/drivers/gpu/drm/loongson/lsdc_drv.c +++ b/drivers/gpu/drm/loongson/lsdc_drv.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -314,7 +314,7 @@ static int lsdc_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) return ret; - drm_fbdev_generic_setup(ddev, 32); + drm_fbdev_ttm_setup(ddev, 32); return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index a947e1d5f309..a58c31089613 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include #include @@ -846,9 +846,9 @@ static int nouveau_drm_probe(struct pci_dev *pdev, goto fail_drm_dev_init; if (nouveau_drm(drm_dev)->client.device.info.ram_size <= 32 * 1024 * 1024) - drm_fbdev_generic_setup(drm_dev, 8); + drm_fbdev_ttm_setup(drm_dev, 8); else - drm_fbdev_generic_setup(drm_dev, 32); + drm_fbdev_ttm_setup(drm_dev, 32); quirk_broken_nv_runpm(pdev); return 0; diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c index beee5563031a..5eb3f5719fdf 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.c +++ b/drivers/gpu/drm/qxl/qxl_drv.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include @@ -118,7 +118,7 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto modeset_cleanup; - drm_fbdev_generic_setup(&qdev->ddev, 32); + drm_fbdev_ttm_setup(&qdev->ddev, 32); return 0; modeset_cleanup: diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c index c23c9f0cf49c..2d7ad808cc0e 100644 --- a/drivers/gpu/drm/tiny/bochs.c +++ b/drivers/gpu/drm/tiny/bochs.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -670,7 +670,7 @@ static int bochs_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent if (ret) goto err_hw_fini; - drm_fbdev_generic_setup(dev, 32); + drm_fbdev_ttm_setup(dev, 32); return ret; err_hw_fini: diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c index cd9e66a06596..ef36834c8673 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_drv.c +++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -80,7 +80,7 @@ static int vbox_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto err_irq_fini; - drm_fbdev_generic_setup(&vbox->ddev, 32); + drm_fbdev_ttm_setup(&vbox->ddev, 32); return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index bdad93864b98..4bf6da2b15fe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include #include #include @@ -1685,7 +1685,7 @@ static int vmw_probe(struct pci_dev *pdev, const struct pci_device_id *ent) vmw_fifo_resource_inc(vmw); vmw_svga_enable(vmw); - drm_fbdev_generic_setup(&vmw->drm, 0); + drm_fbdev_ttm_setup(&vmw->drm, 0); vmw_debugfs_gem_init(vmw); vmw_debugfs_resource_managers_init(vmw); diff --git a/include/drm/drm_fbdev_generic.h b/include/drm/drm_fbdev_generic.h deleted file mode 100644 index 75799342098d..000000000000 --- a/include/drm/drm_fbdev_generic.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: MIT */ - -#ifndef DRM_FBDEV_GENERIC_H -#define DRM_FBDEV_GENERIC_H - -struct drm_device; - -#ifdef CONFIG_DRM_FBDEV_EMULATION -void drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp); -#else -static inline void drm_fbdev_generic_setup(struct drm_device *dev, unsigned int preferred_bpp) -{ } -#endif - -#endif diff --git a/include/drm/drm_fbdev_ttm.h b/include/drm/drm_fbdev_ttm.h new file mode 100644 index 000000000000..9e6c3bdf3537 --- /dev/null +++ b/include/drm/drm_fbdev_ttm.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_FBDEV_TTM_H +#define DRM_FBDEV_TTM_H + +struct drm_device; + +#ifdef CONFIG_DRM_FBDEV_EMULATION +void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp); +#else +static inline void drm_fbdev_ttm_setup(struct drm_device *dev, unsigned int preferred_bpp) +{ } +#endif + +#endif -- cgit v1.2.3 From 18bc074c226bfecd205bf031678f5e35ee55c3da Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Fri, 19 Apr 2024 10:29:36 +0200 Subject: drm/fbdev: Clean up fbdev documentation Rewrite some docs that are not up-to-date any longer. Remove the TODO item for fbdev-generic conversion, as the helper has been replaced. Make documentation for DMA, SHMEM and TTM emulation available. Signed-off-by: Thomas Zimmermann Cc: Jonathan Corbet Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240419083331.7761-44-tzimmermann@suse.de --- Documentation/gpu/drm-kms-helpers.rst | 12 +++++++++--- Documentation/gpu/todo.rst | 13 ------------- drivers/gpu/drm/drm_drv.c | 2 +- drivers/gpu/drm/drm_fb_helper.c | 11 ++--------- include/drm/drm_mode_config.h | 4 ++-- 5 files changed, 14 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/Documentation/gpu/drm-kms-helpers.rst b/Documentation/gpu/drm-kms-helpers.rst index e46ab9b670ac..8435e8621cc0 100644 --- a/Documentation/gpu/drm-kms-helpers.rst +++ b/Documentation/gpu/drm-kms-helpers.rst @@ -110,15 +110,21 @@ fbdev Helper Functions Reference .. kernel-doc:: drivers/gpu/drm/drm_fb_helper.c :doc: fbdev helpers -.. kernel-doc:: include/drm/drm_fb_helper.h - :internal: +.. kernel-doc:: drivers/gpu/drm/drm_fbdev_dma.c + :export: -.. kernel-doc:: drivers/gpu/drm/drm_fb_helper.c +.. kernel-doc:: drivers/gpu/drm/drm_fbdev_shmem.c :export: .. kernel-doc:: drivers/gpu/drm/drm_fbdev_ttm.c :export: +.. kernel-doc:: include/drm/drm_fb_helper.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/drm_fb_helper.c + :export: + format Helper Functions Reference ================================= diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index fb9ad120b141..e2a0585915b3 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -243,19 +243,6 @@ Contact: Maintainer of the driver you plan to convert Level: Intermediate -Convert drivers to use drm_fbdev_generic_setup() ------------------------------------------------- - -Most drivers can use drm_fbdev_generic_setup(). Driver have to implement -atomic modesetting and GEM vmap support. Historically, generic fbdev emulation -expected the framebuffer in system memory or system-like memory. By employing -struct iosys_map, drivers with frambuffers in I/O memory can be supported -as well. - -Contact: Maintainer of the driver you plan to convert - -Level: Intermediate - Reimplement functions in drm_fbdev_fb_ops without fbdev ------------------------------------------------------- diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 535b624d4c9d..b8186cf16230 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -346,7 +346,7 @@ void drm_minor_release(struct drm_minor *minor) * if (ret) * return ret; * - * drm_fbdev_generic_setup(drm, 32); + * drm_fbdev_{...}_setup(drm, 32); * * return 0; * } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index d612133e2cf7..e2e19f49342e 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -85,12 +85,8 @@ static DEFINE_MUTEX(kernel_fb_helper_lock); * The fb helper functions are useful to provide an fbdev on top of a drm kernel * mode setting driver. They can be used mostly independently from the crtc * helper functions used by many drivers to implement the kernel mode setting - * interfaces. - * - * Drivers that support a dumb buffer with a virtual address and mmap support, - * should try out the generic fbdev emulation using drm_fbdev_generic_setup(). - * It will automatically set up deferred I/O if the driver requires a shadow - * buffer. + * interfaces. Drivers that use one of the shared memory managers, TTM, SHMEM, + * DMA, should instead use the corresponding fbdev emulation. * * Existing fbdev implementations should restore the fbdev console by using * drm_fb_helper_lastclose() as their &drm_driver.lastclose callback. @@ -126,9 +122,6 @@ static DEFINE_MUTEX(kernel_fb_helper_lock); * atomic context. If drm_fb_helper_deferred_io() is used as the deferred_io * callback it will also schedule dirty_work with the damage collected from the * mmap page writes. - * - * Deferred I/O is not compatible with SHMEM. Such drivers should request an - * fbdev shadow buffer and call drm_fbdev_generic_setup() instead. */ static void drm_fb_helper_restore_lut_atomic(struct drm_crtc *crtc) diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 8de3c9a5f61b..ab0f167474b1 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -106,8 +106,8 @@ struct drm_mode_config_funcs { * Drivers implementing fbdev emulation use drm_kms_helper_hotplug_event() * to call this hook to inform the fbdev helper of output changes. * - * This hook is deprecated, drivers should instead use - * drm_fbdev_generic_setup() which takes care of any necessary + * This hook is deprecated, drivers should instead implement fbdev + * support with struct drm_client, which takes care of any necessary * hotplug event forwarding already without further involvement by * the driver. */ -- cgit v1.2.3 From ef283674a17e000bb6b2ff05dd2ac5cbf2e3ae0d Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 22 Apr 2024 11:58:57 +0300 Subject: drm/uapi: Move drm_color_ctm_3x4 out from drm_mode.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drm_color_ctm_3x4 is some undocumented amgdpu private uapi and thus has no business being in drm_mode.h. At least move it to some amdgpu specific header, albeit with the wrong namespace as maybe something somewhere is using this already? Cc: Harry Wentland Cc: Joshua Ashton Cc: Alex Deucher Fixes: 6872a189be50 ("drm/amd/display: Add 3x4 CTM support for plane CTM") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20240422085857.17651-1-ville.syrjala@linux.intel.com Reviewed-by: Harry Wentland --- include/uapi/drm/amdgpu_drm.h | 9 +++++++++ include/uapi/drm/drm_mode.h | 8 -------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 96e32dafd4f0..d5ebafacdd70 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -1269,6 +1269,15 @@ struct drm_amdgpu_info_gpuvm_fault { #define AMDGPU_FAMILY_GC_10_3_7 151 /* GC 10.3.7 */ #define AMDGPU_FAMILY_GC_11_5_0 150 /* GC 11.5.0 */ +/* FIXME wrong namespace! */ +struct drm_color_ctm_3x4 { + /* + * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude + * (not two's complement!) format. + */ + __u64 matrix[12]; +}; + #if defined(__cplusplus) } #endif diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 1ca5c7e418fd..d390011b89b4 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -846,14 +846,6 @@ struct drm_color_ctm { __u64 matrix[9]; }; -struct drm_color_ctm_3x4 { - /* - * Conversion matrix with 3x4 dimensions in S31.32 sign-magnitude - * (not two's complement!) format. - */ - __u64 matrix[12]; -}; - struct drm_color_lut { /* * Values are mapped linearly to 0.0 - 1.0 range, with 0x0 == 0.0 and -- cgit v1.2.3 From 7fb8af6798e8d013017e4607505f58d9942fd671 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 29 Apr 2024 19:43:36 +0300 Subject: drm: deprecate driver date The driver date serves no useful purpose, because it's hardly ever updated. The information is misleading at best. As described in Documentation/gpu/drm-internals.rst: The driver date, formatted as YYYYMMDD, is meant to identify the date of the latest modification to the driver. However, as most drivers fail to update it, its value is mostly useless. The DRM core prints it to the kernel log at initialization time and passes it to userspace through the DRM_IOCTL_VERSION ioctl. Stop printing the driver date at init, and start returning the empty string "" as driver date through the DRM_IOCTL_VERSION ioctl. The driver date initialization in drivers and the struct drm_driver date member can be removed in follow-up. Reviewed-by: Hamza Mahfooz Acked-by: Simon Ser Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240429164336.1406480-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- Documentation/gpu/drm-internals.rst | 10 ++-------- drivers/gpu/drm/drm_drv.c | 4 ++-- drivers/gpu/drm/drm_ioctl.c | 5 +++-- include/drm/drm_drv.h | 2 +- 4 files changed, 8 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/Documentation/gpu/drm-internals.rst b/Documentation/gpu/drm-internals.rst index 335de7fcddee..11d9a5730fb2 100644 --- a/Documentation/gpu/drm-internals.rst +++ b/Documentation/gpu/drm-internals.rst @@ -57,8 +57,8 @@ is larger than the driver minor, the DRM_IOCTL_SET_VERSION call will return an error. Otherwise the driver's set_version() method will be called with the requested version. -Name, Description and Date -~~~~~~~~~~~~~~~~~~~~~~~~~~ +Name and Description +~~~~~~~~~~~~~~~~~~~~ char \*name; char \*desc; char \*date; The driver name is printed to the kernel log at initialization time, @@ -69,12 +69,6 @@ The driver description is a purely informative string passed to userspace through the DRM_IOCTL_VERSION ioctl and otherwise unused by the kernel. -The driver date, formatted as YYYYMMDD, is meant to identify the date of -the latest modification to the driver. However, as most drivers fail to -update it, its value is mostly useless. The DRM core prints it to the -kernel log at initialization time and passes it to userspace through the -DRM_IOCTL_VERSION ioctl. - Module Initialization --------------------- diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index b8186cf16230..93543071a500 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -947,9 +947,9 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) } drm_panic_register(dev); - DRM_INFO("Initialized %s %d.%d.%d %s for %s on minor %d\n", + DRM_INFO("Initialized %s %d.%d.%d for %s on minor %d\n", driver->name, driver->major, driver->minor, - driver->patchlevel, driver->date, + driver->patchlevel, dev->dev ? dev_name(dev->dev) : "virtual device", dev->primary ? dev->primary->index : dev->accel->index); diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index e368fc084c77..89feb7306e47 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -529,9 +529,10 @@ int drm_version(struct drm_device *dev, void *data, version->version_patchlevel = dev->driver->patchlevel; err = drm_copy_field(version->name, &version->name_len, dev->driver->name); + + /* Driver date is deprecated. Return the empty string. */ if (!err) - err = drm_copy_field(version->date, &version->date_len, - dev->driver->date); + err = drm_copy_field(version->date, &version->date_len, ""); if (!err) err = drm_copy_field(version->desc, &version->desc_len, dev->driver->desc); diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index 8878260d7529..cd37936c3926 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -411,7 +411,7 @@ struct drm_driver { char *name; /** @desc: driver description */ char *desc; - /** @date: driver date */ + /** @date: driver date, unused, to be removed */ char *date; /** -- cgit v1.2.3 From c72ceafbd12cf95e088681ae5e535ef1a78bf0ed Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Fri, 29 Mar 2024 16:24:42 -0500 Subject: mm: Introduce AS_INACCESSIBLE for encrypted/confidential memory filemap users like guest_memfd may use page cache pages to allocate/manage memory that is only intended to be accessed by guests via hardware protections like encryption. Writes to memory of this sort in common paths like truncation may cause unexpected behavior such as writing garbage instead of zeros when attempting to zero pages, or worse, triggering hardware protections that are considered fatal as far as the kernel is concerned. Introduce a new address_space flag, AS_INACCESSIBLE, and use this initially to prevent zero'ing of pages during truncation, with the understanding that it is up to the owner of the mapping to handle this specially if needed. This is admittedly a rather blunt solution, but it seems like there are no other places that should take into account the flag to keep its promise. Link: https://lore.kernel.org/lkml/ZR9LYhpxTaTk6PJX@google.com/ Cc: Matthew Wilcox Suggested-by: Sean Christopherson Signed-off-by: Michael Roth Message-ID: <20240329212444.395559-5-michael.roth@amd.com> Acked-by: Vlastimil Babka Signed-off-by: Paolo Bonzini --- include/linux/pagemap.h | 1 + mm/truncate.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2df35e65557d..f879c1d54da7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -207,6 +207,7 @@ enum mapping_flags { AS_STABLE_WRITES, /* must wait for writeback before modifying folio contents */ AS_UNMOVABLE, /* The mapping cannot be moved, ever */ + AS_INACCESSIBLE, /* Do not attempt direct R/W access to the mapping */ }; /** diff --git a/mm/truncate.c b/mm/truncate.c index 725b150e47ac..c501338c7ebd 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -233,7 +233,8 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) * doing a complex calculation here, and then doing the zeroing * anyway if the page split fails. */ - folio_zero_range(folio, offset, length); + if (!(folio->mapping->flags & AS_INACCESSIBLE)) + folio_zero_range(folio, offset, length); if (folio_has_private(folio)) folio_invalidate(folio, offset, length); -- cgit v1.2.3 From 3bb2531e20bff99f9cd8719ee7aea8bd82687173 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 7 May 2024 12:54:03 -0400 Subject: KVM: guest_memfd: Add hook for initializing memory guest_memfd pages are generally expected to be in some arch-defined initial state prior to using them for guest memory. For SEV-SNP this initial state is 'private', or 'guest-owned', and requires additional operations to move these pages into a 'private' state by updating the corresponding entries the RMP table. Allow for an arch-defined hook to handle updates of this sort, and go ahead and implement one for x86 so KVM implementations like AMD SVM can register a kvm_x86_ops callback to handle these updates for SEV-SNP guests. The preparation callback is always called when allocating/grabbing folios via gmem, and it is up to the architecture to keep track of whether or not the pages are already in the expected state (e.g. the RMP table in the case of SEV-SNP). In some cases, it is necessary to defer the preparation of the pages to handle things like in-place encryption of initial guest memory payloads before marking these pages as 'private'/'guest-owned'. Add an argument (always true for now) to kvm_gmem_get_folio() that allows for the preparation callback to be bypassed. To detect possible issues in the way userspace initializes memory, it is only possible to add an unprepared page if it is not already included in the filemap. Link: https://lore.kernel.org/lkml/ZLqVdvsF11Ddo7Dq@google.com/ Co-developed-by: Michael Roth Signed-off-by: Michael Roth Message-Id: <20231230172351.574091-5-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 6 +++++ include/linux/kvm_host.h | 5 ++++ virt/kvm/Kconfig | 4 +++ virt/kvm/guest_memfd.c | 51 +++++++++++++++++++++++++++++++++++--- 6 files changed, 65 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 5187fcf4b610..d26fcad13e36 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -139,6 +139,7 @@ KVM_X86_OP(vcpu_deliver_sipi_vector) KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL(get_untagged_addr) KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) +KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) #undef KVM_X86_OP #undef KVM_X86_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0369e9efe429..738ad82a3e67 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1812,6 +1812,7 @@ struct kvm_x86_ops { gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); + int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2d2619d3eee4..972524ddcfdb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13598,6 +13598,12 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); +#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) +{ + return static_call(kvm_x86_gmem_prepare)(kvm, pfn, gfn, max_order); +} +#endif int kvm_spec_ctrl_test_value(u64 value) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index afbc99264ffa..1af069ab657c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2443,4 +2443,9 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ +#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); +bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); +#endif + #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 29b73eedfe74..ca870157b2ed 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -109,3 +109,7 @@ config KVM_GENERIC_PRIVATE_MEM select KVM_GENERIC_MEMORY_ATTRIBUTES select KVM_PRIVATE_MEM bool + +config HAVE_KVM_GMEM_PREPARE + bool + depends on KVM_PRIVATE_MEM diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index fd32288d0fbc..0176089be731 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -13,7 +13,43 @@ struct kvm_gmem { struct list_head entry; }; -static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) +static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio) +{ +#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE + struct list_head *gmem_list = &inode->i_mapping->i_private_list; + struct kvm_gmem *gmem; + + list_for_each_entry(gmem, gmem_list, entry) { + struct kvm_memory_slot *slot; + struct kvm *kvm = gmem->kvm; + struct page *page; + kvm_pfn_t pfn; + gfn_t gfn; + int rc; + + if (!kvm_arch_gmem_prepare_needed(kvm)) + continue; + + slot = xa_load(&gmem->bindings, index); + if (!slot) + continue; + + page = folio_file_page(folio, index); + pfn = page_to_pfn(page); + gfn = slot->base_gfn + index - slot->gmem.pgoff; + rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page))); + if (rc) { + pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx, error %d.\n", + index, rc); + return rc; + } + } + +#endif + return 0; +} + +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) { struct folio *folio; @@ -41,6 +77,15 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) folio_mark_uptodate(folio); } + if (prepare) { + int r = kvm_gmem_prepare_folio(inode, index, folio); + if (r < 0) { + folio_unlock(folio); + folio_put(folio); + return ERR_PTR(r); + } + } + /* * Ignore accessed, referenced, and dirty flags. The memory is * unevictable and there is no storage to write back to. @@ -145,7 +190,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) break; } - folio = kvm_gmem_get_folio(inode, index); + folio = kvm_gmem_get_folio(inode, index, true); if (IS_ERR(folio)) { r = PTR_ERR(folio); break; @@ -505,7 +550,7 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, goto out_fput; } - folio = kvm_gmem_get_folio(file_inode(file), index); + folio = kvm_gmem_get_folio(file_inode(file), index, true); if (IS_ERR(folio)) { r = PTR_ERR(folio); goto out_fput; -- cgit v1.2.3 From 1f6c06b177513e8a47c43e95d1985dbd9cff3ddd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 14 Feb 2024 12:09:06 -0500 Subject: KVM: guest_memfd: Add interface for populating gmem pages with user data During guest run-time, kvm_arch_gmem_prepare() is issued as needed to prepare newly-allocated gmem pages prior to mapping them into the guest. In the case of SEV-SNP, this mainly involves setting the pages to private in the RMP table. However, for the GPA ranges comprising the initial guest payload, which are encrypted/measured prior to starting the guest, the gmem pages need to be accessed prior to setting them to private in the RMP table so they can be initialized with the userspace-provided data. Additionally, an SNP firmware call is needed afterward to encrypt them in-place and measure the contents into the guest's launch digest. While it is possible to bypass the kvm_arch_gmem_prepare() hooks so that this handling can be done in an open-coded/vendor-specific manner, this may expose more gmem-internal state/dependencies to external callers than necessary. Try to avoid this by implementing an interface that tries to handle as much of the common functionality inside gmem as possible, while also making it generic enough to potentially be usable/extensible for TDX as well. Suggested-by: Sean Christopherson Signed-off-by: Michael Roth Co-developed-by: Michael Roth Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 27 +++++++++++++++++++++++++ virt/kvm/guest_memfd.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1af069ab657c..1ae65774d9fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2448,4 +2448,31 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif +/** + * kvm_gmem_populate() - Populate/prepare a GPA range with guest data + * + * @kvm: KVM instance + * @gfn: starting GFN to be populated + * @src: userspace-provided buffer containing data to copy into GFN range + * (passed to @post_populate, and incremented on each iteration + * if not NULL) + * @npages: number of pages to copy from userspace-buffer + * @post_populate: callback to issue for each gmem page that backs the GPA + * range + * @opaque: opaque data to pass to @post_populate callback + * + * This is primarily intended for cases where a gmem-backed GPA range needs + * to be initialized with userspace-provided data prior to being mapped into + * the guest as a private page. This should be called with the slots->lock + * held so that caller-enforced invariants regarding the expected memory + * attributes of the GPA range do not race with KVM_SET_MEMORY_ATTRIBUTES. + * + * Returns the number of pages that were populated. + */ +typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, + void __user *src, int order, void *opaque); + +long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, + kvm_gmem_populate_cb post_populate, void *opaque); + #endif diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index bfe437098b79..5d6c87bb13f6 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -585,3 +585,55 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, return r; } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); + +long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, + kvm_gmem_populate_cb post_populate, void *opaque) +{ + struct file *file; + struct kvm_memory_slot *slot; + void __user *p; + + int ret = 0, max_order; + long i; + + lockdep_assert_held(&kvm->slots_lock); + if (npages < 0) + return -EINVAL; + + slot = gfn_to_memslot(kvm, start_gfn); + if (!kvm_slot_can_be_private(slot)) + return -EINVAL; + + file = kvm_gmem_get_file(slot); + if (!file) + return -EFAULT; + + filemap_invalidate_lock(file->f_mapping); + + npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); + for (i = 0; i < npages; i += (1 << max_order)) { + gfn_t gfn = start_gfn + i; + kvm_pfn_t pfn; + + ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false); + if (ret) + break; + + if (!IS_ALIGNED(gfn, (1 << max_order)) || + (npages - i) < (1 << max_order)) + max_order = 0; + + p = src ? src + i * PAGE_SIZE : NULL; + ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); + + put_page(pfn_to_page(pfn)); + if (ret) + break; + } + + filemap_invalidate_unlock(file->f_mapping); + + fput(file); + return ret && !i ? ret : i; +} +EXPORT_SYMBOL_GPL(kvm_gmem_populate); -- cgit v1.2.3 From a90764f0e4ed5350cc9caeb384e0fb356c032587 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Sat, 30 Dec 2023 11:23:21 -0600 Subject: KVM: guest_memfd: Add hook for invalidating memory In some cases, like with SEV-SNP, guest memory needs to be updated in a platform-specific manner before it can be safely freed back to the host. Wire up arch-defined hooks to the .free_folio kvm_gmem_aops callback to allow for special handling of this sort when freeing memory in response to FALLOC_FL_PUNCH_HOLE operations and when releasing the inode, and go ahead and define an arch-specific hook for x86 since it will be needed for handling memory used for SEV-SNP guests. Signed-off-by: Michael Roth Message-Id: <20231230172351.574091-6-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm-x86-ops.h | 1 + arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 7 +++++++ include/linux/kvm_host.h | 4 ++++ virt/kvm/Kconfig | 4 ++++ virt/kvm/guest_memfd.c | 14 ++++++++++++++ 6 files changed, 31 insertions(+) (limited to 'include') diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index d26fcad13e36..c81990937ab4 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -140,6 +140,7 @@ KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL(get_untagged_addr) KVM_X86_OP_OPTIONAL(alloc_apic_backing_page) KVM_X86_OP_OPTIONAL_RET0(gmem_prepare) +KVM_X86_OP_OPTIONAL(gmem_invalidate) #undef KVM_X86_OP #undef KVM_X86_OP_OPTIONAL diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 738ad82a3e67..51bb4b785d6a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1813,6 +1813,7 @@ struct kvm_x86_ops { gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); + void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); }; struct kvm_x86_nested_ops { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 972524ddcfdb..83b8260443a3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -13605,6 +13605,13 @@ int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_ord } #endif +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) +{ + static_call_cond(kvm_x86_gmem_invalidate)(start, end); +} +#endif + int kvm_spec_ctrl_test_value(u64 value) { /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1ae65774d9fa..b43b96f876fe 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2475,4 +2475,8 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); +#endif + #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index ca870157b2ed..754c6c923427 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,3 +113,7 @@ config KVM_GENERIC_PRIVATE_MEM config HAVE_KVM_GMEM_PREPARE bool depends on KVM_PRIVATE_MEM + +config HAVE_KVM_GMEM_INVALIDATE + bool + depends on KVM_PRIVATE_MEM diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 5d6c87bb13f6..dfe50c64a552 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -343,10 +343,24 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol return MF_DELAYED; } +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +static void kvm_gmem_free_folio(struct folio *folio) +{ + struct page *page = folio_page(folio, 0); + kvm_pfn_t pfn = page_to_pfn(page); + int order = folio_order(folio); + + kvm_arch_gmem_invalidate(pfn, pfn + (1ul << order)); +} +#endif + static const struct address_space_operations kvm_gmem_aops = { .dirty_folio = noop_dirty_folio, .migrate_folio = kvm_gmem_migrate_folio, .error_remove_folio = kvm_gmem_error_folio, +#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE + .free_folio = kvm_gmem_free_folio, +#endif }; static int kvm_gmem_getattr(struct mnt_idmap *idmap, const struct path *path, -- cgit v1.2.3 From ad27ce155566f2b4400fa865859834592bd18777 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Wed, 1 May 2024 03:51:57 -0500 Subject: KVM: SEV: Add KVM_SEV_SNP_LAUNCH_FINISH command Add a KVM_SEV_SNP_LAUNCH_FINISH command to finalize the cryptographic launch digest which stores the measurement of the guest at launch time. Also extend the existing SNP firmware data structures to support disabling the use of Versioned Chip Endorsement Keys (VCEK) by guests as part of this command. While finalizing the launch flow, the code also issues the LAUNCH_UPDATE SNP firmware commands to encrypt/measure the initial VMSA pages for each configured vCPU, which requires setting the RMP entries for those pages to private, so also add handling to clean up the RMP entries for these pages whening freeing vCPUs during shutdown. Signed-off-by: Brijesh Singh Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Harald Hoyer Signed-off-by: Ashish Kalra Message-ID: <20240501085210.2213060-8-michael.roth@amd.com> Signed-off-by: Paolo Bonzini --- .../virt/kvm/x86/amd-memory-encryption.rst | 28 +++++ arch/x86/include/uapi/asm/kvm.h | 17 +++ arch/x86/kvm/svm/sev.c | 127 +++++++++++++++++++++ include/linux/psp-sev.h | 4 +- 4 files changed, 175 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/virt/kvm/x86/amd-memory-encryption.rst b/Documentation/virt/kvm/x86/amd-memory-encryption.rst index cc16a7426d18..1ddb6a86ce7f 100644 --- a/Documentation/virt/kvm/x86/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/x86/amd-memory-encryption.rst @@ -544,6 +544,34 @@ where the allowed values for page_type are #define'd as:: See the SEV-SNP spec [snp-fw-abi]_ for further details on how each page type is used/measured. +20. KVM_SEV_SNP_LAUNCH_FINISH +----------------------------- + +After completion of the SNP guest launch flow, the KVM_SEV_SNP_LAUNCH_FINISH +command can be issued to make the guest ready for execution. + +Parameters (in): struct kvm_sev_snp_launch_finish + +Returns: 0 on success, -negative on error + +:: + + struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[32]; + __u8 pad0[3]; + __u16 flags; /* Must be zero */ + __u64 pad1[4]; + }; + + +See SNP_LAUNCH_FINISH in the SEV-SNP specification [snp-fw-abi]_ for further +details on the input parameters in ``struct kvm_sev_snp_launch_finish``. + Device attribute API ==================== diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5935dc8a7e02..988b5204d636 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -700,6 +700,7 @@ enum sev_cmd_id { /* SNP-specific commands */ KVM_SEV_SNP_LAUNCH_START = 100, KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, KVM_SEV_NR_MAX, }; @@ -854,6 +855,22 @@ struct kvm_sev_snp_launch_update { __u64 pad2[4]; }; +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c966f2224624..208bb8170d3f 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -75,6 +75,8 @@ static u64 sev_supported_vmsa_features; SNP_POLICY_MASK_DEBUG | \ SNP_POLICY_MASK_SINGLE_SOCKET) +#define INITIAL_VMSA_GPA 0xFFFFFFFFF000 + static u8 sev_enc_bit; static DECLARE_RWSEM(sev_deactivate_lock); static DEFINE_MUTEX(sev_bitmap_lock); @@ -2348,6 +2350,115 @@ out: return ret; } +static int snp_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct sev_data_snp_launch_update data = {}; + struct kvm_vcpu *vcpu; + unsigned long i; + int ret; + + data.gctx_paddr = __psp_pa(sev->snp_context); + data.page_type = SNP_PAGE_TYPE_VMSA; + + kvm_for_each_vcpu(i, vcpu, kvm) { + struct vcpu_svm *svm = to_svm(vcpu); + u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT; + + ret = sev_es_sync_vmsa(svm); + if (ret) + return ret; + + /* Transition the VMSA page to a firmware state. */ + ret = rmp_make_private(pfn, INITIAL_VMSA_GPA, PG_LEVEL_4K, sev->asid, true); + if (ret) + return ret; + + /* Issue the SNP command to encrypt the VMSA */ + data.address = __sme_pa(svm->sev_es.vmsa); + ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_SNP_LAUNCH_UPDATE, + &data, &argp->error); + if (ret) { + if (!snp_page_reclaim(pfn)) + host_rmp_make_shared(pfn, PG_LEVEL_4K); + + return ret; + } + + svm->vcpu.arch.guest_state_protected = true; + } + + return 0; +} + +static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) +{ + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; + struct kvm_sev_snp_launch_finish params; + struct sev_data_snp_launch_finish *data; + void *id_block = NULL, *id_auth = NULL; + int ret; + + if (!sev_snp_guest(kvm)) + return -ENOTTY; + + if (!sev->snp_context) + return -EINVAL; + + if (copy_from_user(¶ms, u64_to_user_ptr(argp->data), sizeof(params))) + return -EFAULT; + + if (params.flags) + return -EINVAL; + + /* Measure all vCPUs using LAUNCH_UPDATE before finalizing the launch flow. */ + ret = snp_launch_update_vmsa(kvm, argp); + if (ret) + return ret; + + data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT); + if (!data) + return -ENOMEM; + + if (params.id_block_en) { + id_block = psp_copy_user_blob(params.id_block_uaddr, KVM_SEV_SNP_ID_BLOCK_SIZE); + if (IS_ERR(id_block)) { + ret = PTR_ERR(id_block); + goto e_free; + } + + data->id_block_en = 1; + data->id_block_paddr = __sme_pa(id_block); + + id_auth = psp_copy_user_blob(params.id_auth_uaddr, KVM_SEV_SNP_ID_AUTH_SIZE); + if (IS_ERR(id_auth)) { + ret = PTR_ERR(id_auth); + goto e_free_id_block; + } + + data->id_auth_paddr = __sme_pa(id_auth); + + if (params.auth_key_en) + data->auth_key_en = 1; + } + + data->vcek_disabled = params.vcek_disabled; + + memcpy(data->host_data, params.host_data, KVM_SEV_SNP_FINISH_DATA_SIZE); + data->gctx_paddr = __psp_pa(sev->snp_context); + ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error); + + kfree(id_auth); + +e_free_id_block: + kfree(id_block); + +e_free: + kfree(data); + + return ret; +} + int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; @@ -2450,6 +2561,9 @@ int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) case KVM_SEV_SNP_LAUNCH_UPDATE: r = snp_launch_update(kvm, &sev_cmd); break; + case KVM_SEV_SNP_LAUNCH_FINISH: + r = snp_launch_finish(kvm, &sev_cmd); + break; default: r = -EINVAL; goto out; @@ -2940,11 +3054,24 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu) svm = to_svm(vcpu); + /* + * If it's an SNP guest, then the VMSA was marked in the RMP table as + * a guest-owned page. Transition the page to hypervisor state before + * releasing it back to the system. + */ + if (sev_snp_guest(vcpu->kvm)) { + u64 pfn = __pa(svm->sev_es.vmsa) >> PAGE_SHIFT; + + if (host_rmp_make_shared(pfn, PG_LEVEL_4K)) + goto skip_vmsa_free; + } + if (vcpu->arch.guest_state_protected) sev_flush_encrypted_page(vcpu, svm->sev_es.vmsa); __free_page(virt_to_page(svm->sev_es.vmsa)); +skip_vmsa_free: if (svm->sev_es.ghcb_sa_free) kvfree(svm->sev_es.ghcb_sa); } diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 3705c2044fc0..903ddfea8585 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -658,6 +658,7 @@ struct sev_data_snp_launch_update { * @id_auth_paddr: system physical address of ID block authentication structure * @id_block_en: indicates whether ID block is present * @auth_key_en: indicates whether author key is present in authentication structure + * @vcek_disabled: indicates whether use of VCEK is allowed for attestation reports * @rsvd: reserved * @host_data: host-supplied data for guest, not interpreted by firmware */ @@ -667,7 +668,8 @@ struct sev_data_snp_launch_finish { u64 id_auth_paddr; u8 id_block_en:1; u8 auth_key_en:1; - u64 rsvd:62; + u8 vcek_disabled:1; + u64 rsvd:61; u8 host_data[32]; } __packed; -- cgit v1.2.3 From 3dbfbd101a5844f851da9ae6e90f59753c10ff42 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 13 May 2024 23:27:23 +0300 Subject: drm/edid: remove drm_do_get_edid() All users of drm_do_get_edid() have been converted to drm_edid_read_custom(). Remove the unused function to prevent new users from creeping in. Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240513202723.261440-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_edid.c | 28 ---------------------------- include/drm/drm_edid.h | 4 ---- 2 files changed, 32 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 4f54c91b31b2..0f7c4c5b14b9 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2464,34 +2464,6 @@ fail: return NULL; } -/** - * drm_do_get_edid - get EDID data using a custom EDID block read function - * @connector: connector we're probing - * @read_block: EDID block read function - * @context: private data passed to the block read function - * - * When the I2C adapter connected to the DDC bus is hidden behind a device that - * exposes a different interface to read EDID blocks this function can be used - * to get EDID data using a custom block read function. - * - * As in the general case the DDC bus is accessible by the kernel at the I2C - * level, drivers must make all reasonable efforts to expose it as an I2C - * adapter and use drm_get_edid() instead of abusing this function. - * - * The EDID may be overridden using debugfs override_edid or firmware EDID - * (drm_edid_load_firmware() and drm.edid_firmware parameter), in this priority - * order. Having either of them bypasses actual EDID reads. - * - * Return: Pointer to valid EDID or NULL if we couldn't find any. - */ -struct edid *drm_do_get_edid(struct drm_connector *connector, - read_block_fn read_block, - void *context) -{ - return _drm_do_get_edid(connector, read_block, context, NULL); -} -EXPORT_SYMBOL_GPL(drm_do_get_edid); - /** * drm_edid_raw - Get a pointer to the raw EDID data. * @drm_edid: drm_edid container diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index b085525e53e2..6bdfa254a1c1 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -423,10 +423,6 @@ static inline void drm_edid_decode_panel_id(u32 panel_id, char vend[4], u16 *pro } bool drm_probe_ddc(struct i2c_adapter *adapter); -struct edid *drm_do_get_edid(struct drm_connector *connector, - int (*get_edid_block)(void *data, u8 *buf, unsigned int block, - size_t len), - void *data); struct edid *drm_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter); struct edid *drm_get_edid_switcheroo(struct drm_connector *connector, -- cgit v1.2.3 From cdfad4db7756563db7d458216d9e3c2651dddc7d Mon Sep 17 00:00:00 2001 From: Tomasz Rusinowicz Date: Mon, 13 May 2024 14:04:27 +0200 Subject: accel/ivpu: Add NPU profiling support Implement time based Metric Streamer profiling UAPI. This is a generic mechanism allowing user mode tools to sample NPU metrics. These metrics are defined by the FW and transparent to the driver. The user space can check for this feature by checking DRM_IVPU_CAP_METRIC_STREAMER driver capability. Signed-off-by: Tomasz Rusinowicz Signed-off-by: Jacek Lawrynowicz Link: https://patchwork.freedesktop.org/patch/msgid/20240513120431.3187212-9-jacek.lawrynowicz@linux.intel.com --- drivers/accel/ivpu/Makefile | 3 +- drivers/accel/ivpu/ivpu_drv.c | 14 +- drivers/accel/ivpu/ivpu_drv.h | 3 + drivers/accel/ivpu/ivpu_jsm_msg.c | 98 ++++++++++++ drivers/accel/ivpu/ivpu_jsm_msg.h | 8 +- drivers/accel/ivpu/ivpu_ms.c | 309 ++++++++++++++++++++++++++++++++++++++ drivers/accel/ivpu/ivpu_ms.h | 36 +++++ drivers/accel/ivpu/ivpu_pm.c | 4 + include/uapi/drm/ivpu_accel.h | 69 ++++++++- 9 files changed, 540 insertions(+), 4 deletions(-) create mode 100644 drivers/accel/ivpu/ivpu_ms.c create mode 100644 drivers/accel/ivpu/ivpu_ms.h (limited to 'include') diff --git a/drivers/accel/ivpu/Makefile b/drivers/accel/ivpu/Makefile index 95ff7ad16338..1c67a73cfefe 100644 --- a/drivers/accel/ivpu/Makefile +++ b/drivers/accel/ivpu/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2024 Intel Corporation intel_vpu-y := \ ivpu_drv.o \ @@ -13,6 +13,7 @@ intel_vpu-y := \ ivpu_jsm_msg.o \ ivpu_mmu.o \ ivpu_mmu_context.o \ + ivpu_ms.o \ ivpu_pm.o intel_vpu-$(CONFIG_DEBUG_FS) += ivpu_debugfs.o diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c index ca4fcef7edf5..a02a1929f5a1 100644 --- a/drivers/accel/ivpu/ivpu_drv.c +++ b/drivers/accel/ivpu/ivpu_drv.c @@ -26,6 +26,7 @@ #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" #include "ivpu_mmu_context.h" +#include "ivpu_ms.h" #include "ivpu_pm.h" #ifndef DRIVER_VERSION_STR @@ -100,6 +101,7 @@ static void file_priv_release(struct kref *ref) mutex_unlock(&vdev->context_list_lock); pm_runtime_put_autosuspend(vdev->drm.dev); + mutex_destroy(&file_priv->ms_lock); mutex_destroy(&file_priv->lock); kfree(file_priv); } @@ -122,7 +124,7 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param { switch (args->index) { case DRM_IVPU_CAP_METRIC_STREAMER: - args->value = 0; + args->value = 1; break; case DRM_IVPU_CAP_DMA_MEMORY_RANGE: args->value = 1; @@ -231,10 +233,13 @@ static int ivpu_open(struct drm_device *dev, struct drm_file *file) goto err_dev_exit; } + INIT_LIST_HEAD(&file_priv->ms_instance_list); + file_priv->vdev = vdev; file_priv->bound = true; kref_init(&file_priv->ref); mutex_init(&file_priv->lock); + mutex_init(&file_priv->ms_lock); mutex_lock(&vdev->context_list_lock); @@ -263,6 +268,7 @@ err_xa_erase: xa_erase_irq(&vdev->context_xa, ctx_id); err_unlock: mutex_unlock(&vdev->context_list_lock); + mutex_destroy(&file_priv->ms_lock); mutex_destroy(&file_priv->lock); kfree(file_priv); err_dev_exit: @@ -278,6 +284,7 @@ static void ivpu_postclose(struct drm_device *dev, struct drm_file *file) ivpu_dbg(vdev, FILE, "file_priv close: ctx %u process %s pid %d\n", file_priv->ctx.id, current->comm, task_pid_nr(current)); + ivpu_ms_cleanup(file_priv); ivpu_file_priv_put(&file_priv); } @@ -288,6 +295,10 @@ static const struct drm_ioctl_desc ivpu_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(IVPU_BO_INFO, ivpu_bo_info_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_SUBMIT, ivpu_submit_ioctl, 0), DRM_IOCTL_DEF_DRV(IVPU_BO_WAIT, ivpu_bo_wait_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_START, ivpu_ms_start_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_DATA, ivpu_ms_get_data_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_STOP, ivpu_ms_stop_ioctl, 0), + DRM_IOCTL_DEF_DRV(IVPU_METRIC_STREAMER_GET_INFO, ivpu_ms_get_info_ioctl, 0), }; static int ivpu_wait_for_ready(struct ivpu_device *vdev) @@ -638,6 +649,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev) ivpu_prepare_for_reset(vdev); ivpu_shutdown(vdev); + ivpu_ms_cleanup_all(vdev); ivpu_jobs_abort_all(vdev); ivpu_job_done_consumer_fini(vdev); ivpu_pm_cancel_recovery(vdev); diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h index 9e9d85ad78ea..55341762b9d9 100644 --- a/drivers/accel/ivpu/ivpu_drv.h +++ b/drivers/accel/ivpu/ivpu_drv.h @@ -155,6 +155,9 @@ struct ivpu_file_priv { struct mutex lock; /* Protects cmdq */ struct ivpu_cmdq *cmdq[IVPU_NUM_CMDQS_PER_CTX]; struct ivpu_mmu_context ctx; + struct mutex ms_lock; /* Protects ms_instance_list, ms_info_bo */ + struct list_head ms_instance_list; + struct ivpu_bo *ms_info_bo; bool has_mmu_faults; bool bound; }; diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c index 4b260065ad72..e8dd73d947e4 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.c +++ b/drivers/accel/ivpu/ivpu_jsm_msg.c @@ -440,3 +440,101 @@ int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev) return ret; } + +int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask, + u64 sampling_rate, u64 buffer_addr, u64 buffer_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_START }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_start.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_start.sampling_rate = sampling_rate; + req.payload.metric_streamer_start.buffer_addr = buffer_addr; + req.payload.metric_streamer_start.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_START_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to start metric streamer: ret %d\n", ret); + return ret; + } + + return ret; +} + +int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_STOP }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_stop.metric_group_mask = metric_group_mask; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_STOP_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) + ivpu_warn_ratelimited(vdev, "Failed to stop metric streamer: ret %d\n", ret); + + return ret; +} + +int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask, + u64 buffer_addr, u64 buffer_size, u64 *bytes_written) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_UPDATE }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_update.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_update.buffer_addr = buffer_addr; + req.payload.metric_streamer_update.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_UPDATE_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to update metric streamer: ret %d\n", ret); + return ret; + } + + if (buffer_size && resp.payload.metric_streamer_done.bytes_written > buffer_size) { + ivpu_warn_ratelimited(vdev, "MS buffer overflow: bytes_written %#llx > buffer_size %#llx\n", + resp.payload.metric_streamer_done.bytes_written, buffer_size); + return -EOVERFLOW; + } + + *bytes_written = resp.payload.metric_streamer_done.bytes_written; + + return ret; +} + +int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr, + u64 buffer_size, u32 *sample_size, u64 *info_size) +{ + struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_METRIC_STREAMER_INFO }; + struct vpu_jsm_msg resp; + int ret; + + req.payload.metric_streamer_start.metric_group_mask = metric_group_mask; + req.payload.metric_streamer_start.buffer_addr = buffer_addr; + req.payload.metric_streamer_start.buffer_size = buffer_size; + + ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_METRIC_STREAMER_INFO_DONE, &resp, + VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm); + if (ret) { + ivpu_warn_ratelimited(vdev, "Failed to get metric streamer info: ret %d\n", ret); + return ret; + } + + if (!resp.payload.metric_streamer_done.sample_size) { + ivpu_warn_ratelimited(vdev, "Invalid sample size\n"); + return -EBADMSG; + } + + if (sample_size) + *sample_size = resp.payload.metric_streamer_done.sample_size; + if (info_size) + *info_size = resp.payload.metric_streamer_done.bytes_written; + + return ret; +} diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h index 357728295fe9..060363409fb3 100644 --- a/drivers/accel/ivpu/ivpu_jsm_msg.h +++ b/drivers/accel/ivpu/ivpu_jsm_msg.h @@ -34,5 +34,11 @@ int ivpu_jsm_hws_set_context_sched_properties(struct ivpu_device *vdev, u32 ctx_ int ivpu_jsm_hws_set_scheduling_log(struct ivpu_device *vdev, u32 engine_idx, u32 host_ssid, u64 vpu_log_buffer_va); int ivpu_jsm_hws_setup_priority_bands(struct ivpu_device *vdev); - +int ivpu_jsm_metric_streamer_start(struct ivpu_device *vdev, u64 metric_group_mask, + u64 sampling_rate, u64 buffer_addr, u64 buffer_size); +int ivpu_jsm_metric_streamer_stop(struct ivpu_device *vdev, u64 metric_group_mask); +int ivpu_jsm_metric_streamer_update(struct ivpu_device *vdev, u64 metric_group_mask, + u64 buffer_addr, u64 buffer_size, u64 *bytes_written); +int ivpu_jsm_metric_streamer_info(struct ivpu_device *vdev, u64 metric_group_mask, u64 buffer_addr, + u64 buffer_size, u32 *sample_size, u64 *info_size); #endif diff --git a/drivers/accel/ivpu/ivpu_ms.c b/drivers/accel/ivpu/ivpu_ms.c new file mode 100644 index 000000000000..2f9d37f5c208 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ms.c @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +/* + * Copyright (C) 2020-2024 Intel Corporation + */ + +#include + +#include "ivpu_drv.h" +#include "ivpu_gem.h" +#include "ivpu_jsm_msg.h" +#include "ivpu_ms.h" +#include "ivpu_pm.h" + +#define MS_INFO_BUFFER_SIZE SZ_16K +#define MS_NUM_BUFFERS 2 +#define MS_READ_PERIOD_MULTIPLIER 2 +#define MS_MIN_SAMPLE_PERIOD_NS 1000000 + +static struct ivpu_ms_instance * +get_instance_by_mask(struct ivpu_file_priv *file_priv, u64 metric_mask) +{ + struct ivpu_ms_instance *ms; + + lockdep_assert_held(&file_priv->ms_lock); + + list_for_each_entry(ms, &file_priv->ms_instance_list, ms_instance_node) + if (ms->mask == metric_mask) + return ms; + + return NULL; +} + +int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_metric_streamer_start *args = data; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + u64 single_buff_size; + u32 sample_size; + int ret; + + if (!args->metric_group_mask || !args->read_period_samples || + args->sampling_period_ns < MS_MIN_SAMPLE_PERIOD_NS) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + if (get_instance_by_mask(file_priv, args->metric_group_mask)) { + ivpu_err(vdev, "Instance already exists (mask %#llx)\n", args->metric_group_mask); + ret = -EALREADY; + goto unlock; + } + + ms = kzalloc(sizeof(*ms), GFP_KERNEL); + if (!ms) { + ret = -ENOMEM; + goto unlock; + } + + ms->mask = args->metric_group_mask; + + ret = ivpu_jsm_metric_streamer_info(vdev, ms->mask, 0, 0, &sample_size, NULL); + if (ret) + goto err_free_ms; + + single_buff_size = sample_size * + ((u64)args->read_period_samples * MS_READ_PERIOD_MULTIPLIER); + ms->bo = ivpu_bo_create_global(vdev, PAGE_ALIGN(single_buff_size * MS_NUM_BUFFERS), + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + if (!ms->bo) { + ivpu_err(vdev, "Failed to allocate MS buffer (size %llu)\n", single_buff_size); + ret = -ENOMEM; + goto err_free_ms; + } + + ms->buff_size = ivpu_bo_size(ms->bo) / MS_NUM_BUFFERS; + ms->active_buff_vpu_addr = ms->bo->vpu_addr; + ms->inactive_buff_vpu_addr = ms->bo->vpu_addr + ms->buff_size; + ms->active_buff_ptr = ivpu_bo_vaddr(ms->bo); + ms->inactive_buff_ptr = ivpu_bo_vaddr(ms->bo) + ms->buff_size; + + ret = ivpu_jsm_metric_streamer_start(vdev, ms->mask, args->sampling_period_ns, + ms->active_buff_vpu_addr, ms->buff_size); + if (ret) + goto err_free_bo; + + args->sample_size = sample_size; + args->max_data_size = ivpu_bo_size(ms->bo); + list_add_tail(&ms->ms_instance_node, &file_priv->ms_instance_list); + goto unlock; + +err_free_bo: + ivpu_bo_free(ms->bo); +err_free_ms: + kfree(ms); +unlock: + mutex_unlock(&file_priv->ms_lock); + return ret; +} + +static int +copy_leftover_bytes(struct ivpu_ms_instance *ms, + void __user *user_ptr, u64 user_size, u64 *user_bytes_copied) +{ + u64 copy_bytes; + + if (ms->leftover_bytes) { + copy_bytes = min(user_size - *user_bytes_copied, ms->leftover_bytes); + if (copy_to_user(user_ptr + *user_bytes_copied, ms->leftover_addr, copy_bytes)) + return -EFAULT; + + ms->leftover_bytes -= copy_bytes; + ms->leftover_addr += copy_bytes; + *user_bytes_copied += copy_bytes; + } + + return 0; +} + +static int +copy_samples_to_user(struct ivpu_device *vdev, struct ivpu_ms_instance *ms, + void __user *user_ptr, u64 user_size, u64 *user_bytes_copied) +{ + u64 bytes_written; + int ret; + + *user_bytes_copied = 0; + + ret = copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied); + if (ret) + return ret; + + if (*user_bytes_copied == user_size) + return 0; + + ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, ms->inactive_buff_vpu_addr, + ms->buff_size, &bytes_written); + if (ret) + return ret; + + swap(ms->active_buff_vpu_addr, ms->inactive_buff_vpu_addr); + swap(ms->active_buff_ptr, ms->inactive_buff_ptr); + + ms->leftover_bytes = bytes_written; + ms->leftover_addr = ms->inactive_buff_ptr; + + return copy_leftover_bytes(ms, user_ptr, user_size, user_bytes_copied); +} + +int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_metric_streamer_get_data *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_ms_instance *ms; + u64 bytes_written; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + ms = get_instance_by_mask(file_priv, args->metric_group_mask); + if (!ms) { + ivpu_err(vdev, "Instance doesn't exist for mask: %#llx\n", args->metric_group_mask); + ret = -EINVAL; + goto unlock; + } + + if (!args->buffer_size) { + ret = ivpu_jsm_metric_streamer_update(vdev, ms->mask, 0, 0, &bytes_written); + if (ret) + goto unlock; + args->data_size = bytes_written + ms->leftover_bytes; + goto unlock; + } + + if (!args->buffer_ptr) { + ret = -EINVAL; + goto unlock; + } + + ret = copy_samples_to_user(vdev, ms, u64_to_user_ptr(args->buffer_ptr), + args->buffer_size, &args->data_size); +unlock: + mutex_unlock(&file_priv->ms_lock); + + return ret; +} + +static void free_instance(struct ivpu_file_priv *file_priv, struct ivpu_ms_instance *ms) +{ + lockdep_assert_held(&file_priv->ms_lock); + + list_del(&ms->ms_instance_node); + ivpu_jsm_metric_streamer_stop(file_priv->vdev, ms->mask); + ivpu_bo_free(ms->bo); + kfree(ms); +} + +int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct ivpu_file_priv *file_priv = file->driver_priv; + struct drm_ivpu_metric_streamer_stop *args = data; + struct ivpu_ms_instance *ms; + + if (!args->metric_group_mask) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + ms = get_instance_by_mask(file_priv, args->metric_group_mask); + if (ms) + free_instance(file_priv, ms); + + mutex_unlock(&file_priv->ms_lock); + + return ms ? 0 : -EINVAL; +} + +static inline struct ivpu_bo *get_ms_info_bo(struct ivpu_file_priv *file_priv) +{ + lockdep_assert_held(&file_priv->ms_lock); + + if (file_priv->ms_info_bo) + return file_priv->ms_info_bo; + + file_priv->ms_info_bo = ivpu_bo_create_global(file_priv->vdev, MS_INFO_BUFFER_SIZE, + DRM_IVPU_BO_CACHED | DRM_IVPU_BO_MAPPABLE); + return file_priv->ms_info_bo; +} + +int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_ivpu_metric_streamer_get_data *args = data; + struct ivpu_file_priv *file_priv = file->driver_priv; + struct ivpu_device *vdev = file_priv->vdev; + struct ivpu_bo *bo; + u64 info_size; + int ret; + + if (!args->metric_group_mask) + return -EINVAL; + + if (!args->buffer_size) + return ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, + 0, 0, NULL, &args->data_size); + if (!args->buffer_ptr) + return -EINVAL; + + mutex_lock(&file_priv->ms_lock); + + bo = get_ms_info_bo(file_priv); + if (!bo) { + ret = -ENOMEM; + goto unlock; + } + + ret = ivpu_jsm_metric_streamer_info(vdev, args->metric_group_mask, bo->vpu_addr, + ivpu_bo_size(bo), NULL, &info_size); + if (ret) + goto unlock; + + if (args->buffer_size < info_size) { + ret = -ENOSPC; + goto unlock; + } + + if (copy_to_user(u64_to_user_ptr(args->buffer_ptr), ivpu_bo_vaddr(bo), info_size)) + ret = -EFAULT; + + args->data_size = info_size; +unlock: + mutex_unlock(&file_priv->ms_lock); + + return ret; +} + +void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv) +{ + struct ivpu_ms_instance *ms, *tmp; + + mutex_lock(&file_priv->ms_lock); + + if (file_priv->ms_info_bo) { + ivpu_bo_free(file_priv->ms_info_bo); + file_priv->ms_info_bo = NULL; + } + + list_for_each_entry_safe(ms, tmp, &file_priv->ms_instance_list, ms_instance_node) + free_instance(file_priv, ms); + + mutex_unlock(&file_priv->ms_lock); +} + +void ivpu_ms_cleanup_all(struct ivpu_device *vdev) +{ + struct ivpu_file_priv *file_priv; + unsigned long ctx_id; + + mutex_lock(&vdev->context_list_lock); + + xa_for_each(&vdev->context_xa, ctx_id, file_priv) + ivpu_ms_cleanup(file_priv); + + mutex_unlock(&vdev->context_list_lock); +} diff --git a/drivers/accel/ivpu/ivpu_ms.h b/drivers/accel/ivpu/ivpu_ms.h new file mode 100644 index 000000000000..fbd5ebebc3d9 --- /dev/null +++ b/drivers/accel/ivpu/ivpu_ms.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ +/* + * Copyright (C) 2020-2024 Intel Corporation + */ +#ifndef __IVPU_MS_H__ +#define __IVPU_MS_H__ + +#include + +struct drm_device; +struct drm_file; +struct ivpu_bo; +struct ivpu_device; +struct ivpu_file_priv; + +struct ivpu_ms_instance { + struct ivpu_bo *bo; + struct list_head ms_instance_node; + u64 mask; + u64 buff_size; + u64 active_buff_vpu_addr; + u64 inactive_buff_vpu_addr; + void *active_buff_ptr; + void *inactive_buff_ptr; + u64 leftover_bytes; + void *leftover_addr; +}; + +int ivpu_ms_start_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_stop_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_get_data_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +int ivpu_ms_get_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file); +void ivpu_ms_cleanup(struct ivpu_file_priv *file_priv); +void ivpu_ms_cleanup_all(struct ivpu_device *vdev); + +#endif /* __IVPU_MS_H__ */ diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c index 4f5ea466731f..7b2aa205fdec 100644 --- a/drivers/accel/ivpu/ivpu_pm.c +++ b/drivers/accel/ivpu/ivpu_pm.c @@ -18,6 +18,7 @@ #include "ivpu_job.h" #include "ivpu_jsm_msg.h" #include "ivpu_mmu.h" +#include "ivpu_ms.h" #include "ivpu_pm.h" static bool ivpu_disable_recovery; @@ -131,6 +132,7 @@ static void ivpu_pm_recovery_work(struct work_struct *work) ivpu_suspend(vdev); ivpu_pm_prepare_cold_boot(vdev); ivpu_jobs_abort_all(vdev); + ivpu_ms_cleanup_all(vdev); ret = ivpu_resume(vdev); if (ret) @@ -333,6 +335,8 @@ void ivpu_pm_reset_prepare_cb(struct pci_dev *pdev) ivpu_hw_reset(vdev); ivpu_pm_prepare_cold_boot(vdev); ivpu_jobs_abort_all(vdev); + ivpu_ms_cleanup_all(vdev); + ivpu_dbg(vdev, PM, "Pre-reset done.\n"); } diff --git a/include/uapi/drm/ivpu_accel.h b/include/uapi/drm/ivpu_accel.h index 19a13468eca5..084fb529e1e9 100644 --- a/include/uapi/drm/ivpu_accel.h +++ b/include/uapi/drm/ivpu_accel.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation */ #ifndef __UAPI_IVPU_DRM_H__ @@ -21,6 +21,10 @@ extern "C" { #define DRM_IVPU_BO_INFO 0x03 #define DRM_IVPU_SUBMIT 0x05 #define DRM_IVPU_BO_WAIT 0x06 +#define DRM_IVPU_METRIC_STREAMER_START 0x07 +#define DRM_IVPU_METRIC_STREAMER_STOP 0x08 +#define DRM_IVPU_METRIC_STREAMER_GET_DATA 0x09 +#define DRM_IVPU_METRIC_STREAMER_GET_INFO 0x0a #define DRM_IOCTL_IVPU_GET_PARAM \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param) @@ -40,6 +44,22 @@ extern "C" { #define DRM_IOCTL_IVPU_BO_WAIT \ DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait) +#define DRM_IOCTL_IVPU_METRIC_STREAMER_START \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_START, \ + struct drm_ivpu_metric_streamer_start) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_STOP \ + DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_STOP, \ + struct drm_ivpu_metric_streamer_stop) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_DATA, \ + struct drm_ivpu_metric_streamer_get_data) + +#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_INFO \ + DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_INFO, \ + struct drm_ivpu_metric_streamer_get_data) + /** * DOC: contexts * @@ -336,6 +356,53 @@ struct drm_ivpu_bo_wait { __u32 pad; }; +/** + * struct drm_ivpu_metric_streamer_start - Start collecting metric data + */ +struct drm_ivpu_metric_streamer_start { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; + /** @sampling_period_ns: Sampling period in nanoseconds */ + __u64 sampling_period_ns; + /** + * @read_period_samples: + * + * Number of samples after which user space will try to read the data. + * Reading the data after significantly longer period may cause data loss. + */ + __u32 read_period_samples; + /** @sample_size: Returned size of a single sample in bytes */ + __u32 sample_size; + /** @max_data_size: Returned max @data_size from %DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA */ + __u32 max_data_size; +}; + +/** + * struct drm_ivpu_metric_streamer_get_data - Copy collected metric data + */ +struct drm_ivpu_metric_streamer_get_data { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; + /** @buffer_ptr: A pointer to a destination for the copied data */ + __u64 buffer_ptr; + /** @buffer_size: Size of the destination buffer */ + __u64 buffer_size; + /** + * @data_size: Returned size of copied metric data + * + * If the @buffer_size is zero, returns the amount of data ready to be copied. + */ + __u64 data_size; +}; + +/** + * struct drm_ivpu_metric_streamer_stop - Stop collecting metric data + */ +struct drm_ivpu_metric_streamer_stop { + /** @metric_group_mask: Indicates metric streamer instance */ + __u64 metric_group_mask; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 0b03829fdece47beba9ecb7dbcbde4585ee3663e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:51 -0700 Subject: drm/mipi-dsi: Fix theoretical int overflow in mipi_dsi_dcs_write_seq() The mipi_dsi_dcs_write_seq() macro makes a call to mipi_dsi_dcs_write_buffer() which returns a type ssize_t. The macro then stores it in an int and checks to see if it's negative. This could theoretically be a problem if "ssize_t" is larger than "int". To see the issue, imagine that "ssize_t" is 32-bits and "int" is 16-bits, you could see a problem if there was some code out there that looked like: mipi_dsi_dcs_write_seq(dsi, cmd, <32767 bytes as arguments>); ...since we'd get back that 32768 bytes were transferred and 32768 stored in a 16-bit int would look negative. Though there are no callsites where we'd actually hit this (even if "int" was only 16-bit), it's cleaner to make the types match so let's fix it. Fixes: 2a9e9daf7523 ("drm/mipi-dsi: Introduce mipi_dsi_dcs_write_seq macro") Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.1.I30fa4c8348ea316c886ef8a522a52fed617f930d@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.1.I30fa4c8348ea316c886ef8a522a52fed617f930d@changeid --- include/drm/drm_mipi_dsi.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 82b1cc434ea3..70ce0b8cbc68 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -333,18 +333,18 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, * @cmd: Command * @seq: buffer containing data to be transmitted */ -#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ - do { \ - static const u8 d[] = { cmd, seq }; \ - struct device *dev = &dsi->dev; \ - int ret; \ - ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err_ratelimited( \ - dev, "sending command %#02x failed: %d\n", \ - cmd, ret); \ - return ret; \ - } \ +#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ + do { \ + static const u8 d[] = { cmd, seq }; \ + struct device *dev = &dsi->dev; \ + ssize_t ret; \ + ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) { \ + dev_err_ratelimited( \ + dev, "sending command %#02x failed: %zd\n", \ + cmd, ret); \ + return ret; \ + } \ } while (0) /** -- cgit v1.2.3 From 24acbcce5cc673886c2f4f9b3f6f89a9c6a53b7e Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:52 -0700 Subject: drm/mipi-dsi: Fix theoretical int overflow in mipi_dsi_generic_write_seq() The mipi_dsi_generic_write_seq() macro makes a call to mipi_dsi_generic_write() which returns a type ssize_t. The macro then stores it in an int and checks to see if it's negative. This could theoretically be a problem if "ssize_t" is larger than "int". To see the issue, imagine that "ssize_t" is 32-bits and "int" is 16-bits, you could see a problem if there was some code out there that looked like: mipi_dsi_generic_write_seq(dsi, <32768 bytes as arguments>); ...since we'd get back that 32768 bytes were transferred and 32768 stored in a 16-bit int would look negative. Though there are no callsites where we'd actually hit this (even if "int" was only 16-bit), it's cleaner to make the types match so let's fix it. Fixes: a9015ce59320 ("drm/mipi-dsi: Add a mipi_dsi_dcs_write_seq() macro") Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.2.Iadb65b8add19ed3ae3ed6425011beb97e380a912@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.2.Iadb65b8add19ed3ae3ed6425011beb97e380a912@changeid --- include/drm/drm_mipi_dsi.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 70ce0b8cbc68..e0f56564bf97 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -314,17 +314,17 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, * @dsi: DSI peripheral device * @seq: buffer containing the payload */ -#define mipi_dsi_generic_write_seq(dsi, seq...) \ - do { \ - static const u8 d[] = { seq }; \ - struct device *dev = &dsi->dev; \ - int ret; \ - ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err_ratelimited(dev, "transmit data failed: %d\n", \ - ret); \ - return ret; \ - } \ +#define mipi_dsi_generic_write_seq(dsi, seq...) \ + do { \ + static const u8 d[] = { seq }; \ + struct device *dev = &dsi->dev; \ + ssize_t ret; \ + ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) { \ + dev_err_ratelimited(dev, "transmit data failed: %zd\n", \ + ret); \ + return ret; \ + } \ } while (0) /** -- cgit v1.2.3 From 7d3f6acaf87c7db6dcd868694a2f65e7040478dc Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:53 -0700 Subject: drm/mipi-dsi: mipi_dsi_*_write functions don't need to ratelimit prints We really don't expect these errors to be printed over and over again. When a driver hits the error it should bail out. Just use a normal error print. This gives a nice space savings for users of these functions: $ scripts/bloat-o-meter \ .../before/panel-novatek-nt36672e.ko \ .../after/panel-novatek-nt36672e.ko add/remove: 0/1 grow/shrink: 0/1 up/down: 0/-16760 (-16760) Function old new delta nt36672e_1080x2408_60hz_init 17080 10640 -6440 nt36672e_1080x2408_60hz_init._rs 10320 - -10320 Total: Before=31815, After=15055, chg -52.68% Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.3.I9982cd5d8014de7a4513f5619f66f88da49ce4ec@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.3.I9982cd5d8014de7a4513f5619f66f88da49ce4ec@changeid --- include/drm/drm_mipi_dsi.h | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index e0f56564bf97..67967be48dbd 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -314,17 +314,16 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, * @dsi: DSI peripheral device * @seq: buffer containing the payload */ -#define mipi_dsi_generic_write_seq(dsi, seq...) \ - do { \ - static const u8 d[] = { seq }; \ - struct device *dev = &dsi->dev; \ - ssize_t ret; \ - ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err_ratelimited(dev, "transmit data failed: %zd\n", \ - ret); \ - return ret; \ - } \ +#define mipi_dsi_generic_write_seq(dsi, seq...) \ + do { \ + static const u8 d[] = { seq }; \ + struct device *dev = &dsi->dev; \ + ssize_t ret; \ + ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) { \ + dev_err(dev, "transmit data failed: %zd\n", ret); \ + return ret; \ + } \ } while (0) /** @@ -340,8 +339,7 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, ssize_t ret; \ ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \ if (ret < 0) { \ - dev_err_ratelimited( \ - dev, "sending command %#02x failed: %zd\n", \ + dev_err(dev, "sending command %#02x failed: %zd\n", \ cmd, ret); \ return ret; \ } \ -- cgit v1.2.3 From 3b724909a380fddb44dfa0072fc459c698a52658 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:54 -0700 Subject: drm/mipi-dsi: Reduce driver bloat of mipi_dsi_*_write_seq() Through a cooperative effort between Hsin-Yi Wang and Dmitry Baryshkov, we have realized the dev_err() in the mipi_dsi_*_write_seq() macros was causing quite a bit of bloat to the kernel. Let's hoist this call into drm_mipi_dsi.c by adding a "chatty" version of the functions that includes the print. While doing this, add a bit more comments to these macros making it clear that they print errors and also that they return out of _the caller's_ function. Without any changes to clients this gives a nice savings. Specifically the macro was inlined and thus the error report call was inlined into every call to mipi_dsi_dcs_write_seq() and mipi_dsi_generic_write_seq(). By using a call to a "chatty" function, the usage is reduced to one call in the chatty function and a function call at the invoking site. Building with my build system shows one example: $ scripts/bloat-o-meter \ .../before/panel-novatek-nt36672e.ko \ .../after/panel-novatek-nt36672e.ko add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-4404 (-4404) Function old new delta nt36672e_1080x2408_60hz_init 10640 6236 -4404 Total: Before=15055, After=10651, chg -29.25% Note that given the change in location of the print it's harder to include the "cmd" in the printout for mipi_dsi_dcs_write_seq() since, theoretically, someone could call the new chatty function with a zero-size array and it would be illegal to dereference data[0]. There's a printk format to print the whole buffer and this is probably more useful for debugging anyway. Given that we're doing this for mipi_dsi_dcs_write_seq(), let's also print the buffer for mipi_dsi_generic_write_seq() in the error case. It should be noted that the current consensus of DRM folks is that the mipi_dsi_*_write_seq() should be deprecated due to the non-intuitive return behavior. A future patch will formally mark them as deprecated and provide an alternative. Reviewed-by: Dmitry Baryshkov Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.4.Id15fae80582bc74a0d4f1338987fa375738f45b9@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.4.Id15fae80582bc74a0d4f1338987fa375738f45b9@changeid --- drivers/gpu/drm/drm_mipi_dsi.c | 56 ++++++++++++++++++++++++++++++++++++++++++ include/drm/drm_mipi_dsi.h | 47 +++++++++++++++++++---------------- 2 files changed, 82 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 795001bb7ff1..4d2685d5a6e0 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -764,6 +764,34 @@ ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, } EXPORT_SYMBOL(mipi_dsi_generic_write); +/** + * mipi_dsi_generic_write_chatty() - mipi_dsi_generic_write() w/ an error log + * @dsi: DSI peripheral device + * @payload: buffer containing the payload + * @size: size of payload buffer + * + * Like mipi_dsi_generic_write() but includes a dev_err() + * call for you and returns 0 upon success, not the number of bytes sent. + * + * Return: 0 on success or a negative error code on failure. + */ +int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, + const void *payload, size_t size) +{ + struct device *dev = &dsi->dev; + ssize_t ret; + + ret = mipi_dsi_generic_write(dsi, payload, size); + if (ret < 0) { + dev_err(dev, "sending generic data %*ph failed: %zd\n", + (int)size, payload, ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(mipi_dsi_generic_write_chatty); + /** * mipi_dsi_generic_read() - receive data using a generic read packet * @dsi: DSI peripheral device @@ -852,6 +880,34 @@ ssize_t mipi_dsi_dcs_write_buffer(struct mipi_dsi_device *dsi, } EXPORT_SYMBOL(mipi_dsi_dcs_write_buffer); +/** + * mipi_dsi_dcs_write_buffer_chatty - mipi_dsi_dcs_write_buffer() w/ an error log + * @dsi: DSI peripheral device + * @data: buffer containing data to be transmitted + * @len: size of transmission buffer + * + * Like mipi_dsi_dcs_write_buffer() but includes a dev_err() + * call for you and returns 0 upon success, not the number of bytes sent. + * + * Return: 0 on success or a negative error code on failure. + */ +int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi, + const void *data, size_t len) +{ + struct device *dev = &dsi->dev; + ssize_t ret; + + ret = mipi_dsi_dcs_write_buffer(dsi, data, len); + if (ret < 0) { + dev_err(dev, "sending dcs data %*ph failed: %zd\n", + (int)len, data, ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL(mipi_dsi_dcs_write_buffer_chatty); + /** * mipi_dsi_dcs_write() - send DCS write command * @dsi: DSI peripheral device diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 67967be48dbd..6d68d9927f46 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -256,6 +256,8 @@ int mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi, ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, size_t size); +int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, + const void *payload, size_t size); ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, size_t num_params, void *data, size_t size); @@ -279,6 +281,8 @@ enum mipi_dsi_dcs_tear_mode { ssize_t mipi_dsi_dcs_write_buffer(struct mipi_dsi_device *dsi, const void *data, size_t len); +int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi, + const void *data, size_t len); ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *dsi, u8 cmd, const void *data, size_t len); ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data, @@ -311,38 +315,39 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet + * + * This macro will print errors for you and will RETURN FROM THE CALLING + * FUNCTION (yes this is non-intuitive) upon error. + * * @dsi: DSI peripheral device * @seq: buffer containing the payload */ -#define mipi_dsi_generic_write_seq(dsi, seq...) \ - do { \ - static const u8 d[] = { seq }; \ - struct device *dev = &dsi->dev; \ - ssize_t ret; \ - ret = mipi_dsi_generic_write(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err(dev, "transmit data failed: %zd\n", ret); \ - return ret; \ - } \ +#define mipi_dsi_generic_write_seq(dsi, seq...) \ + do { \ + static const u8 d[] = { seq }; \ + int ret; \ + ret = mipi_dsi_generic_write_chatty(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) \ + return ret; \ } while (0) /** * mipi_dsi_dcs_write_seq - transmit a DCS command with payload + * + * This macro will print errors for you and will RETURN FROM THE CALLING + * FUNCTION (yes this is non-intuitive) upon error. + * * @dsi: DSI peripheral device * @cmd: Command * @seq: buffer containing data to be transmitted */ -#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ - do { \ - static const u8 d[] = { cmd, seq }; \ - struct device *dev = &dsi->dev; \ - ssize_t ret; \ - ret = mipi_dsi_dcs_write_buffer(dsi, d, ARRAY_SIZE(d)); \ - if (ret < 0) { \ - dev_err(dev, "sending command %#02x failed: %zd\n", \ - cmd, ret); \ - return ret; \ - } \ +#define mipi_dsi_dcs_write_seq(dsi, cmd, seq...) \ + do { \ + static const u8 d[] = { cmd, seq }; \ + int ret; \ + ret = mipi_dsi_dcs_write_buffer_chatty(dsi, d, ARRAY_SIZE(d)); \ + if (ret < 0) \ + return ret; \ } while (0) /** -- cgit v1.2.3 From 966e397e4f6032b73438f8d775756541513e7daf Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 14 May 2024 10:20:55 -0700 Subject: drm/mipi-dsi: Introduce mipi_dsi_*_write_seq_multi() The current mipi_dsi_*_write_seq() macros are non-intutitive because they contain a hidden "return" statement that will return out of the _caller_ of the macro. Let's mark them as deprecated and instead introduce some new macros that are more intuitive. These new macros are less optimal when an error occurs but should behave more optimally when there is no error. Specifically these new macros cause smaller code to get generated and the code size savings (less to fetch from RAM, less cache space used, less RAM used) are important. Since the error case isn't something we need to optimize for and these new macros are easier to understand and more flexible, they should be used. After converting to use these new functions, one example shows some nice savings while also being easier to understand. $ scripts/bloat-o-meter \ ...after/panel-novatek-nt36672e.ko \ ...ctx/panel-novatek-nt36672e.ko add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-988 (-988) Function old new delta nt36672e_1080x2408_60hz_init 6236 5248 -988 Total: Before=10651, After=9663, chg -9.28% Reviewed-by: Neil Armstrong Reviewed-by: Linus Walleij Signed-off-by: Douglas Anderson Link: https://lore.kernel.org/r/20240514102056.v5.5.Ie94246c30fe95101e0e26dd5f96e976dbeb8f242@changeid Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240514102056.v5.5.Ie94246c30fe95101e0e26dd5f96e976dbeb8f242@changeid --- Documentation/gpu/todo.rst | 18 ++++++++++++ drivers/gpu/drm/drm_mipi_dsi.c | 56 ++++++++++++++++++++++++++++++++++++++ include/drm/drm_mipi_dsi.h | 62 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 136 insertions(+) (limited to 'include') diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index e2a0585915b3..2734b8a34541 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -494,6 +494,24 @@ Contact: Douglas Anderson Level: Starter/Intermediate +Transition away from using mipi_dsi_*_write_seq() +------------------------------------------------- + +The macros mipi_dsi_generic_write_seq() and mipi_dsi_dcs_write_seq() are +non-intuitive because, if there are errors, they return out of the *caller's* +function. We should move all callers to use mipi_dsi_generic_write_seq_multi() +and mipi_dsi_dcs_write_seq_multi() macros instead. + +Once all callers are transitioned, the macros and the functions that they call, +mipi_dsi_generic_write_chatty() and mipi_dsi_dcs_write_buffer_chatty(), can +probably be removed. Alternatively, if people feel like the _multi() variants +are overkill for some use cases, we could keep the mipi_dsi_*_write_seq() +variants but change them not to return out of the caller. + +Contact: Douglas Anderson + +Level: Starter + Core refactorings ================= diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 4d2685d5a6e0..26c7383406c1 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -792,6 +792,34 @@ int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, } EXPORT_SYMBOL(mipi_dsi_generic_write_chatty); +/** + * mipi_dsi_generic_write_multi() - mipi_dsi_generic_write_chatty() w/ accum_err + * @ctx: Context for multiple DSI transactions + * @payload: buffer containing the payload + * @size: size of payload buffer + * + * Like mipi_dsi_generic_write_chatty() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, + const void *payload, size_t size) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_generic_write(dsi, payload, size); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending generic data %*ph failed: %d\n", + (int)size, payload, ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_generic_write_multi); + /** * mipi_dsi_generic_read() - receive data using a generic read packet * @dsi: DSI peripheral device @@ -908,6 +936,34 @@ int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi, } EXPORT_SYMBOL(mipi_dsi_dcs_write_buffer_chatty); +/** + * mipi_dsi_dcs_write_buffer_multi - mipi_dsi_dcs_write_buffer_chatty() w/ accum_err + * @ctx: Context for multiple DSI transactions + * @data: buffer containing data to be transmitted + * @len: size of transmission buffer + * + * Like mipi_dsi_dcs_write_buffer_chatty() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_write_buffer_multi(struct mipi_dsi_multi_context *ctx, + const void *data, size_t len) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_write_buffer(dsi, data, len); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending dcs data %*ph failed: %d\n", + (int)len, data, ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_write_buffer_multi); + /** * mipi_dsi_dcs_write() - send DCS write command * @dsi: DSI peripheral device diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 6d68d9927f46..5e9cad541bd6 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -197,6 +197,27 @@ struct mipi_dsi_device { struct drm_dsc_config *dsc; }; +/** + * struct mipi_dsi_multi_context - Context to call multiple MIPI DSI funcs in a row + */ +struct mipi_dsi_multi_context { + /** + * @dsi: Pointer to the MIPI DSI device + */ + struct mipi_dsi_device *dsi; + + /** + * @accum_err: Storage for the accumulated error over the multiple calls + * + * Init to 0. If a function encounters an error then the error code + * will be stored here. If you call a function and this points to a + * non-zero value then the function will be a noop. This allows calling + * a function many times in a row and just checking the error at the + * end to see if any of them failed. + */ + int accum_err; +}; + #define MIPI_DSI_MODULE_PREFIX "mipi-dsi:" #define to_mipi_dsi_device(__dev) container_of_const(__dev, struct mipi_dsi_device, dev) @@ -258,6 +279,8 @@ ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, size_t size); int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, const void *payload, size_t size); +void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, + const void *payload, size_t size); ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, size_t num_params, void *data, size_t size); @@ -283,6 +306,8 @@ ssize_t mipi_dsi_dcs_write_buffer(struct mipi_dsi_device *dsi, const void *data, size_t len); int mipi_dsi_dcs_write_buffer_chatty(struct mipi_dsi_device *dsi, const void *data, size_t len); +void mipi_dsi_dcs_write_buffer_multi(struct mipi_dsi_multi_context *ctx, + const void *data, size_t len); ssize_t mipi_dsi_dcs_write(struct mipi_dsi_device *dsi, u8 cmd, const void *data, size_t len); ssize_t mipi_dsi_dcs_read(struct mipi_dsi_device *dsi, u8 cmd, void *data, @@ -319,6 +344,9 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, * This macro will print errors for you and will RETURN FROM THE CALLING * FUNCTION (yes this is non-intuitive) upon error. * + * Because of the non-intuitive return behavior, THIS MACRO IS DEPRECATED. + * Please replace calls of it with mipi_dsi_generic_write_seq_multi(). + * * @dsi: DSI peripheral device * @seq: buffer containing the payload */ @@ -331,12 +359,30 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, return ret; \ } while (0) +/** + * mipi_dsi_generic_write_seq_multi - transmit data using a generic write packet + * + * This macro will print errors for you and error handling is optimized for + * callers that call this multiple times in a row. + * + * @ctx: Context for multiple DSI transactions + * @seq: buffer containing the payload + */ +#define mipi_dsi_generic_write_seq_multi(ctx, seq...) \ + do { \ + static const u8 d[] = { seq }; \ + mipi_dsi_generic_write_multi(ctx, d, ARRAY_SIZE(d)); \ + } while (0) + /** * mipi_dsi_dcs_write_seq - transmit a DCS command with payload * * This macro will print errors for you and will RETURN FROM THE CALLING * FUNCTION (yes this is non-intuitive) upon error. * + * Because of the non-intuitive return behavior, THIS MACRO IS DEPRECATED. + * Please replace calls of it with mipi_dsi_dcs_write_seq_multi(). + * * @dsi: DSI peripheral device * @cmd: Command * @seq: buffer containing data to be transmitted @@ -350,6 +396,22 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, return ret; \ } while (0) +/** + * mipi_dsi_dcs_write_seq_multi - transmit a DCS command with payload + * + * This macro will print errors for you and error handling is optimized for + * callers that call this multiple times in a row. + * + * @ctx: Context for multiple DSI transactions + * @cmd: Command + * @seq: buffer containing data to be transmitted + */ +#define mipi_dsi_dcs_write_seq_multi(ctx, cmd, seq...) \ + do { \ + static const u8 d[] = { cmd, seq }; \ + mipi_dsi_dcs_write_buffer_multi(ctx, d, ARRAY_SIZE(d)); \ + } while (0) + /** * struct mipi_dsi_driver - DSI driver * @driver: device driver model driver -- cgit v1.2.3 From 0f1bb41bf39695c84c83ce6f69e125b562d1d7ab Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 14 May 2024 15:59:39 +0100 Subject: drm/i915: Support replaying GPU hangs with captured context image When debugging GPU hangs Mesa developers are finding it useful to replay the captured error state against the simulator. But due various simulator limitations which prevent replicating all hangs, one step further is being able to replay against a real GPU. This is almost doable today with the missing part being able to upload the captured context image into the driver state prior to executing the uploaded hanging batch and all the buffers. To enable this last part we add a new context parameter called I915_CONTEXT_PARAM_CONTEXT_IMAGE. It follows the existing SSEU configuration pattern of being able to select which context to apply against, paired with the actual image and its size. Since this is adding a new concept of debug only uapi, we hide it behind a new kconfig option and also require activation with a module parameter. Together with a warning banner printed at driver load, all those combined should be sufficient to guard against inadvertently enabling the feature. In terms of implementation we allow the legacy context set param to be used since that removes the need to record the per context data in the proto context, while still allowing flexibility of specifying context images for any context. Mesa MR using the uapi can be seen at: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27594 v2: * Fix whitespace alignment as per checkpatch. * Added warning on userspace misuse. * Rebase for extracting ce->default_state shadowing. v3: * Rebase for I915_CONTEXT_PARAM_LOW_LATENCY. Signed-off-by: Tvrtko Ursulin Cc: Lionel Landwerlin Cc: Carlos Santa Cc: Rodrigo Vivi Reviewed-by: Rodrigo Vivi Tested-by: Carlos Santa Signed-off-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20240514145939.87427-2-tursulin@igalia.com --- drivers/gpu/drm/i915/Kconfig.debug | 17 ++++ drivers/gpu/drm/i915/gem/i915_gem_context.c | 113 ++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/intel_context.c | 2 + drivers/gpu/drm/i915/gt/intel_context.h | 22 +++++ drivers/gpu/drm/i915/gt/intel_context_types.h | 1 + drivers/gpu/drm/i915/gt/intel_lrc.c | 3 +- drivers/gpu/drm/i915/gt/intel_ring_submission.c | 3 +- drivers/gpu/drm/i915/i915_params.c | 5 ++ drivers/gpu/drm/i915/i915_params.h | 3 +- include/uapi/drm/i915_drm.h | 27 ++++++ 10 files changed, 193 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index d8397065c3f0..1852e0804942 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -16,6 +16,23 @@ config DRM_I915_WERROR If in doubt, say "N". +config DRM_I915_REPLAY_GPU_HANGS_API + bool "Enable GPU hang replay userspace API" + depends on DRM_I915 + depends on EXPERT + default n + help + Choose this option if you want to enable special and unstable + userspace API used for replaying GPU hangs on a running system. + + This API is intended to be used by userspace graphics stack developers + and provides no stability guarantees. + + The API needs to be activated at boot time using the + enable_debug_only_api module parameter. + + If in doubt, say "N". + config DRM_I915_DEBUG bool "Enable additional driver debugging" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 81f65cab1330..c0543c35cd6a 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -78,6 +78,7 @@ #include "gt/intel_engine_user.h" #include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" +#include "gt/shmem_utils.h" #include "pxp/intel_pxp.h" @@ -957,6 +958,7 @@ static int set_proto_ctx_param(struct drm_i915_file_private *fpriv, case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: default: ret = -EINVAL; break; @@ -2104,6 +2106,95 @@ static int get_protected(struct i915_gem_context *ctx, return 0; } +static int set_context_image(struct i915_gem_context *ctx, + struct drm_i915_gem_context_param *args) +{ + struct i915_gem_context_param_context_image user; + struct intel_context *ce; + struct file *shmem_state; + unsigned long lookup; + void *state; + int ret = 0; + + if (!IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)) + return -EINVAL; + + if (!ctx->i915->params.enable_debug_only_api) + return -EINVAL; + + if (args->size < sizeof(user)) + return -EINVAL; + + if (copy_from_user(&user, u64_to_user_ptr(args->value), sizeof(user))) + return -EFAULT; + + if (user.mbz) + return -EINVAL; + + if (user.flags & ~(I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX)) + return -EINVAL; + + lookup = 0; + if (user.flags & I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX) + lookup |= LOOKUP_USER_INDEX; + + ce = lookup_user_engine(ctx, lookup, &user.engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + if (user.size < ce->engine->context_size) { + ret = -EINVAL; + goto out_ce; + } + + if (drm_WARN_ON_ONCE(&ctx->i915->drm, + test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) { + /* + * This is racy but for a debug only API, if userspace is keen + * to create and configure contexts, while simultaneously using + * them from a second thread, let them suffer by potentially not + * executing with the context image they just raced to apply. + */ + ret = -EBUSY; + goto out_ce; + } + + state = kmalloc(ce->engine->context_size, GFP_KERNEL); + if (!state) { + ret = -ENOMEM; + goto out_ce; + } + + if (copy_from_user(state, u64_to_user_ptr(user.image), + ce->engine->context_size)) { + ret = -EFAULT; + goto out_state; + } + + shmem_state = shmem_create_from_data(ce->engine->name, + state, ce->engine->context_size); + if (IS_ERR(shmem_state)) { + ret = PTR_ERR(shmem_state); + goto out_state; + } + + if (intel_context_set_own_state(ce)) { + ret = -EBUSY; + fput(shmem_state); + goto out_state; + } + + ce->default_state = shmem_state; + + args->size = sizeof(user); + +out_state: + kfree(state); +out_ce: + intel_context_put(ce); + return ret; +} + static int ctx_setparam(struct drm_i915_file_private *fpriv, struct i915_gem_context *ctx, struct drm_i915_gem_context_param *args) @@ -2156,6 +2247,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv, ret = set_persistence(ctx, args); break; + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: + ret = set_context_image(ctx, args); + break; + case I915_CONTEXT_PARAM_PROTECTED_CONTENT: case I915_CONTEXT_PARAM_NO_ZEROMAP: case I915_CONTEXT_PARAM_BAN_PERIOD: @@ -2500,6 +2595,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, case I915_CONTEXT_PARAM_BAN_PERIOD: case I915_CONTEXT_PARAM_ENGINES: case I915_CONTEXT_PARAM_RINGSIZE: + case I915_CONTEXT_PARAM_CONTEXT_IMAGE: default: ret = -EINVAL; break; @@ -2612,5 +2708,22 @@ int __init i915_gem_context_module_init(void) if (!slab_luts) return -ENOMEM; + if (IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API)) { + pr_notice("**************************************************************\n"); + pr_notice("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_notice("** **\n"); + if (i915_modparams.enable_debug_only_api) + pr_notice("** i915.enable_debug_only_api is intended to be set **\n"); + else + pr_notice("** CONFIG_DRM_I915_REPLAY_GPU_HANGS_API builds are intended **\n"); + pr_notice("** for specific userspace graphics stack developers only! **\n"); + pr_notice("** **\n"); + pr_notice("** If you are seeing this message please report this to the **\n"); + pr_notice("** provider of your kernel build. **\n"); + pr_notice("** **\n"); + pr_notice("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); + pr_notice("**************************************************************\n"); + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index a2f1245741bb..b1b8695ba7c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -27,6 +27,8 @@ static void rcu_context_free(struct rcu_head *rcu) struct intel_context *ce = container_of(rcu, typeof(*ce), rcu); trace_intel_context_free(ce); + if (intel_context_has_own_state(ce)) + fput(ce->default_state); kmem_cache_free(slab_ce, ce); } diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 25564c01507e..9f523999acd1 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -375,6 +375,28 @@ intel_context_clear_nopreempt(struct intel_context *ce) clear_bit(CONTEXT_NOPREEMPT, &ce->flags); } +#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) +static inline bool intel_context_has_own_state(const struct intel_context *ce) +{ + return test_bit(CONTEXT_OWN_STATE, &ce->flags); +} + +static inline bool intel_context_set_own_state(struct intel_context *ce) +{ + return test_and_set_bit(CONTEXT_OWN_STATE, &ce->flags); +} +#else +static inline bool intel_context_has_own_state(const struct intel_context *ce) +{ + return false; +} + +static inline bool intel_context_set_own_state(struct intel_context *ce) +{ + return true; +} +#endif + u64 intel_context_get_total_runtime_ns(struct intel_context *ce); u64 intel_context_get_avg_runtime_ns(struct intel_context *ce); diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 6ae8abfeccdb..98c7f6052069 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -133,6 +133,7 @@ struct intel_context { #define CONTEXT_IS_PARKING 12 #define CONTEXT_EXITING 13 #define CONTEXT_LOW_LATENCY 14 +#define CONTEXT_OWN_STATE 15 struct { u64 timeout_us; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index d4ffb352403c..7bd5d2c29056 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1130,7 +1130,8 @@ int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) GEM_BUG_ON(ce->state); - ce->default_state = engine->default_state; + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; vma = __lrc_alloc_state(ce, engine); if (IS_ERR(vma)) diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index 8625e88e785f..72277bc8322e 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -569,7 +569,8 @@ static int ring_context_alloc(struct intel_context *ce) { struct intel_engine_cs *engine = ce->engine; - ce->default_state = engine->default_state; + if (!intel_context_has_own_state(ce)) + ce->default_state = engine->default_state; /* One ringbuffer to rule them all */ GEM_BUG_ON(!engine->legacy.ring); diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 8c00169e3ab7..316e55f3e87b 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -131,6 +131,11 @@ i915_param_named_unsafe(lmem_size, uint, 0400, i915_param_named_unsafe(lmem_bar_size, uint, 0400, "Set the lmem bar size(in MiB)."); +#if IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) +i915_param_named(enable_debug_only_api, bool, 0400, + "Enable support for unstable debug only userspace API. (default:false)"); +#endif + static void _param_print_bool(struct drm_printer *p, const char *name, bool val) { diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 2eb3f2115ff2..0fbcb5b6d7bf 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -63,7 +63,8 @@ struct drm_printer; /* leave bools at the end to not create holes */ \ param(bool, enable_hangcheck, true, 0600) \ param(bool, error_capture, true, IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) ? 0600 : 0) \ - param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) + param(bool, enable_gvt, false, IS_ENABLED(CONFIG_DRM_I915_GVT) ? 0400 : 0) \ + param(bool, enable_debug_only_api, false, IS_ENABLED(CONFIG_DRM_I915_REPLAY_GPU_HANGS_API) ? 0400 : 0) #define MEMBER(T, member, ...) T member; struct i915_params { diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index d4d86e566e07..535cb68fdb5c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. -- cgit v1.2.3 From f79d6d28d8fe77b14beeaebe5393d9f294f8d09d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sun, 12 May 2024 02:00:19 +0300 Subject: drm/mipi-dsi: wrap more functions for streamline handling Follow the pattern of mipi_dsi_dcs_*_multi() and wrap several existing MIPI DSI functions to use the context for processing. This simplifies and streamlines driver code to use simpler code pattern. Note, msleep function is also wrapped in this way as it is frequently called inbetween other mipi_dsi_dcs_*() functions. Signed-off-by: Dmitry Baryshkov Reviewed-by: Douglas Anderson Reviewed-by: Neil Armstrong Link: https://lore.kernel.org/r/20240512-dsi-panels-upd-api-v2-2-e31ca14d102e@linaro.org Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240512-dsi-panels-upd-api-v2-2-e31ca14d102e@linaro.org --- drivers/gpu/drm/drm_mipi_dsi.c | 210 +++++++++++++++++++++++++++++++++++++++++ include/drm/drm_mipi_dsi.h | 21 +++++ 2 files changed, 231 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 26c7383406c1..a471c46f5ca6 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -1429,6 +1429,216 @@ int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, } EXPORT_SYMBOL(mipi_dsi_dcs_get_display_brightness_large); +/** + * mipi_dsi_picture_parameter_set_multi() - transmit the DSC PPS to the peripheral + * @ctx: Context for multiple DSI transactions + * @pps: VESA DSC 1.1 Picture Parameter Set + * + * Like mipi_dsi_picture_parameter_set() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_picture_parameter_set_multi(struct mipi_dsi_multi_context *ctx, + const struct drm_dsc_picture_parameter_set *pps) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_picture_parameter_set(dsi, pps); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending PPS failed: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_picture_parameter_set_multi); + +/** + * mipi_dsi_compression_mode_ext_multi() - enable/disable DSC on the peripheral + * @ctx: Context for multiple DSI transactions + * @enable: Whether to enable or disable the DSC + * @algo: Selected compression algorithm + * @pps_selector: Select PPS from the table of pre-stored or uploaded PPS entries + * + * Like mipi_dsi_compression_mode_ext() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_compression_mode_ext_multi(struct mipi_dsi_multi_context *ctx, + bool enable, + enum mipi_dsi_compression_algo algo, + unsigned int pps_selector) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_compression_mode_ext(dsi, enable, algo, pps_selector); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending COMPRESSION_MODE failed: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_compression_mode_ext_multi); + +/** + * mipi_dsi_dcs_nop_multi() - send DCS NOP packet + * @ctx: Context for multiple DSI transactions + * + * Like mipi_dsi_dcs_nop() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_nop_multi(struct mipi_dsi_multi_context *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_nop(dsi); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending DCS NOP failed: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_nop_multi); + +/** + * mipi_dsi_dcs_enter_sleep_mode_multi() - send DCS ENTER_SLEEP_MODE packet + * @ctx: Context for multiple DSI transactions + * + * Like mipi_dsi_dcs_enter_sleep_mode() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_enter_sleep_mode_multi(struct mipi_dsi_multi_context *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_enter_sleep_mode(dsi); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending DCS ENTER_SLEEP_MODE failed: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_enter_sleep_mode_multi); + +/** + * mipi_dsi_dcs_exit_sleep_mode_multi() - send DCS EXIT_SLEEP_MODE packet + * @ctx: Context for multiple DSI transactions + * + * Like mipi_dsi_dcs_exit_sleep_mode() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_exit_sleep_mode_multi(struct mipi_dsi_multi_context *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_exit_sleep_mode(dsi); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending DCS EXIT_SLEEP_MODE failed: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_exit_sleep_mode_multi); + +/** + * mipi_dsi_dcs_set_display_off_multi() - send DCS SET_DISPLAY_OFF packet + * @ctx: Context for multiple DSI transactions + * + * Like mipi_dsi_dcs_set_display_off() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_set_display_off_multi(struct mipi_dsi_multi_context *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_set_display_off(dsi); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending DCS SET_DISPLAY_OFF failed: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_set_display_off_multi); + +/** + * mipi_dsi_dcs_set_display_on_multi() - send DCS SET_DISPLAY_ON packet + * @ctx: Context for multiple DSI transactions + * + * Like mipi_dsi_dcs_set_display_on() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_set_display_on_multi(struct mipi_dsi_multi_context *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_set_display_on(dsi); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending DCS SET_DISPLAY_ON failed: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_set_display_on_multi); + +/** + * mipi_dsi_dcs_set_tear_on_multi() - send DCS SET_TEAR_ON packet + * @ctx: Context for multiple DSI transactions + * @mode: the Tearing Effect Output Line mode + * + * Like mipi_dsi_dcs_set_tear_on() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_set_tear_on_multi(struct mipi_dsi_multi_context *ctx, + enum mipi_dsi_dcs_tear_mode mode) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + ssize_t ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_set_tear_on(dsi, mode); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "sending DCS SET_TEAR_ON failed: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_on_multi); + static int mipi_dsi_drv_probe(struct device *dev) { struct mipi_dsi_driver *drv = to_mipi_dsi_driver(dev->driver); diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 5e9cad541bd6..bd5a0b6d0711 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -275,6 +275,13 @@ int mipi_dsi_compression_mode_ext(struct mipi_dsi_device *dsi, bool enable, int mipi_dsi_picture_parameter_set(struct mipi_dsi_device *dsi, const struct drm_dsc_picture_parameter_set *pps); +void mipi_dsi_compression_mode_ext_multi(struct mipi_dsi_multi_context *ctx, + bool enable, + enum mipi_dsi_compression_algo algo, + unsigned int pps_selector); +void mipi_dsi_picture_parameter_set_multi(struct mipi_dsi_multi_context *ctx, + const struct drm_dsc_picture_parameter_set *pps); + ssize_t mipi_dsi_generic_write(struct mipi_dsi_device *dsi, const void *payload, size_t size); int mipi_dsi_generic_write_chatty(struct mipi_dsi_device *dsi, @@ -284,6 +291,12 @@ void mipi_dsi_generic_write_multi(struct mipi_dsi_multi_context *ctx, ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, size_t num_params, void *data, size_t size); +#define mipi_dsi_msleep(ctx, delay) \ + do { \ + if (!ctx.accum_err) \ + msleep(delay); \ + } while (0) + /** * enum mipi_dsi_dcs_tear_mode - Tearing Effect Output Line mode * @MIPI_DSI_DCS_TEAR_MODE_VBLANK: the TE output line consists of V-Blanking @@ -338,6 +351,14 @@ int mipi_dsi_dcs_set_display_brightness_large(struct mipi_dsi_device *dsi, int mipi_dsi_dcs_get_display_brightness_large(struct mipi_dsi_device *dsi, u16 *brightness); +void mipi_dsi_dcs_nop_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_enter_sleep_mode_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_exit_sleep_mode_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_display_off_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_display_on_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_tear_on_multi(struct mipi_dsi_multi_context *ctx, + enum mipi_dsi_dcs_tear_mode mode); + /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet * -- cgit v1.2.3 From 36b75080e68b4a27ae1c40beffb3d6131f8eeeff Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Sun, 12 May 2024 19:23:26 -0300 Subject: drm/v3d: Create a new V3D parameter for the maximum number of perfcnt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The maximum number of performance counters can change from version to version and it's important for userspace to know this value, as it needs to use the counters for performance queries. Therefore, expose the maximum number of performance counters to userspace as a parameter. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-4-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.c | 3 +++ include/uapi/drm/v3d_drm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index 6b9dd26df9fe..d2c1d5053132 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -94,6 +94,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data, case DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE: args->value = 1; return 0; + case DRM_V3D_PARAM_MAX_PERF_COUNTERS: + args->value = v3d->max_counters; + return 0; default: DRM_DEBUG("Unknown parameter %d\n", args->param); return -EINVAL; diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index dce1835eced4..215b01bb69c3 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -286,6 +286,7 @@ enum drm_v3d_param { DRM_V3D_PARAM_SUPPORTS_PERFMON, DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE, + DRM_V3D_PARAM_MAX_PERF_COUNTERS, }; struct drm_v3d_get_param { -- cgit v1.2.3 From f33fe58298e686e7cc2d24f747c980457812b566 Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Sun, 12 May 2024 19:23:27 -0300 Subject: drm/v3d: Create new IOCTL to expose performance counters information MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace usually needs some information about the performance counters available. Although we could replicate this information in the kernel and user-space, let's use the kernel as the "single source of truth" to avoid issues in the future (e.g. list of performance counters is updated in user-space, but not in the kernel, generating invalid requests). Therefore, create a new IOCTL to expose the performance counters information, that is name, category, and description. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-5-mcanal@igalia.com --- drivers/gpu/drm/v3d/v3d_drv.c | 1 + drivers/gpu/drm/v3d/v3d_drv.h | 2 ++ drivers/gpu/drm/v3d/v3d_perfmon.c | 33 +++++++++++++++++++++++++++++++++ include/uapi/drm/v3d_drm.h | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 73 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c index d2c1d5053132..f7477488b1cc 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.c +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -211,6 +211,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = { DRM_IOCTL_DEF_DRV(V3D_PERFMON_DESTROY, v3d_perfmon_destroy_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_VALUES, v3d_perfmon_get_values_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CPU, v3d_submit_cpu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), + DRM_IOCTL_DEF_DRV(V3D_PERFMON_GET_COUNTER, v3d_perfmon_get_counter_ioctl, DRM_RENDER_ALLOW), }; static const struct drm_driver v3d_drm_driver = { diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index bd1e38f7d10a..44cfddedebde 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -582,6 +582,8 @@ int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); +int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); /* v3d_sysfs.c */ int v3d_sysfs_init(struct device *dev); diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c index f268d9466c0f..73e2bb8bdb7f 100644 --- a/drivers/gpu/drm/v3d/v3d_perfmon.c +++ b/drivers/gpu/drm/v3d/v3d_perfmon.c @@ -217,3 +217,36 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data, return ret; } + +int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_perfmon_get_counter *req = data; + struct v3d_dev *v3d = to_v3d_dev(dev); + const struct v3d_perf_counter_desc *counter; + + for (int i = 0; i < ARRAY_SIZE(req->reserved); i++) { + if (req->reserved[i] != 0) + return -EINVAL; + } + + /* Make sure that the counter ID is valid */ + if (req->counter >= v3d->max_counters) + return -EINVAL; + + if (v3d->ver >= 71) { + WARN_ON(v3d->max_counters != ARRAY_SIZE(v3d_v71_performance_counters)); + counter = &v3d_v71_performance_counters[req->counter]; + } else if (v3d->ver >= 42) { + WARN_ON(v3d->max_counters != ARRAY_SIZE(v3d_v42_performance_counters)); + counter = &v3d_v42_performance_counters[req->counter]; + } else { + return -EOPNOTSUPP; + } + + strscpy(req->name, counter->name, sizeof(req->name)); + strscpy(req->category, counter->category, sizeof(req->category)); + strscpy(req->description, counter->description, sizeof(req->description)); + + return 0; +} diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 215b01bb69c3..0860ddb3d0b6 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -42,6 +42,7 @@ extern "C" { #define DRM_V3D_PERFMON_DESTROY 0x09 #define DRM_V3D_PERFMON_GET_VALUES 0x0a #define DRM_V3D_SUBMIT_CPU 0x0b +#define DRM_V3D_PERFMON_GET_COUNTER 0x0c #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) @@ -58,6 +59,8 @@ extern "C" { #define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, \ struct drm_v3d_perfmon_get_values) #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu) +#define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, \ + struct drm_v3d_perfmon_get_counter) #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01 #define DRM_V3D_SUBMIT_EXTENSION 0x02 @@ -718,6 +721,40 @@ struct drm_v3d_perfmon_get_values { __u64 values_ptr; }; +#define DRM_V3D_PERFCNT_MAX_NAME 64 +#define DRM_V3D_PERFCNT_MAX_CATEGORY 32 +#define DRM_V3D_PERFCNT_MAX_DESCRIPTION 256 + +/** + * struct drm_v3d_perfmon_get_counter - ioctl to get the description of a + * performance counter + * + * As userspace needs to retrieve information about the performance counters + * available, this IOCTL allows users to get information about a performance + * counter (name, category and description). + */ +struct drm_v3d_perfmon_get_counter { + /* + * Counter ID + * + * Must be smaller than the maximum number of performance counters, which + * can be retrieve through DRM_V3D_PARAM_MAX_PERF_COUNTERS. + */ + __u8 counter; + + /* Name of the counter */ + __u8 name[DRM_V3D_PERFCNT_MAX_NAME]; + + /* Category of the counter */ + __u8 category[DRM_V3D_PERFCNT_MAX_CATEGORY]; + + /* Description of the counter */ + __u8 description[DRM_V3D_PERFCNT_MAX_DESCRIPTION]; + + /* mbz */ + __u8 reserved[7]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 673087d8b023faf34b84e8faf63bbeea3da87bab Mon Sep 17 00:00:00 2001 From: Maíra Canal Date: Sun, 12 May 2024 19:23:29 -0300 Subject: drm/v3d: Deprecate the use of the Performance Counters enum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Performance Counters enum used to identify the index of each performance counter and provide the total number of performance counters (V3D_PERFCNT_NUM). But, this enum is only valid for V3D 4.2, not for V3D 7.1. As we implemented a new flexible structure to retrieve performance counters information, we can deprecate this enum. Signed-off-by: Maíra Canal Reviewed-by: Iago Toral Quiroga Link: https://patchwork.freedesktop.org/patch/msgid/20240512222655.2792754-7-mcanal@igalia.com --- include/uapi/drm/v3d_drm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/v3d_drm.h b/include/uapi/drm/v3d_drm.h index 0860ddb3d0b6..87fc5bb0a61e 100644 --- a/include/uapi/drm/v3d_drm.h +++ b/include/uapi/drm/v3d_drm.h @@ -603,6 +603,16 @@ struct drm_v3d_submit_cpu { __u64 extensions; }; +/* The performance counters index represented by this enum are deprecated and + * must no longer be used. These counters are only valid for V3D 4.2. + * + * In order to check for performance counter information, + * use DRM_IOCTL_V3D_PERFMON_GET_COUNTER. + * + * Don't use V3D_PERFCNT_NUM to retrieve the maximum number of performance + * counters. You should use DRM_IOCTL_V3D_GET_PARAM with the following + * parameter: DRM_V3D_PARAM_MAX_PERF_COUNTERS. + */ enum { V3D_PERFCNT_FEP_VALID_PRIMTS_NO_PIXELS, V3D_PERFCNT_FEP_VALID_PRIMS, -- cgit v1.2.3 From 8a0a7b98d4b6eeeab337ec25daa4bc0a5e710a15 Mon Sep 17 00:00:00 2001 From: Wayne Lin Date: Thu, 7 Mar 2024 14:29:57 +0800 Subject: drm/mst: Fix NULL pointer dereference at drm_dp_add_payload_part2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [Why] Commit: - commit 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement") accidently overwrite the commit - commit 54d217406afe ("drm: use mgr->dev in drm_dbg_kms in drm_dp_add_payload_part2") which cause regression. [How] Recover the original NULL fix and remove the unnecessary input parameter 'state' for drm_dp_add_payload_part2(). Fixes: 5aa1dfcdf0a4 ("drm/mst: Refactor the flow for payload allocation/removement") Reported-by: Leon Weiß Link: https://lore.kernel.org/r/38c253ea42072cc825dc969ac4e6b9b600371cc8.camel@ruhr-uni-bochum.de/ Cc: lyude@redhat.com Cc: imre.deak@intel.com Cc: stable@vger.kernel.org Cc: regressions@lists.linux.dev Reviewed-by: Harry Wentland Acked-by: Jani Nikula Signed-off-by: Wayne Lin Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/20240307062957.2323620-1-Wayne.Lin@amd.com --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2 +- drivers/gpu/drm/display/drm_dp_mst_topology.c | 4 +--- drivers/gpu/drm/i915/display/intel_dp_mst.c | 2 +- drivers/gpu/drm/nouveau/dispnv50/disp.c | 2 +- include/drm/display/drm_dp_mst_helper.h | 1 - 5 files changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index c27063305a13..2c36f3d00ca2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -363,7 +363,7 @@ void dm_helpers_dp_mst_send_payload_allocation( mst_state = to_drm_dp_mst_topology_state(mst_mgr->base.state); new_payload = drm_atomic_get_mst_payload_state(mst_state, aconnector->mst_output_port); - ret = drm_dp_add_payload_part2(mst_mgr, mst_state->base.state, new_payload); + ret = drm_dp_add_payload_part2(mst_mgr, new_payload); if (ret) { amdgpu_dm_set_mst_status(&aconnector->mst_status, diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 3577786b5db2..7f8e1cfbe19d 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3421,7 +3421,6 @@ EXPORT_SYMBOL(drm_dp_remove_payload_part2); /** * drm_dp_add_payload_part2() - Execute payload update part 2 * @mgr: Manager to use. - * @state: The global atomic state * @payload: The payload to update * * If @payload was successfully assigned a starting time slot by drm_dp_add_payload_part1(), this @@ -3430,14 +3429,13 @@ EXPORT_SYMBOL(drm_dp_remove_payload_part2); * Returns: 0 on success, negative error code on failure. */ int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr, - struct drm_atomic_state *state, struct drm_dp_mst_atomic_payload *payload) { int ret = 0; /* Skip failed payloads */ if (payload->payload_allocation_status != DRM_DP_MST_PAYLOAD_ALLOCATION_DFP) { - drm_dbg_kms(state->dev, "Part 1 of payload creation for %s failed, skipping part 2\n", + drm_dbg_kms(mgr->dev, "Part 1 of payload creation for %s failed, skipping part 2\n", payload->port->connector->name); return -EIO; } diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c index c772ba19c547..715d2f59f565 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c @@ -1241,7 +1241,7 @@ static void intel_mst_enable_dp(struct intel_atomic_state *state, if (first_mst_stream) intel_ddi_wait_for_fec_status(encoder, pipe_config, true); - drm_dp_add_payload_part2(&intel_dp->mst_mgr, &state->base, + drm_dp_add_payload_part2(&intel_dp->mst_mgr, drm_atomic_get_mst_payload_state(mst_state, connector->port)); if (DISPLAY_VER(dev_priv) >= 12) diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 0c3d88ad0b0e..88728a0b2c25 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -915,7 +915,7 @@ nv50_msto_cleanup(struct drm_atomic_state *state, msto->disabled = false; drm_dp_remove_payload_part2(mgr, new_mst_state, old_payload, new_payload); } else if (msto->enabled) { - drm_dp_add_payload_part2(mgr, state, new_payload); + drm_dp_add_payload_part2(mgr, new_payload); msto->enabled = false; } } diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index 3546b58a121b..cfe096389d94 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -871,7 +871,6 @@ int drm_dp_add_payload_part1(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_topology_state *mst_state, struct drm_dp_mst_atomic_payload *payload); int drm_dp_add_payload_part2(struct drm_dp_mst_topology_mgr *mgr, - struct drm_atomic_state *state, struct drm_dp_mst_atomic_payload *payload); void drm_dp_remove_payload_part1(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_topology_state *mst_state, -- cgit v1.2.3 From 5716146295183651cfd89ae2ea46c5ef0d31faa8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:35 +0200 Subject: wifi: regulatory: remove extra documentation The struct member country_ie_checksum doesn't exist, so don't document it. Link: https://msgid.link/20240515093852.ebcc9673558b.Ie0b58c1249c6375c60859fa6474d7cdd8862b065@changeid Signed-off-by: Johannes Berg --- include/net/regulatory.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index ebf9e028d1ef..a103f4c8cf75 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -71,8 +71,6 @@ enum environment_cap { * CRDA and can be used by other regulatory requests. When a * the last request is not yet processed we must yield until it * is processed before processing any new requests. - * @country_ie_checksum: checksum of the last processed and accepted - * country IE * @country_ie_env: lets us know if the AP is telling us we are outdoor, * indoor, or if it doesn't matter * @list: used to insert into the reg_requests_list linked list -- cgit v1.2.3 From 3bb8dce41c023e39ec4d79a83742403b5064a276 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:36 +0200 Subject: wifi: ieee80211: add missing doc short descriptions Some structures erroneously don't have a short description, add the missing descriptions. Link: https://msgid.link/20240515093852.16f4355e918e.I940276a4fb006ada68ab1a3e6077e3229fff0f14@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index de2dce743ee2..713266ce48a7 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1101,7 +1101,7 @@ enum ieee80211_vht_opmode_bits { }; /** - * enum ieee80211_s1g_chanwidth + * enum ieee80211_s1g_chanwidth - S1G channel widths * These are defined in IEEE802.11-2016ah Table 10-20 * as BSS Channel Width * @@ -4145,7 +4145,7 @@ enum ieee80211_idle_options { }; /** - * struct ieee80211_bss_max_idle_period_ie + * struct ieee80211_bss_max_idle_period_ie - BSS max idle period element struct * * This structure refers to "BSS Max idle period element" * @@ -4180,7 +4180,7 @@ enum ieee80211_sa_query_action { }; /** - * struct ieee80211_bssid_index + * struct ieee80211_bssid_index - multiple BSSID index element structure * * This structure refers to "Multiple BSSID-index element" * @@ -4195,7 +4195,8 @@ struct ieee80211_bssid_index { }; /** - * struct ieee80211_multiple_bssid_configuration + * struct ieee80211_multiple_bssid_configuration - multiple BSSID configuration + * element structure * * This structure refers to "Multiple BSSID Configuration element" * -- cgit v1.2.3 From 1c26f09b20bafc92e0de80e84942be77d4d3c61e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:37 +0200 Subject: wifi: radiotap: document ieee80211_get_radiotap_len() return value Document the return value of ieee80211_get_radiotap_len() in the proper kernel-doc format. Link: https://msgid.link/20240515093852.143aadfdb094.I8795ec1e8cfd7106d58325fb514bae92625fb45c@changeid Signed-off-by: Johannes Berg --- include/net/ieee80211_radiotap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index 925bac726a92..91762faecc13 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -582,6 +582,7 @@ enum ieee80211_radiotap_eht_usig_tb { /** * ieee80211_get_radiotap_len - get radiotap header length * @data: pointer to the header + * Return: the radiotap header length */ static inline u16 ieee80211_get_radiotap_len(const char *data) { -- cgit v1.2.3 From 54856871298c9a8717450351699a8e6fe706101c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:38 +0200 Subject: wifi: ieee80211: remove ieee80211_next_tbtt_present() This is actually completely equivalent to the other function ieee80211_is_s1g_short_beacon(), but open-codes the logic. Implement the necessary logic in ieee80211_is_s1g_short_beacon() and remove ieee80211_next_tbtt_present(). Link: https://msgid.link/20240515093852.774ced74dea8.I152525b4cff6e6a25be6c48fe6a4b89f17bab8a9@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 713266ce48a7..72b351abb4f6 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -604,25 +604,15 @@ static inline bool ieee80211_is_s1g_beacon(__le16 fc) } /** - * ieee80211_next_tbtt_present - check if IEEE80211_FTYPE_EXT && - * IEEE80211_STYPE_S1G_BEACON && IEEE80211_S1G_BCN_NEXT_TBTT - * @fc: frame control bytes in little-endian byteorder - */ -static inline bool ieee80211_next_tbtt_present(__le16 fc) -{ - return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == - cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON) && - fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT); -} - -/** - * ieee80211_is_s1g_short_beacon - check if next tbtt present bit is set. Only - * true for S1G beacons when they're short. + * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an S1G short beacon, + * i.e. it is an S1G beacon with 'next TBTT' flag set */ static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) { - return ieee80211_is_s1g_beacon(fc) && ieee80211_next_tbtt_present(fc); + return ieee80211_is_s1g_beacon(fc) && + (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); } /** -- cgit v1.2.3 From 3c75e99c7036a87f159dbd526060554afb3482f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:39 +0200 Subject: wifi: ieee80211: document function return values These are all missing, as pointed out when running kernel-doc. Add return value documentation and fix some small things while at it. Link: https://msgid.link/20240515093852.1cd5ad8f354d.Idc16e9767fa42de80b659c32efc58aea38c26996@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 106 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 72b351abb4f6..595f83783f0e 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -373,6 +373,7 @@ struct ieee80211_trigger { /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has to-DS set */ static inline bool ieee80211_has_tods(__le16 fc) { @@ -382,6 +383,7 @@ static inline bool ieee80211_has_tods(__le16 fc) /** * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has from-DS set */ static inline bool ieee80211_has_fromds(__le16 fc) { @@ -391,6 +393,7 @@ static inline bool ieee80211_has_fromds(__le16 fc) /** * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not it's a 4-address frame (from-DS and to-DS set) */ static inline bool ieee80211_has_a4(__le16 fc) { @@ -401,6 +404,7 @@ static inline bool ieee80211_has_a4(__le16 fc) /** * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame has more fragments (more frags bit set) */ static inline bool ieee80211_has_morefrags(__le16 fc) { @@ -410,6 +414,7 @@ static inline bool ieee80211_has_morefrags(__le16 fc) /** * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the retry flag is set */ static inline bool ieee80211_has_retry(__le16 fc) { @@ -419,6 +424,7 @@ static inline bool ieee80211_has_retry(__le16 fc) /** * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the power management flag is set */ static inline bool ieee80211_has_pm(__le16 fc) { @@ -428,6 +434,7 @@ static inline bool ieee80211_has_pm(__le16 fc) /** * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the more data flag is set */ static inline bool ieee80211_has_moredata(__le16 fc) { @@ -437,6 +444,7 @@ static inline bool ieee80211_has_moredata(__le16 fc) /** * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the protected flag is set */ static inline bool ieee80211_has_protected(__le16 fc) { @@ -446,6 +454,7 @@ static inline bool ieee80211_has_protected(__le16 fc) /** * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the order flag is set */ static inline bool ieee80211_has_order(__le16 fc) { @@ -455,6 +464,7 @@ static inline bool ieee80211_has_order(__le16 fc) /** * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is management */ static inline bool ieee80211_is_mgmt(__le16 fc) { @@ -465,6 +475,7 @@ static inline bool ieee80211_is_mgmt(__le16 fc) /** * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is control */ static inline bool ieee80211_is_ctl(__le16 fc) { @@ -475,6 +486,7 @@ static inline bool ieee80211_is_ctl(__le16 fc) /** * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a data frame */ static inline bool ieee80211_is_data(__le16 fc) { @@ -485,6 +497,7 @@ static inline bool ieee80211_is_data(__le16 fc) /** * ieee80211_is_ext - check if type is IEEE80211_FTYPE_EXT * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame type is extended */ static inline bool ieee80211_is_ext(__le16 fc) { @@ -496,6 +509,7 @@ static inline bool ieee80211_is_ext(__le16 fc) /** * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS data frame */ static inline bool ieee80211_is_data_qos(__le16 fc) { @@ -510,6 +524,8 @@ static inline bool ieee80211_is_data_qos(__le16 fc) /** * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS data frame that has data + * (i.e. is not null data) */ static inline bool ieee80211_is_data_present(__le16 fc) { @@ -524,6 +540,7 @@ static inline bool ieee80211_is_data_present(__le16 fc) /** * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an association request */ static inline bool ieee80211_is_assoc_req(__le16 fc) { @@ -534,6 +551,7 @@ static inline bool ieee80211_is_assoc_req(__le16 fc) /** * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an association response */ static inline bool ieee80211_is_assoc_resp(__le16 fc) { @@ -544,6 +562,7 @@ static inline bool ieee80211_is_assoc_resp(__le16 fc) /** * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a reassociation request */ static inline bool ieee80211_is_reassoc_req(__le16 fc) { @@ -554,6 +573,7 @@ static inline bool ieee80211_is_reassoc_req(__le16 fc) /** * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a reassociation response */ static inline bool ieee80211_is_reassoc_resp(__le16 fc) { @@ -564,6 +584,7 @@ static inline bool ieee80211_is_reassoc_resp(__le16 fc) /** * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a probe request */ static inline bool ieee80211_is_probe_req(__le16 fc) { @@ -574,6 +595,7 @@ static inline bool ieee80211_is_probe_req(__le16 fc) /** * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a probe response */ static inline bool ieee80211_is_probe_resp(__le16 fc) { @@ -584,6 +606,7 @@ static inline bool ieee80211_is_probe_resp(__le16 fc) /** * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a (regular, not S1G) beacon */ static inline bool ieee80211_is_beacon(__le16 fc) { @@ -595,6 +618,7 @@ static inline bool ieee80211_is_beacon(__le16 fc) * ieee80211_is_s1g_beacon - check if IEEE80211_FTYPE_EXT && * IEEE80211_STYPE_S1G_BEACON * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an S1G beacon */ static inline bool ieee80211_is_s1g_beacon(__le16 fc) { @@ -618,6 +642,7 @@ static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an ATIM frame */ static inline bool ieee80211_is_atim(__le16 fc) { @@ -628,6 +653,7 @@ static inline bool ieee80211_is_atim(__le16 fc) /** * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a disassociation frame */ static inline bool ieee80211_is_disassoc(__le16 fc) { @@ -638,6 +664,7 @@ static inline bool ieee80211_is_disassoc(__le16 fc) /** * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an authentication frame */ static inline bool ieee80211_is_auth(__le16 fc) { @@ -648,6 +675,7 @@ static inline bool ieee80211_is_auth(__le16 fc) /** * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a deauthentication frame */ static inline bool ieee80211_is_deauth(__le16 fc) { @@ -658,6 +686,7 @@ static inline bool ieee80211_is_deauth(__le16 fc) /** * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ACTION * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an action frame */ static inline bool ieee80211_is_action(__le16 fc) { @@ -668,6 +697,7 @@ static inline bool ieee80211_is_action(__le16 fc) /** * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a block-ACK request frame */ static inline bool ieee80211_is_back_req(__le16 fc) { @@ -678,6 +708,7 @@ static inline bool ieee80211_is_back_req(__le16 fc) /** * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a block-ACK frame */ static inline bool ieee80211_is_back(__le16 fc) { @@ -688,6 +719,7 @@ static inline bool ieee80211_is_back(__le16 fc) /** * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a PS-poll frame */ static inline bool ieee80211_is_pspoll(__le16 fc) { @@ -698,6 +730,7 @@ static inline bool ieee80211_is_pspoll(__le16 fc) /** * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an RTS frame */ static inline bool ieee80211_is_rts(__le16 fc) { @@ -708,6 +741,7 @@ static inline bool ieee80211_is_rts(__le16 fc) /** * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CTS frame */ static inline bool ieee80211_is_cts(__le16 fc) { @@ -718,6 +752,7 @@ static inline bool ieee80211_is_cts(__le16 fc) /** * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is an ACK frame */ static inline bool ieee80211_is_ack(__le16 fc) { @@ -728,6 +763,7 @@ static inline bool ieee80211_is_ack(__le16 fc) /** * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CF-end frame */ static inline bool ieee80211_is_cfend(__le16 fc) { @@ -738,6 +774,7 @@ static inline bool ieee80211_is_cfend(__le16 fc) /** * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a CF-end-ack frame */ static inline bool ieee80211_is_cfendack(__le16 fc) { @@ -748,6 +785,7 @@ static inline bool ieee80211_is_cfendack(__le16 fc) /** * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a nullfunc frame */ static inline bool ieee80211_is_nullfunc(__le16 fc) { @@ -758,6 +796,7 @@ static inline bool ieee80211_is_nullfunc(__le16 fc) /** * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a QoS nullfunc frame */ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) { @@ -768,6 +807,7 @@ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) /** * ieee80211_is_trigger - check if frame is trigger frame * @fc: frame control field in little-endian byteorder + * Return: whether or not the frame is a trigger frame */ static inline bool ieee80211_is_trigger(__le16 fc) { @@ -778,6 +818,7 @@ static inline bool ieee80211_is_trigger(__le16 fc) /** * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder + * Return: whether or not the frame is a nullfunc or QoS nullfunc frame */ static inline bool ieee80211_is_any_nullfunc(__le16 fc) { @@ -787,6 +828,8 @@ static inline bool ieee80211_is_any_nullfunc(__le16 fc) /** * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder + * Return: whether or not the frame is the first fragment (also true if + * it's not fragmented at all) */ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) { @@ -796,6 +839,7 @@ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) /** * ieee80211_is_frag - check if a frame is a fragment * @hdr: 802.11 header of the frame + * Return: whether or not the frame is a fragment */ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) { @@ -2349,6 +2393,8 @@ struct ieee80211_eht_operation_info { * @max_vht_nss: current maximum NSS as advertised by the STA in * operating mode notification, can be 0 in which case the * capability data will be used to derive this (from MCS support) + * Return: The maximum NSS that can be used for the given bandwidth/MCS + * combination * * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can * vary for a given BW/MCS. This function parses the data. @@ -4317,6 +4363,7 @@ struct ieee80211_he_6ghz_capa { /** * ieee80211_get_qos_ctl - get pointer to qos control bytes * @hdr: the frame + * Return: a pointer to the QoS control field in the frame header * * The qos ctrl bytes come after the frame_control, duration, seq_num * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose @@ -4339,6 +4386,7 @@ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr) /** * ieee80211_get_tid - get qos TID * @hdr: the frame + * Return: the TID from the QoS control field */ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) { @@ -4350,6 +4398,7 @@ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) /** * ieee80211_get_SA - get pointer to SA * @hdr: the frame + * Return: a pointer to the source address (SA) * * Given an 802.11 frame, this function returns the offset * to the source address (SA). It does not verify that the @@ -4369,6 +4418,7 @@ static inline u8 *ieee80211_get_SA(struct ieee80211_hdr *hdr) /** * ieee80211_get_DA - get pointer to DA * @hdr: the frame + * Return: a pointer to the destination address (DA) * * Given an 802.11 frame, this function returns the offset * to the destination address (DA). It does not verify that @@ -4387,6 +4437,7 @@ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @skb: the skb to check, starting with the 802.11 header + * Return: whether or not the MMPDU is bufferable */ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) { @@ -4425,6 +4476,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) /** * _ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame * @hdr: the frame (buffer must include at least the first octet of payload) + * Return: whether or not the frame is a robust management frame */ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) { @@ -4461,6 +4513,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) /** * ieee80211_is_robust_mgmt_frame - check if skb contains a robust mgmt frame * @skb: the skb containing the frame, length will be checked + * Return: whether or not the frame is a robust management frame */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { @@ -4473,6 +4526,7 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) * ieee80211_is_public_action - check if frame is a public action frame * @hdr: the frame * @len: length of the frame + * Return: whether or not the frame is a public action frame */ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, size_t len) @@ -4518,8 +4572,9 @@ ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) /** * _ieee80211_is_group_privacy_action - check if frame is a group addressed - * privacy action frame + * privacy action frame * @hdr: the frame + * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) { @@ -4535,8 +4590,9 @@ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) /** * ieee80211_is_group_privacy_action - check if frame is a group addressed - * privacy action frame + * privacy action frame * @skb: the skb containing the frame, length will be checked + * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) { @@ -4548,6 +4604,7 @@ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) /** * ieee80211_tu_to_usec - convert time units (TU) to microseconds * @tu: the TUs + * Return: the time value converted to microseconds */ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) { @@ -4559,6 +4616,7 @@ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) * @tim: the TIM IE * @tim_len: length of the TIM IE * @aid: the AID to look for + * Return: whether or not traffic is indicated in the TIM for the given AID */ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid) @@ -4585,8 +4643,10 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, } /** - * ieee80211_get_tdls_action - get tdls packet action (or -1, if not tdls packet) + * ieee80211_get_tdls_action - get TDLS action code * @skb: the skb containing the frame, length will not be checked + * Return: the TDLS action code, or -1 if it's not an encapsulated TDLS action + * frame * * This function assumes the frame is a data frame, and that the network header * is in the correct place. @@ -4626,6 +4686,7 @@ static inline int ieee80211_get_tdls_action(struct sk_buff *skb) /** * ieee80211_action_contains_tpc - checks if the frame contains TPC element * @skb: the skb containing the frame, length will be checked + * Return: %true if the frame contains a TPC element, %false otherwise * * This function checks if it's either TPC report action frame or Link * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5 @@ -4743,6 +4804,7 @@ struct element { * @element: element pointer after for_each_element() or friends * @data: same data pointer as passed to for_each_element() or friends * @datalen: same data length as passed to for_each_element() or friends + * Return: %true if all elements were iterated, %false otherwise; see notes * * This function returns %true if all the data was parsed or considered * while walking the elements. Only use this if your for_each_element() @@ -4946,6 +5008,7 @@ struct ieee80211_mle_tdls_common_info { * ieee80211_mle_common_size - check multi-link element common size * @data: multi-link element, must already be checked for size using * ieee80211_mle_size_ok() + * Return: the size of the multi-link element's "common" subfield */ static inline u8 ieee80211_mle_common_size(const u8 *data) { @@ -4978,11 +5041,10 @@ static inline u8 ieee80211_mle_common_size(const u8 *data) /** * ieee80211_mle_get_link_id - returns the link ID * @data: the basic multi link element + * Return: the link ID, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the BSS link ID can't be found, -1 will be returned */ static inline int ieee80211_mle_get_link_id(const u8 *data) { @@ -5002,12 +5064,10 @@ static inline int ieee80211_mle_get_link_id(const u8 *data) /** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count * @data: pointer to the basic multi link element + * Return: the BSS Parameter Change Count field value, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the BSS parameter change count value can't be found (the presence bit - * for it is clear), -1 will be returned. */ static inline int ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) @@ -5030,13 +5090,13 @@ ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) /** * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the medium synchronization delay field value from the multi-link + * element, or the default value (%IEEE80211_MED_SYNC_DELAY_DEFAULT) + * if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the medium synchronization is not present, then the default value is - * returned. */ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data) { @@ -5060,12 +5120,12 @@ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data) /** * ieee80211_mle_get_eml_cap - returns the EML capability - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the EML capability field value from the multi-link element, + * or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the EML capability is not present, 0 will be returned. */ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) { @@ -5091,13 +5151,12 @@ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) /** * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. - * @data: pointer to the multi link EHT IE + * @data: pointer to the multi-link element + * Return: the MLD capabilities and operations field value from the multi-link + * element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the MLD capabilities and operations field is not present, 0 will be - * returned. */ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) { @@ -5128,12 +5187,11 @@ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) /** * ieee80211_mle_get_mld_id - returns the MLD ID - * @data: pointer to the multi link element + * @data: pointer to the multi-link element + * Return: The MLD ID in the given multi-link element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). - * - * If the MLD ID is not present, 0 will be returned. */ static inline u8 ieee80211_mle_get_mld_id(const u8 *data) { @@ -5168,6 +5226,7 @@ static inline u8 ieee80211_mle_get_mld_id(const u8 *data) * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element + * Return: whether or not the multi-link element size is OK */ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) { @@ -5237,6 +5296,7 @@ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) * @data: pointer to the element data * @type: expected type of the element * @len: length of the containing element + * Return: whether or not the multi-link element type matches and size is OK */ static inline bool ieee80211_mle_type_ok(const u8 *data, u8 type, size_t len) { @@ -5280,6 +5340,7 @@ struct ieee80211_mle_per_sta_profile { * profile size * @data: pointer to the sub element data * @len: length of the containing sub element + * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_basic_sta_prof_size_ok(const u8 *data, size_t len) @@ -5364,6 +5425,7 @@ ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(const struct ieee80211_mle_per_sta * element sta profile size. * @data: pointer to the sub element data * @len: length of the containing sub element + * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, size_t len) -- cgit v1.2.3 From 9d222c12834d80849afbad7b42822b8ffbbc025f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 May 2024 09:38:40 +0200 Subject: wifi: ieee80211: document two FTM related functions Add some documentation to ieee80211_is_timing_measurement() and ieee80211_is_ftm(). Link: https://msgid.link/20240515093852.229aa69e972c.Ifae6762a698e79cd5a49a055fe4c32330e826200@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 595f83783f0e..fb50a99daa93 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4731,6 +4731,11 @@ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) return true; } +/** + * ieee80211_is_timing_measurement - check if frame is timing measurement response + * @skb: the SKB to check + * Return: whether or not the frame is a valid timing measurement response + */ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; @@ -4750,6 +4755,11 @@ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) return false; } +/** + * ieee80211_is_ftm - check if frame is FTM response + * @skb: the SKB to check + * Return: whether or not the frame is a valid FTM response action frame + */ static inline bool ieee80211_is_ftm(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; -- cgit v1.2.3 From 8592fd7ccc95a7cf222985e6297041463e8b4e9d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:54 +0200 Subject: wifi: ieee80211/ath11k: remove IEEE80211_MAX_NUM_PWR_LEVEL The define IEEE80211_MAX_NUM_PWR_LEVEL doesn't make much sense. Yes, that table has a maximum value of 8, and the table will actually remain that way, but EHT introduced a way to encode more levels for 320 MHz channels. Remove IEEE80211_MAX_NUM_PWR_LEVEL and, for ath11k being the only user, add ATH11K_NUM_PWR_LEVELS, where it makes sense since it cannot support 320 MHz channels. Acked-by: Kalle Valo Link: https://msgid.link/20240506214536.9818e5471055.Icece7e47e963d6b68e0d97ba13c102b37fbaa689@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/core.h | 9 ++++++--- drivers/net/wireless/ath/ath11k/mac.c | 2 +- include/linux/ieee80211.h | 5 ----- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index 205f40ee6b66..df24f0e409af 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -330,6 +330,9 @@ struct ath11k_chan_power_info { s8 tx_power; }; +/* ath11k only deals with 160 MHz, so 8 subchannels */ +#define ATH11K_NUM_PWR_LEVELS 8 + /** * struct ath11k_reg_tpc_power_info - regulatory TPC power info * @is_psd_power: is PSD power or not @@ -346,10 +349,10 @@ struct ath11k_reg_tpc_power_info { u8 eirp_power; enum wmi_reg_6ghz_ap_type ap_power_type; u8 num_pwr_levels; - u8 reg_max[IEEE80211_MAX_NUM_PWR_LEVEL]; + u8 reg_max[ATH11K_NUM_PWR_LEVELS]; u8 ap_constraint_power; - s8 tpe[IEEE80211_MAX_NUM_PWR_LEVEL]; - struct ath11k_chan_power_info chan_power_info[IEEE80211_MAX_NUM_PWR_LEVEL]; + s8 tpe[ATH11K_NUM_PWR_LEVELS]; + struct ath11k_chan_power_info chan_power_info[ATH11K_NUM_PWR_LEVELS]; }; struct ath11k_vif { diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 4f62e38ba48b..59e5e8085742 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -7688,7 +7688,7 @@ void ath11k_mac_fill_reg_tpc_info(struct ath11k *ar, struct ieee80211_channel *chan, *temp_chan; u8 pwr_lvl_idx, num_pwr_levels, pwr_reduction; bool is_psd_power = false, is_tpe_present = false; - s8 max_tx_power[IEEE80211_MAX_NUM_PWR_LEVEL], + s8 max_tx_power[ATH11K_NUM_PWR_LEVELS], psd_power, tx_power; s8 eirp_power = 0; u16 start_freq, center_freq; diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index fb50a99daa93..1c3a683a3ee2 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2833,11 +2833,6 @@ struct ieee80211_he_6ghz_oper { * So it it totally max 8 Transmit Power Envelope element. */ #define IEEE80211_TPE_MAX_IE_COUNT 8 -/* - * In "Table 9-277—Meaning of Maximum Transmit Power Count subfield" - * of "IEEE Std 802.11ax™‐2021", the max power level is 8. - */ -#define IEEE80211_MAX_NUM_PWR_LEVEL 8 #define IEEE80211_TPE_MAX_POWER_COUNT 8 -- cgit v1.2.3 From 39dc8b8ea387ce7f4fe2d2d6d550ed52aa9aa040 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:56 +0200 Subject: wifi: mac80211: pass parsed TPE data to drivers Instead of passing the full TPE elements, in all their glory and mixed up data formats for HE backward compatibility, parse them fully into the right values, and pass that to the drivers. Also introduce proper validation already in mac80211, so that drivers don't need to do it, and parse the EHT portions. The code now passes the values in the right order according to the channel used by an interface, which could also be a subset of the data advertised by the AP, if we couldn't connect with the full bandwidth (for whatever reason.) Also add kunit tests for the more complicated bits of it. Reviewed-by: Miriam Rachel Korenblit Acked-by: Kalle Valo Link: https://msgid.link/20240506214536.2aa839969b60.I265b28209e0b29772b2f125f7f83de44a4da877b@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 143 ++++------------- include/linux/ieee80211.h | 102 ++++++++++-- include/net/mac80211.h | 41 ++++- net/mac80211/ieee80211_i.h | 13 +- net/mac80211/mlme.c | 230 +++++++++++++++++++++++---- net/mac80211/parse.c | 92 +++++++++-- net/mac80211/tests/Makefile | 2 +- net/mac80211/tests/tpe.c | 284 ++++++++++++++++++++++++++++++++++ net/mac80211/util.c | 25 +++ 9 files changed, 761 insertions(+), 171 deletions(-) create mode 100644 net/mac80211/tests/tpe.c (limited to 'include') diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 73876760afe9..d4225bf30e51 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -7507,32 +7507,6 @@ static int ath11k_mac_stop_vdev_early(struct ieee80211_hw *hw, return 0; } -static u8 ath11k_mac_get_tpe_count(u8 txpwr_intrprt, u8 txpwr_cnt) -{ - switch (txpwr_intrprt) { - /* Refer "Table 9-276-Meaning of Maximum Transmit Power Count subfield - * if the Maximum Transmit Power Interpretation subfield is 0 or 2" of - * "IEEE Std 802.11ax 2021". - */ - case IEEE80211_TPE_LOCAL_EIRP: - case IEEE80211_TPE_REG_CLIENT_EIRP: - txpwr_cnt = txpwr_cnt <= 3 ? txpwr_cnt : 3; - txpwr_cnt = txpwr_cnt + 1; - break; - /* Refer "Table 9-277-Meaning of Maximum Transmit Power Count subfield - * if Maximum Transmit Power Interpretation subfield is 1 or 3" of - * "IEEE Std 802.11ax 2021". - */ - case IEEE80211_TPE_LOCAL_EIRP_PSD: - case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: - txpwr_cnt = txpwr_cnt <= 4 ? txpwr_cnt : 4; - txpwr_cnt = txpwr_cnt ? (BIT(txpwr_cnt - 1)) : 1; - break; - } - - return txpwr_cnt; -} - static u8 ath11k_mac_get_num_pwr_levels(struct cfg80211_chan_def *chan_def) { if (chan_def->chan->flags & IEEE80211_CHAN_PSD) { @@ -7859,33 +7833,23 @@ static void ath11k_mac_parse_tx_pwr_env(struct ath11k *ar, struct ath11k_base *ab = ar->ab; struct ath11k_vif *arvif = ath11k_vif_to_arvif(vif); struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; - struct ieee80211_tx_pwr_env *single_tpe; + struct ieee80211_parsed_tpe_eirp *non_psd = NULL; + struct ieee80211_parsed_tpe_psd *psd = NULL; enum wmi_reg_6ghz_client_type client_type; struct cur_regulatory_info *reg_info; + u8 local_tpe_count, reg_tpe_count; + bool use_local_tpe; int i; - u8 pwr_count, pwr_interpret, pwr_category; - u8 psd_index = 0, non_psd_index = 0, local_tpe_count = 0, reg_tpe_count = 0; - bool use_local_tpe, non_psd_set = false, psd_set = false; reg_info = &ab->reg_info_store[ar->pdev_idx]; client_type = reg_info->client_type; - for (i = 0; i < bss_conf->tx_pwr_env_num; i++) { - single_tpe = &bss_conf->tx_pwr_env[i]; - pwr_category = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_CATEGORY); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - - if (pwr_category == client_type) { - if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP || - pwr_interpret == IEEE80211_TPE_LOCAL_EIRP_PSD) - local_tpe_count++; - else if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP || - pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP_PSD) - reg_tpe_count++; - } - } + local_tpe_count = + bss_conf->tpe.max_local[client_type].valid + + bss_conf->tpe.psd_local[client_type].valid; + reg_tpe_count = + bss_conf->tpe.max_reg_client[client_type].valid + + bss_conf->tpe.psd_reg_client[client_type].valid; if (!reg_tpe_count && !local_tpe_count) { ath11k_warn(ab, @@ -7898,83 +7862,44 @@ static void ath11k_mac_parse_tx_pwr_env(struct ath11k *ar, use_local_tpe = false; } - for (i = 0; i < bss_conf->tx_pwr_env_num; i++) { - single_tpe = &bss_conf->tx_pwr_env[i]; - pwr_category = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_CATEGORY); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - - if (pwr_category != client_type) - continue; - - /* get local transmit power envelope */ - if (use_local_tpe) { - if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP) { - non_psd_index = i; - non_psd_set = true; - } else if (pwr_interpret == IEEE80211_TPE_LOCAL_EIRP_PSD) { - psd_index = i; - psd_set = true; - } - /* get regulatory transmit power envelope */ - } else { - if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP) { - non_psd_index = i; - non_psd_set = true; - } else if (pwr_interpret == IEEE80211_TPE_REG_CLIENT_EIRP_PSD) { - psd_index = i; - psd_set = true; - } - } + if (use_local_tpe) { + psd = &bss_conf->tpe.psd_local[client_type]; + if (!psd->valid) + psd = NULL; + non_psd = &bss_conf->tpe.max_local[client_type]; + if (!non_psd->valid) + non_psd = NULL; + } else { + psd = &bss_conf->tpe.psd_reg_client[client_type]; + if (!psd->valid) + psd = NULL; + non_psd = &bss_conf->tpe.max_reg_client[client_type]; + if (!non_psd->valid) + non_psd = NULL; } - if (non_psd_set && !psd_set) { - single_tpe = &bss_conf->tx_pwr_env[non_psd_index]; - pwr_count = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_COUNT); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + if (non_psd && !psd) { arvif->reg_tpc_info.is_psd_power = false; arvif->reg_tpc_info.eirp_power = 0; - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_tpe_count(pwr_interpret, pwr_count); + arvif->reg_tpc_info.num_pwr_levels = non_psd->count; for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { ath11k_dbg(ab, ATH11K_DBG_MAC, "non PSD power[%d] : %d\n", - i, single_tpe->tx_power[i]); - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[i] / 2; + i, non_psd->power[i]); + arvif->reg_tpc_info.tpe[i] = non_psd->power[i] / 2; } } - if (psd_set) { - single_tpe = &bss_conf->tx_pwr_env[psd_index]; - pwr_count = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_COUNT); - pwr_interpret = u8_get_bits(single_tpe->tx_power_info, - IEEE80211_TX_PWR_ENV_INFO_INTERPRET); - arvif->reg_tpc_info.is_psd_power = true; + if (psd) { + arvif->reg_tpc_info.num_pwr_levels = psd->count; - if (pwr_count == 0) { + for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { ath11k_dbg(ab, ATH11K_DBG_MAC, - "TPE PSD power : %d\n", single_tpe->tx_power[0]); - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_num_pwr_levels(&ctx->def); - - for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[0] / 2; - } else { - arvif->reg_tpc_info.num_pwr_levels = - ath11k_mac_get_tpe_count(pwr_interpret, pwr_count); - - for (i = 0; i < arvif->reg_tpc_info.num_pwr_levels; i++) { - ath11k_dbg(ab, ATH11K_DBG_MAC, - "TPE PSD power[%d] : %d\n", - i, single_tpe->tx_power[i]); - arvif->reg_tpc_info.tpe[i] = single_tpe->tx_power[i] / 2; - } + "TPE PSD power[%d] : %d\n", + i, psd->power[i]); + arvif->reg_tpc_info.tpe[i] = psd->power[i] / 2; } } } diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 1c3a683a3ee2..769008a51809 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2825,17 +2825,6 @@ struct ieee80211_he_6ghz_oper { u8 minrate; } __packed; -/* - * In "9.4.2.161 Transmit Power Envelope element" of "IEEE Std 802.11ax-2021", - * it show four types in "Table 9-275a-Maximum Transmit Power Interpretation - * subfield encoding", and two category for each type in "Table E-12-Regulatory - * Info subfield encoding in the United States". - * So it it totally max 8 Transmit Power Envelope element. - */ -#define IEEE80211_TPE_MAX_IE_COUNT 8 - -#define IEEE80211_TPE_MAX_POWER_COUNT 8 - /* transmit power interpretation type of transmit power envelope element */ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_LOCAL_EIRP, @@ -2844,24 +2833,107 @@ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_REG_CLIENT_EIRP_PSD, }; +/* category type of transmit power envelope element */ +enum ieee80211_tx_power_category_6ghz { + IEEE80211_TPE_CAT_6GHZ_DEFAULT = 0, + IEEE80211_TPE_CAT_6GHZ_SUBORDINATE = 1, +}; + +/* + * For IEEE80211_TPE_LOCAL_EIRP / IEEE80211_TPE_REG_CLIENT_EIRP, + * setting to 63.5 dBm means no constraint. + */ +#define IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT 127 + +/* + * For IEEE80211_TPE_LOCAL_EIRP_PSD / IEEE80211_TPE_REG_CLIENT_EIRP_PSD, + * setting to 127 indicates no PSD limit for the 20 MHz channel. + */ +#define IEEE80211_TPE_PSD_NO_LIMIT 127 + /** * struct ieee80211_tx_pwr_env - Transmit Power Envelope - * @tx_power_info: Transmit Power Information field - * @tx_power: Maximum Transmit Power field + * @info: Transmit Power Information field + * @variable: Maximum Transmit Power field * * This structure represents the payload of the "Transmit Power * Envelope element" as described in IEEE Std 802.11ax-2021 section * 9.4.2.161 */ struct ieee80211_tx_pwr_env { - u8 tx_power_info; - s8 tx_power[IEEE80211_TPE_MAX_POWER_COUNT]; + u8 info; + u8 variable[]; } __packed; #define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7 #define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38 #define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0 +#define IEEE80211_TX_PWR_ENV_EXT_COUNT 0xF + +static inline bool ieee80211_valid_tpe_element(const u8 *data, u8 len) +{ + const struct ieee80211_tx_pwr_env *env = (const void *)data; + u8 count, interpret, category; + u8 needed = sizeof(*env); + u8 N; /* also called N in the spec */ + + if (len < needed) + return false; + + count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); + interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); + + switch (category) { + case IEEE80211_TPE_CAT_6GHZ_DEFAULT: + case IEEE80211_TPE_CAT_6GHZ_SUBORDINATE: + break; + default: + return false; + } + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + case IEEE80211_TPE_REG_CLIENT_EIRP: + if (count > 3) + return false; + + /* count == 0 encodes 1 value for 20 MHz, etc. */ + needed += count + 1; + + if (len < needed) + return false; + + /* there can be extension fields not accounted for in 'count' */ + + return true; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + if (count > 4) + return false; + + N = count ? 1 << (count - 1) : 1; + needed += N; + + if (len < needed) + return false; + + if (len > needed) { + u8 K = u8_get_bits(env->variable[N], + IEEE80211_TX_PWR_ENV_EXT_COUNT); + + needed += 1 + K; + if (len < needed) + return false; + } + + return true; + } + + return false; +} + /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size * @he_oper_ie: byte data of the He Operations IE, stating from the byte diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cafc664ee531..a4efbfb8d796 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -550,6 +550,39 @@ struct ieee80211_fils_discovery { u32 max_interval; }; +#define IEEE80211_TPE_EIRP_ENTRIES_320MHZ 5 +struct ieee80211_parsed_tpe_eirp { + bool valid; + s8 power[IEEE80211_TPE_EIRP_ENTRIES_320MHZ]; + u8 count; +}; + +#define IEEE80211_TPE_PSD_ENTRIES_320MHZ 16 +struct ieee80211_parsed_tpe_psd { + bool valid; + s8 power[IEEE80211_TPE_PSD_ENTRIES_320MHZ]; + u8 count, n; +}; + +/** + * struct ieee80211_parsed_tpe - parsed transmit power envelope information + * @max_local: maximum local EIRP, one value for 20, 40, 80, 160, 320 MHz each + * (indexed by TX power category) + * @max_reg_client: maximum regulatory client EIRP, one value for 20, 40, 80, + * 160, 320 MHz each + * (indexed by TX power category) + * @psd_local: maximum local power spectral density, one value for each 20 MHz + * subchannel per bss_conf's chanreq.oper + * (indexed by TX power category) + * @psd_reg_client: maximum regulatory power spectral density, one value for + * each 20 MHz subchannel per bss_conf's chanreq.oper + * (indexed by TX power category) + */ +struct ieee80211_parsed_tpe { + struct ieee80211_parsed_tpe_eirp max_local[2], max_reg_client[2]; + struct ieee80211_parsed_tpe_psd psd_local[2], psd_reg_client[2]; +}; + /** * struct ieee80211_bss_conf - holds the BSS's changing parameters * @@ -662,8 +695,7 @@ struct ieee80211_fils_discovery { * @beacon_tx_rate: The configured beacon transmit rate that needs to be passed * to driver when rate control is offloaded to firmware. * @power_type: power type of BSS for 6 GHz - * @tx_pwr_env: transmit power envelope array of BSS. - * @tx_pwr_env_num: number of @tx_pwr_env. + * @tpe: transmit power envelope information * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT * @csa_active: marks whether a channel switch is going on. @@ -766,8 +798,9 @@ struct ieee80211_bss_conf { u32 unsol_bcast_probe_resp_interval; struct cfg80211_bitrate_mask beacon_tx_rate; enum ieee80211_ap_reg_power power_type; - struct ieee80211_tx_pwr_env tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; - u8 tx_pwr_env_num; + + struct ieee80211_parsed_tpe tpe; + u8 pwr_reduction; bool eht_support; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eb62b7d4b4f7..7d541a2355f6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -1708,7 +1709,6 @@ struct ieee802_11_elems { const struct ieee80211_he_spr *he_spr; const struct ieee80211_mu_edca_param_set *mu_edca_param_set; const struct ieee80211_he_6ghz_capa *he_6ghz_capa; - const struct ieee80211_tx_pwr_env *tx_pwr_env[IEEE80211_TPE_MAX_IE_COUNT]; const u8 *uora_element; const u8 *mesh_id; const u8 *peering; @@ -1746,6 +1746,9 @@ struct ieee802_11_elems { const struct ieee80211_bandwidth_indication *bandwidth_indication; const struct ieee80211_ttlm_elem *ttlm[IEEE80211_TTLM_MAX_CNT]; + /* not the order in the psd values is per element, not per chandef */ + struct ieee80211_parsed_tpe tpe; + /* length of them, respectively */ u8 ext_capab_len; u8 ssid_len; @@ -1764,8 +1767,6 @@ struct ieee802_11_elems { u8 perr_len; u8 country_elem_len; u8 bssid_index_len; - u8 tx_pwr_env_len[IEEE80211_TPE_MAX_IE_COUNT]; - u8 tx_pwr_env_num; u8 eht_cap_len; /* mult-link element can be de-fragmented and thus u8 is not sufficient */ @@ -2243,6 +2244,7 @@ int ieee80211_frame_duration(enum nl80211_band band, size_t len, void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_queue_params *qparam, int ac); +void ieee80211_clear_tpe(struct ieee80211_parsed_tpe *tpe); void ieee80211_set_wmm_default(struct ieee80211_link_data *link, bool bss_notify, bool enable_qos); void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, @@ -2681,6 +2683,11 @@ void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_d #define VISIBLE_IF_MAC80211_KUNIT ieee80211_rx_result ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx); +int ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap, + u8 n_partial_subchans); +void ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used); #else #define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) #define VISIBLE_IF_MAC80211_KUNIT static diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index e410a43dc681..ed9851faac05 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -606,11 +606,195 @@ static bool ieee80211_chandef_usable(struct ieee80211_sub_if_data *sdata, return true; } +static int ieee80211_chandef_num_subchans(const struct cfg80211_chan_def *c) +{ + if (c->width == NL80211_CHAN_WIDTH_80P80) + return 4 + 4; + + return nl80211_chan_width_to_mhz(c->width) / 20; +} + +static int ieee80211_chandef_num_widths(const struct cfg80211_chan_def *c) +{ + switch (c->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + return 1; + case NL80211_CHAN_WIDTH_40: + return 2; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + return 3; + case NL80211_CHAN_WIDTH_160: + return 4; + case NL80211_CHAN_WIDTH_320: + return 5; + default: + WARN_ON(1); + return 0; + } +} + +VISIBLE_IF_MAC80211_KUNIT int +ieee80211_calc_chandef_subchan_offset(const struct cfg80211_chan_def *ap, + u8 n_partial_subchans) +{ + int n = ieee80211_chandef_num_subchans(ap); + struct cfg80211_chan_def tmp = *ap; + int offset = 0; + + /* + * Given a chandef (in this context, it's the AP's) and a number + * of subchannels that we want to look at ('n_partial_subchans'), + * calculate the offset in number of subchannels between the full + * and the subset with the desired width. + */ + + /* same number of subchannels means no offset, obviously */ + if (n == n_partial_subchans) + return 0; + + /* don't WARN - misconfigured APs could cause this if their N > width */ + if (n < n_partial_subchans) + return 0; + + while (ieee80211_chandef_num_subchans(&tmp) > n_partial_subchans) { + u32 prev = tmp.center_freq1; + + ieee80211_chandef_downgrade(&tmp, NULL); + + /* + * if center_freq moved up, half the original channels + * are gone now but were below, so increase offset + */ + if (prev < tmp.center_freq1) + offset += ieee80211_chandef_num_subchans(&tmp); + } + + /* + * 80+80 with secondary 80 below primary - four subchannels for it + * (we cannot downgrade *to* 80+80, so no need to consider 'tmp') + */ + if (ap->width == NL80211_CHAN_WIDTH_80P80 && + ap->center_freq2 < ap->center_freq1) + offset += 4; + + return offset; +} +EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_calc_chandef_subchan_offset); + +VISIBLE_IF_MAC80211_KUNIT void +ieee80211_rearrange_tpe_psd(struct ieee80211_parsed_tpe_psd *psd, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used) +{ + u8 needed = ieee80211_chandef_num_subchans(used); + u8 have = ieee80211_chandef_num_subchans(ap); + u8 tmp[IEEE80211_TPE_PSD_ENTRIES_320MHZ]; + u8 offset; + + if (!psd->valid) + return; + + /* if N is zero, all defaults were used, no point in rearranging */ + if (!psd->n) + goto out; + + BUILD_BUG_ON(sizeof(tmp) != sizeof(psd->power)); + + /* + * This assumes that 'N' is consistent with the HE channel, as + * it should be (otherwise the AP is broken). + * + * In psd->power we have values in the order 0..N, 0..K, where + * N+K should cover the entire channel per 'ap', but even if it + * doesn't then we've pre-filled 'unlimited' as defaults. + * + * But this is all the wrong order, we want to have them in the + * order of the 'used' channel. + * + * So for example, we could have a 320 MHz EHT AP, which has the + * HE channel as 80 MHz (e.g. due to puncturing, which doesn't + * seem to be considered for the TPE), as follows: + * + * EHT 320: | | | | | | | | | | | | | | | | | + * HE 80: | | | | | + * used 160: | | | | | | | | | + * + * N entries: |--|--|--|--| + * K entries: |--|--|--|--|--|--|--|--| |--|--|--|--| + * power idx: 4 5 6 7 8 9 10 11 0 1 2 3 12 13 14 15 + * full chan: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + * used chan: 0 1 2 3 4 5 6 7 + * + * The idx in the power array ('power idx') is like this since it + * comes directly from the element's N and K entries in their + * element order, and those are this way for HE compatibility. + * + * Rearrange them as desired here, first by putting them into the + * 'full chan' order, and then selecting the necessary subset for + * the 'used chan'. + */ + + /* first reorder according to AP channel */ + offset = ieee80211_calc_chandef_subchan_offset(ap, psd->n); + for (int i = 0; i < have; i++) { + if (i < offset) + tmp[i] = psd->power[i + psd->n]; + else if (i < offset + psd->n) + tmp[i] = psd->power[i - offset]; + else + tmp[i] = psd->power[i]; + } + + /* + * and then select the subset for the used channel + * (set everything to defaults first in case a driver is confused) + */ + memset(psd->power, IEEE80211_TPE_PSD_NO_LIMIT, sizeof(psd->power)); + offset = ieee80211_calc_chandef_subchan_offset(ap, needed); + for (int i = 0; i < needed; i++) + psd->power[i] = tmp[offset + i]; + +out: + /* limit, but don't lie if there are defaults in the data */ + if (needed < psd->count) + psd->count = needed; +} +EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_rearrange_tpe_psd); + +static void ieee80211_rearrange_tpe(struct ieee80211_parsed_tpe *tpe, + const struct cfg80211_chan_def *ap, + const struct cfg80211_chan_def *used) +{ + /* ignore this completely for narrow/invalid channels */ + if (!ieee80211_chandef_num_subchans(ap) || + !ieee80211_chandef_num_subchans(used)) { + ieee80211_clear_tpe(tpe); + return; + } + + for (int i = 0; i < 2; i++) { + int needed_pwr_count; + + ieee80211_rearrange_tpe_psd(&tpe->psd_local[i], ap, used); + ieee80211_rearrange_tpe_psd(&tpe->psd_reg_client[i], ap, used); + + /* limit this to the widths we actually need */ + needed_pwr_count = ieee80211_chandef_num_widths(used); + if (needed_pwr_count < tpe->max_local[i].count) + tpe->max_local[i].count = needed_pwr_count; + if (needed_pwr_count < tpe->max_reg_client[i].count) + tpe->max_reg_client[i].count = needed_pwr_count; + } +} + static struct ieee802_11_elems * ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, struct ieee80211_conn_settings *conn, struct cfg80211_bss *cbss, int link_id, - struct ieee80211_chan_req *chanreq) + struct ieee80211_chan_req *chanreq, + struct cfg80211_chan_def *ap_chandef) { const struct cfg80211_bss_ies *ies = rcu_dereference(cbss->ies); struct ieee80211_bss *bss = (void *)cbss->priv; @@ -623,7 +807,6 @@ ieee80211_determine_chan_mode(struct ieee80211_sub_if_data *sdata, }; struct ieee802_11_elems *elems; struct ieee80211_supported_band *sband; - struct cfg80211_chan_def ap_chandef; enum ieee80211_conn_mode ap_mode; int ret; @@ -634,7 +817,7 @@ again: return ERR_PTR(-ENOMEM); ap_mode = ieee80211_determine_ap_chan(sdata, channel, bss->vht_cap_info, - elems, false, conn, &ap_chandef); + elems, false, conn, ap_chandef); /* this should be impossible since parsing depends on our mode */ if (WARN_ON(ap_mode > conn->mode)) { @@ -701,12 +884,12 @@ again: break; } - chanreq->oper = ap_chandef; + chanreq->oper = *ap_chandef; /* wider-bandwidth OFDMA is only done in EHT */ if (conn->mode >= IEEE80211_CONN_MODE_EHT && !(sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW)) - chanreq->ap = ap_chandef; + chanreq->ap = *ap_chandef; else chanreq->ap.chan = NULL; @@ -738,7 +921,7 @@ again: IEEE80211_CONN_BW_LIMIT_160); } - if (chanreq->oper.width != ap_chandef.width || ap_mode != conn->mode) + if (chanreq->oper.width != ap_chandef->width || ap_mode != conn->mode) sdata_info(sdata, "regulatory prevented using AP config, downgraded\n"); @@ -3275,9 +3458,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, sdata->vif.bss_conf.power_type = IEEE80211_REG_UNSET_AP; sdata->vif.bss_conf.pwr_reduction = 0; - sdata->vif.bss_conf.tx_pwr_env_num = 0; - memset(sdata->vif.bss_conf.tx_pwr_env, 0, - sizeof(sdata->vif.bss_conf.tx_pwr_env)); + ieee80211_clear_tpe(&sdata->vif.bss_conf.tpe); sdata->vif.cfg.eml_cap = 0; sdata->vif.cfg.eml_med_sync_delay = 0; @@ -5018,15 +5199,15 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ; struct ieee80211_chan_req chanreq = {}; + struct cfg80211_chan_def ap_chandef; struct ieee802_11_elems *elems; int ret; - u32 i; lockdep_assert_wiphy(local->hw.wiphy); rcu_read_lock(); elems = ieee80211_determine_chan_mode(sdata, conn, cbss, link_id, - &chanreq); + &chanreq, &ap_chandef); if (IS_ERR(elems)) { rcu_read_unlock(); @@ -5042,35 +5223,22 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, if (link && is_6ghz && conn->mode >= IEEE80211_CONN_MODE_HE) { const struct ieee80211_he_6ghz_oper *he_6ghz_oper; - struct ieee80211_bss_conf *bss_conf; - u8 j = 0; - - bss_conf = link->conf; if (elems->pwr_constr_elem) - bss_conf->pwr_reduction = *elems->pwr_constr_elem; - - BUILD_BUG_ON(ARRAY_SIZE(bss_conf->tx_pwr_env) != - ARRAY_SIZE(elems->tx_pwr_env)); - - for (i = 0; i < elems->tx_pwr_env_num; i++) { - if (elems->tx_pwr_env_len[i] > sizeof(bss_conf->tx_pwr_env[j])) - continue; - - bss_conf->tx_pwr_env_num++; - memcpy(&bss_conf->tx_pwr_env[j], elems->tx_pwr_env[i], - elems->tx_pwr_env_len[i]); - j++; - } + link->conf->pwr_reduction = *elems->pwr_constr_elem; he_6ghz_oper = ieee80211_he_6ghz_oper(elems->he_operation); if (he_6ghz_oper) - bss_conf->power_type = + link->conf->power_type = ieee80211_ap_power_type(he_6ghz_oper->control); else link_info(link, "HE 6 GHz operation missing (on %d MHz), expect issues\n", cbss->channel->center_freq); + + link->conf->tpe = elems->tpe; + ieee80211_rearrange_tpe(&link->conf->tpe, &ap_chandef, + &chanreq.oper); } rcu_read_unlock(); /* the element data was RCU protected so no longer valid anyway */ @@ -7558,6 +7726,8 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link) wiphy_delayed_work_init(&link->u.mgd.chswitch_work, ieee80211_chswitch_work); + ieee80211_clear_tpe(&link->conf->tpe); + if (sdata->u.mgd.assoc_data) ether_addr_copy(link->conf->addr, sdata->u.mgd.assoc_data->link[link_id].addr); diff --git a/net/mac80211/parse.c b/net/mac80211/parse.c index 55e5497f8978..6efeb977f8e5 100644 --- a/net/mac80211/parse.c +++ b/net/mac80211/parse.c @@ -187,6 +187,84 @@ ieee80211_parse_extension_element(u32 *crc, *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2); } +static void ieee80211_parse_tpe(struct ieee80211_parsed_tpe *tpe, + const u8 *data, u8 len) +{ + const struct ieee80211_tx_pwr_env *env = (const void *)data; + u8 count, interpret, category; + u8 *out, N, *cnt_out = NULL, *N_out = NULL; + + if (!ieee80211_valid_tpe_element(data, len)) + return; + + count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); + interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); + category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + out = tpe->max_local[category].power; + cnt_out = &tpe->max_local[category].count; + tpe->max_local[category].valid = true; + break; + case IEEE80211_TPE_REG_CLIENT_EIRP: + out = tpe->max_reg_client[category].power; + cnt_out = &tpe->max_reg_client[category].count; + tpe->max_reg_client[category].valid = true; + break; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + out = tpe->psd_local[category].power; + cnt_out = &tpe->psd_local[category].count; + N_out = &tpe->psd_local[category].n; + tpe->psd_local[category].valid = true; + break; + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + out = tpe->psd_reg_client[category].power; + cnt_out = &tpe->psd_reg_client[category].count; + N_out = &tpe->psd_reg_client[category].n; + tpe->psd_reg_client[category].valid = true; + break; + } + + switch (interpret) { + case IEEE80211_TPE_LOCAL_EIRP: + case IEEE80211_TPE_REG_CLIENT_EIRP: + /* count was validated <= 3, plus 320 MHz */ + BUILD_BUG_ON(IEEE80211_TPE_EIRP_ENTRIES_320MHZ < 5); + memcpy(out, env->variable, count + 1); + *cnt_out = count + 1; + /* separately take 320 MHz if present */ + if (count == 3 && len > sizeof(*env) + count + 1) { + out[4] = env->variable[count + 2]; + *cnt_out = 5; + } + break; + case IEEE80211_TPE_LOCAL_EIRP_PSD: + case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: + if (!count) { + memset(out, env->variable[0], + IEEE80211_TPE_PSD_ENTRIES_320MHZ); + *cnt_out = IEEE80211_TPE_PSD_ENTRIES_320MHZ; + break; + } + + N = 1 << (count - 1); + memcpy(out, env->variable, N); + *cnt_out = N; + *N_out = N; + + if (len > sizeof(*env) + N) { + int K = u8_get_bits(env->variable[N], + IEEE80211_TX_PWR_ENV_EXT_COUNT); + + K = min(K, IEEE80211_TPE_PSD_ENTRIES_320MHZ - N); + memcpy(out + N, env->variable + N + 1, K); + (*cnt_out) += K; + } + break; + } +} + static u32 _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, struct ieee80211_elems_parse *elems_parse, @@ -593,16 +671,9 @@ _ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params, elems->rsnx_len = elen; break; case WLAN_EID_TX_POWER_ENVELOPE: - if (elen < 1 || - elen > sizeof(struct ieee80211_tx_pwr_env)) - break; - - if (elems->tx_pwr_env_num >= ARRAY_SIZE(elems->tx_pwr_env)) + if (params->mode < IEEE80211_CONN_MODE_HE) break; - - elems->tx_pwr_env[elems->tx_pwr_env_num] = (void *)pos; - elems->tx_pwr_env_len[elems->tx_pwr_env_num] = elen; - elems->tx_pwr_env_num++; + ieee80211_parse_tpe(&elems->tpe, pos, elen); break; case WLAN_EID_EXTENSION: ieee80211_parse_extension_element(calc_crc ? @@ -889,6 +960,9 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params) elems->ie_start = params->start; elems->total_len = params->len; + /* set all TPE entries to unlimited (but invalid) */ + ieee80211_clear_tpe(&elems->tpe); + nontransmitted_profile = elems_parse->scratch_pos; nontransmitted_profile_len = ieee802_11_find_bssid_profile(params->start, params->len, diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile index 4fdaf3feaca3..511dfa226699 100644 --- a/net/mac80211/tests/Makefile +++ b/net/mac80211/tests/Makefile @@ -1,3 +1,3 @@ -mac80211-tests-y += module.o elems.o mfp.o +mac80211-tests-y += module.o elems.o mfp.o tpe.o obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o diff --git a/net/mac80211/tests/tpe.c b/net/mac80211/tests/tpe.c new file mode 100644 index 000000000000..dd63303a2985 --- /dev/null +++ b/net/mac80211/tests/tpe.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * KUnit tests for TPE element handling + * + * Copyright (C) 2024 Intel Corporation + */ +#include +#include "../ieee80211_i.h" + +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); + +static struct ieee80211_channel chan6g_1 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 5955, +}; + +static struct ieee80211_channel chan6g_33 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 6115, +}; + +static struct ieee80211_channel chan6g_61 = { + .band = NL80211_BAND_6GHZ, + .center_freq = 6255, +}; + +static const struct subchan_test_case { + const char *desc; + struct cfg80211_chan_def c; + u8 n; + int expect; +} subchan_offset_cases[] = { + { + .desc = "identical 20 MHz", + .c.width = NL80211_CHAN_WIDTH_20, + .c.chan = &chan6g_1, + .c.center_freq1 = 5955, + .n = 1, + .expect = 0, + }, + { + .desc = "identical 40 MHz", + .c.width = NL80211_CHAN_WIDTH_40, + .c.chan = &chan6g_1, + .c.center_freq1 = 5965, + .n = 2, + .expect = 0, + }, + { + .desc = "identical 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_1, + .c.center_freq1 = 5985, + .c.center_freq2 = 6225, + .n = 16, + .expect = 0, + }, + { + .desc = "identical 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_1, + .c.center_freq1 = 6105, + .n = 16, + .expect = 0, + }, + { + .desc = "lower 160 MHz of 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_1, + .c.center_freq1 = 6105, + .n = 8, + .expect = 0, + }, + { + .desc = "upper 160 MHz of 320 MHz", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_61, + .c.center_freq1 = 6105, + .n = 8, + .expect = 8, + }, + { + .desc = "upper 160 MHz of 320 MHz, go to 40", + .c.width = NL80211_CHAN_WIDTH_320, + .c.chan = &chan6g_61, + .c.center_freq1 = 6105, + .n = 2, + .expect = 8 + 4 + 2, + }, + { + .desc = "secondary 80 above primary in 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_1, + .c.center_freq1 = 5985, + .c.center_freq2 = 6225, + .n = 4, + .expect = 0, + }, + { + .desc = "secondary 80 below primary in 80+80 MHz", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_61, + .c.center_freq1 = 6225, + .c.center_freq2 = 5985, + .n = 4, + .expect = 4, + }, + { + .desc = "secondary 80 below primary in 80+80 MHz, go to 20", + /* not really is valid? doesn't matter for the test */ + .c.width = NL80211_CHAN_WIDTH_80P80, + .c.chan = &chan6g_61, + .c.center_freq1 = 6225, + .c.center_freq2 = 5985, + .n = 1, + .expect = 7, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(subchan_offset, subchan_offset_cases, desc); + +static void subchan_offset(struct kunit *test) +{ + const struct subchan_test_case *params = test->param_value; + int offset; + + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->c), true); + + offset = ieee80211_calc_chandef_subchan_offset(¶ms->c, params->n); + + KUNIT_EXPECT_EQ(test, params->expect, offset); +} + +static const struct psd_reorder_test_case { + const char *desc; + struct cfg80211_chan_def ap, used; + struct ieee80211_parsed_tpe_psd psd, out; +} psd_reorder_cases[] = { + { + .desc = "no changes, 320 MHz", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_320, + .used.chan = &chan6g_1, + .used.center_freq1 = 6105, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 8, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 16, + .out.n = 8, + .out.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + }, + { + .desc = "no changes, 320 MHz, 160 MHz used, n=0", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_1, + .used.center_freq1 = 6025, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 0, + .psd.power = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + + .out.valid = true, + .out.count = 8, + .out.n = 0, + .out.power = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, + }, + { + .desc = "320 MHz, HE is 80, used 160, all lower", + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_1, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_1, + .used.center_freq1 = 6025, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 0, 1, 2, 3, 4, 5, 6, 7, 127, 127, 127, 127, 127, 127, 127, 127}, + }, + { + .desc = "320 MHz, HE is 80, used 160, all upper", + /* + * EHT: | | | | | | | | | | | | | | | | | + * HE: | | | | | + * used: | | | | | | | | | + */ + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_61, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_61, + .used.center_freq1 = 6185, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 12, 13, 14, 15, 0, 1, 2, 3, 127, 127, 127, 127, 127, 127, 127, 127}, + }, + { + .desc = "320 MHz, HE is 80, used 160, split", + /* + * EHT: | | | | | | | | | | | | | | | | | + * HE: | | | | | + * used: | | | | | | | | | + */ + + .ap.width = NL80211_CHAN_WIDTH_320, + .ap.chan = &chan6g_33, + .ap.center_freq1 = 6105, + + .used.width = NL80211_CHAN_WIDTH_160, + .used.chan = &chan6g_33, + .used.center_freq1 = 6185, + + .psd.valid = true, + .psd.count = 16, + .psd.n = 4, + .psd.power = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + + .out.valid = true, + .out.count = 8, + .out.n = 4, + .out.power = { 0, 1, 2, 3, 12, 13, 14, 15, 127, 127, 127, 127, 127, 127, 127, 127}, + }, +}; + +KUNIT_ARRAY_PARAM_DESC(psd_reorder, psd_reorder_cases, desc); + +static void psd_reorder(struct kunit *test) +{ + const struct psd_reorder_test_case *params = test->param_value; + struct ieee80211_parsed_tpe_psd tmp = params->psd; + + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->ap), true); + KUNIT_ASSERT_EQ(test, cfg80211_chandef_valid(¶ms->used), true); + + ieee80211_rearrange_tpe_psd(&tmp, ¶ms->ap, ¶ms->used); + KUNIT_EXPECT_MEMEQ(test, &tmp, ¶ms->out, sizeof(tmp)); +} + +static struct kunit_case tpe_test_cases[] = { + KUNIT_CASE_PARAM(subchan_offset, subchan_offset_gen_params), + KUNIT_CASE_PARAM(psd_reorder, psd_reorder_gen_params), + {} +}; + +static struct kunit_suite tpe = { + .name = "mac80211-tpe", + .test_cases = tpe_test_cases, +}; + +kunit_test_suite(tpe); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 6139c930b572..183de2075fb9 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -4334,3 +4334,28 @@ ieee80211_min_bw_limit_from_chandef(struct cfg80211_chan_def *chandef) return IEEE80211_CONN_BW_LIMIT_20; } } + +void ieee80211_clear_tpe(struct ieee80211_parsed_tpe *tpe) +{ + for (int i = 0; i < 2; i++) { + tpe->max_local[i].valid = false; + memset(tpe->max_local[i].power, + IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT, + sizeof(tpe->max_local[i].power)); + + tpe->max_reg_client[i].valid = false; + memset(tpe->max_reg_client[i].power, + IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT, + sizeof(tpe->max_reg_client[i].power)); + + tpe->psd_local[i].valid = false; + memset(tpe->psd_local[i].power, + IEEE80211_TPE_PSD_NO_LIMIT, + sizeof(tpe->psd_local[i].power)); + + tpe->psd_reg_client[i].valid = false; + memset(tpe->psd_reg_client[i].power, + IEEE80211_TPE_PSD_NO_LIMIT, + sizeof(tpe->psd_reg_client[i].power)); + } +} -- cgit v1.2.3 From 5a009b42e0418d30b3ffaff2f46c534cd79b3f23 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 6 May 2024 21:37:57 +0200 Subject: wifi: mac80211: track changes in AP's TPE If the TPE (transmit power envelope) is changed, detect and report that to the driver. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240506214536.103dda923f45.I990877e409ab8eade9ed7c172272e0cae57256cf@changeid Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ net/mac80211/mlme.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a4efbfb8d796..a59eacfe0480 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -362,6 +362,7 @@ struct ieee80211_vif_chanctx_switch { * status changed. * @BSS_CHANGED_MLD_VALID_LINKS: MLD valid links status changed. * @BSS_CHANGED_MLD_TTLM: negotiated TID to link mapping was changed + * @BSS_CHANGED_TPE: transmit power envelope changed */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -398,6 +399,7 @@ enum ieee80211_bss_change { BSS_CHANGED_UNSOL_BCAST_PROBE_RESP = 1<<31, BSS_CHANGED_MLD_VALID_LINKS = BIT_ULL(33), BSS_CHANGED_MLD_TTLM = BIT_ULL(34), + BSS_CHANGED_TPE = BIT_ULL(35), /* when adding here, make sure to change ieee80211_reconfig */ }; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ed9851faac05..08c0999746fb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -973,6 +973,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, struct ieee80211_channel *channel = link->conf->chanreq.oper.chan; struct ieee80211_sub_if_data *sdata = link->sdata; struct ieee80211_chan_req chanreq = {}; + struct cfg80211_chan_def ap_chandef; enum ieee80211_conn_mode ap_mode; u32 vht_cap_info = 0; u16 ht_opmode; @@ -988,7 +989,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, ap_mode = ieee80211_determine_ap_chan(sdata, channel, vht_cap_info, elems, true, &link->u.mgd.conn, - &chanreq.ap); + &ap_chandef); if (ap_mode != link->u.mgd.conn.mode) { link_info(link, @@ -998,7 +999,8 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, return -EINVAL; } - chanreq.oper = chanreq.ap; + chanreq.ap = ap_chandef; + chanreq.oper = ap_chandef; if (link->u.mgd.conn.mode < IEEE80211_CONN_MODE_EHT || sdata->vif.driver_flags & IEEE80211_VIF_IGNORE_OFDMA_WIDER_BW) chanreq.ap.chan = NULL; @@ -1026,6 +1028,16 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link, ieee80211_min_bw_limit_from_chandef(&chanreq.oper)) ieee80211_chandef_downgrade(&chanreq.oper, NULL); + if (ap_chandef.chan->band == NL80211_BAND_6GHZ && + link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HE) { + ieee80211_rearrange_tpe(&elems->tpe, &ap_chandef, + &chanreq.oper); + if (memcmp(&link->conf->tpe, &elems->tpe, sizeof(elems->tpe))) { + link->conf->tpe = elems->tpe; + *changed |= BSS_CHANGED_TPE; + } + } + if (ieee80211_chanreq_identical(&chanreq, &link->conf->chanreq)) return 0; -- cgit v1.2.3 From f9a0757a4b2f5df4376963c25a3d7d7aeba78444 Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Chitrapu Date: Wed, 15 May 2024 11:13:18 -0700 Subject: wifi: mac80211: Add EHT UL MU-MIMO flag in ieee80211_bss_conf Add flag for Full Bandwidth UL MU-MIMO for EHT. This is utilized to pass EHT MU-MIMO configurations from user space to driver in AP mode. Tested-on: QCN9274 hw2.0 PCI WLAN.WBE.1.0.1-00029-QCAHKSWPL_SILICONZ-1 Signed-off-by: Pradeep Kumar Chitrapu Link: https://msgid.link/20240515181327.12855-2-quic_pradeepc@quicinc.com Signed-off-by: Johannes Berg --- include/net/mac80211.h | 4 ++++ net/mac80211/cfg.c | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a59eacfe0480..ecfa65ade226 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -735,6 +735,9 @@ struct ieee80211_parsed_tpe { * beamformee * @eht_mu_beamformer: in AP-mode, does this BSS enable operation as an EHT MU * beamformer + * @eht_80mhz_full_bw_ul_mumimo: in AP-mode, does this BSS support the + * reception of an EHT TB PPDU on an RU that spans the entire PPDU + * bandwidth */ struct ieee80211_bss_conf { struct ieee80211_vif *vif; @@ -828,6 +831,7 @@ struct ieee80211_bss_conf { bool eht_su_beamformer; bool eht_su_beamformee; bool eht_mu_beamformer; + bool eht_80mhz_full_bw_ul_mumimo; }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3319c9360507..62119e957cd8 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1379,6 +1379,11 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, (IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ | IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ); + link_conf->eht_80mhz_full_bw_ul_mumimo = + params->eht_cap->fixed.phy_cap_info[7] & + (IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ | + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ | + IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ); } else { link_conf->eht_su_beamformer = false; link_conf->eht_su_beamformee = false; -- cgit v1.2.3 From aa4ec06c455d0200eea0a4361cc58eb5b8bf68c4 Mon Sep 17 00:00:00 2001 From: Dmitry Antipov Date: Fri, 17 May 2024 18:33:31 +0300 Subject: wifi: cfg80211: use __counted_by where appropriate Annotate 'sub_specs' of 'struct cfg80211_sar_specs', 'channels' of 'struct cfg80211_sched_scan_request', 'channels' of 'struct cfg80211_wowlan_nd_match', and 'matches' of 'struct cfg80211_wowlan_nd_info' with '__counted_by' attribute. Briefly tested with clang 18.1.1 and CONFIG_UBSAN_BOUNDS running iwlwifi. Signed-off-by: Dmitry Antipov Link: https://msgid.link/20240517153332.18271-1-dmantipov@yandex.ru Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index cbf1664dc569..d79180bec7a1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2200,7 +2200,7 @@ struct cfg80211_sar_sub_specs { struct cfg80211_sar_specs { enum nl80211_sar_type type; u32 num_sub_specs; - struct cfg80211_sar_sub_specs sub_specs[]; + struct cfg80211_sar_sub_specs sub_specs[] __counted_by(num_sub_specs); }; @@ -2838,7 +2838,7 @@ struct cfg80211_sched_scan_request { struct list_head list; /* keep last */ - struct ieee80211_channel *channels[]; + struct ieee80211_channel *channels[] __counted_by(n_channels); }; /** @@ -3582,7 +3582,7 @@ struct cfg80211_coalesce { struct cfg80211_wowlan_nd_match { struct cfg80211_ssid ssid; int n_channels; - u32 channels[]; + u32 channels[] __counted_by(n_channels); }; /** @@ -3596,7 +3596,7 @@ struct cfg80211_wowlan_nd_match { */ struct cfg80211_wowlan_nd_info { int n_matches; - struct cfg80211_wowlan_nd_match *matches[]; + struct cfg80211_wowlan_nd_match *matches[] __counted_by(n_matches); }; /** -- cgit v1.2.3 From 3d9d313d518c5bc9e5ab6aeab86c9fa4bece095c Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 10 May 2024 18:08:13 +0300 Subject: drm/connector: update edid_blob_ptr documentation Accessing the EDID via edid_blob_ptr causes chicken-and-egg problems. Keep edid_blob_ptr as the userspace interface that should be accessed via dedicated functions. Reviewed-by: Chaitanya Kumar Borah Acked-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/b6aa1ea30ae85ef9e9814315d3437e82f0ba6754.1715353572.git.jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/drm_connector.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index fe88d7fc6b8f..58ee9adf9091 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1636,8 +1636,12 @@ struct drm_connector { /** * @edid_blob_ptr: DRM property containing EDID if present. Protected by - * &drm_mode_config.mutex. This should be updated only by calling + * &drm_mode_config.mutex. + * + * This must be updated only by calling drm_edid_connector_update() or * drm_connector_update_edid_property(). + * + * This must not be used by drivers directly. */ struct drm_property_blob *edid_blob_ptr; -- cgit v1.2.3 From 4d25ca2d6801cfcf26f7f39c561611ba5be99bf8 Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:32 -0700 Subject: net: Rename mono_delivery_time to tstamp_type for scalabilty mono_delivery_time was added to check if skb->tstamp has delivery time in mono clock base (i.e. EDT) otherwise skb->tstamp has timestamp in ingress and delivery_time at egress. Renaming the bitfield from mono_delivery_time to tstamp_type is for extensibilty for other timestamps such as userspace timestamp (i.e. SO_TXTIME) set via sock opts. As we are renaming the mono_delivery_time to tstamp_type, it makes sense to start assigning tstamp_type based on enum defined in this commit. Earlier we used bool arg flag to check if the tstamp is mono in function skb_set_delivery_time, Now the signature of the functions accepts tstamp_type to distinguish between mono and real time. Also skb_set_delivery_type_by_clockid is a new function which accepts clockid to determine the tstamp_type. In future tstamp_type:1 can be extended to support userspace timestamp by increasing the bitfield. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-2-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 52 ++++++++++++++++++++++-------- include/net/inet_frag.h | 4 +-- net/bridge/netfilter/nf_conntrack_bridge.c | 6 ++-- net/core/dev.c | 2 +- net/core/filter.c | 10 +++--- net/ieee802154/6lowpan/reassembly.c | 2 +- net/ipv4/inet_fragment.c | 2 +- net/ipv4/ip_fragment.c | 2 +- net/ipv4/ip_output.c | 9 +++--- net/ipv4/tcp_output.c | 14 ++++---- net/ipv6/ip6_output.c | 6 ++-- net/ipv6/netfilter.c | 6 ++-- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- net/ipv6/reassembly.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- net/sched/act_bpf.c | 4 +-- net/sched/cls_bpf.c | 4 +-- 17 files changed, 78 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c0b97c93a6de..3a721cc3b644 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -706,6 +706,11 @@ typedef unsigned int sk_buff_data_t; typedef unsigned char *sk_buff_data_t; #endif +enum skb_tstamp_type { + SKB_CLOCK_REALTIME, + SKB_CLOCK_MONOTONIC, +}; + /** * DOC: Basic sk_buff geometry * @@ -823,10 +828,8 @@ typedef unsigned char *sk_buff_data_t; * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required - * @mono_delivery_time: When set, skb->tstamp has the - * delivery_time in mono clock base (i.e. EDT). Otherwise, the - * skb->tstamp has the (rcv) timestamp at ingress and - * delivery_time at egress. + * @tstamp_type: When set, skb->tstamp has the + * delivery_time clock base of skb->tstamp. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation. @@ -954,7 +957,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 mono_delivery_time:1; /* See SKB_MONO_DELIVERY_TIME_MASK */ + __u8 tstamp_type:1; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -4183,7 +4186,7 @@ static inline void skb_get_new_timestampns(const struct sk_buff *skb, static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline ktime_t net_timedelta(ktime_t t) @@ -4192,10 +4195,33 @@ static inline ktime_t net_timedelta(ktime_t t) } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, - bool mono) + u8 tstamp_type) { skb->tstamp = kt; - skb->mono_delivery_time = kt && mono; + + if (kt) + skb->tstamp_type = tstamp_type; + else + skb->tstamp_type = SKB_CLOCK_REALTIME; +} + +static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, + ktime_t kt, clockid_t clockid) +{ + u8 tstamp_type = SKB_CLOCK_REALTIME; + + switch (clockid) { + case CLOCK_REALTIME: + break; + case CLOCK_MONOTONIC: + tstamp_type = SKB_CLOCK_MONOTONIC; + break; + default: + WARN_ON_ONCE(1); + kt = 0; + } + + skb_set_delivery_time(skb, kt, tstamp_type); } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); @@ -4205,8 +4231,8 @@ DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); */ static inline void skb_clear_delivery_time(struct sk_buff *skb) { - if (skb->mono_delivery_time) { - skb->mono_delivery_time = 0; + if (skb->tstamp_type) { + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); else @@ -4216,7 +4242,7 @@ static inline void skb_clear_delivery_time(struct sk_buff *skb) static inline void skb_clear_tstamp(struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return; skb->tstamp = 0; @@ -4224,7 +4250,7 @@ static inline void skb_clear_tstamp(struct sk_buff *skb) static inline ktime_t skb_tstamp(const struct sk_buff *skb) { - if (skb->mono_delivery_time) + if (skb->tstamp_type) return 0; return skb->tstamp; @@ -4232,7 +4258,7 @@ static inline ktime_t skb_tstamp(const struct sk_buff *skb) static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond) { - if (!skb->mono_delivery_time && skb->tstamp) + if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp) return skb->tstamp; if (static_branch_unlikely(&netstamp_needed_key) || cond) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 153960663ce4..5af6eb14c5db 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -76,7 +76,7 @@ struct frag_v6_compare_key { * @stamp: timestamp of the last received fragment * @len: total length of the original datagram * @meat: length of received fragments so far - * @mono_delivery_time: stamp has a mono delivery time (EDT) + * @tstamp_type: stamp has a mono delivery time (EDT) * @flags: fragment queue flags * @max_size: maximum received fragment size * @fqdir: pointer to struct fqdir @@ -97,7 +97,7 @@ struct inet_frag_queue { ktime_t stamp; int len; int meat; - u8 mono_delivery_time; + u8 tstamp_type; __u8 flags; u16 max_size; struct fqdir *fqdir; diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index c3c51b9a6826..816bb0fde718 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -32,7 +32,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; unsigned int hlen, ll_rs, mtu; ktime_t tstamp = skb->tstamp; struct ip_frag_state state; @@ -82,7 +82,7 @@ static int nf_br_ip_fragment(struct net *net, struct sock *sk, if (iter.frag) ip_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -113,7 +113,7 @@ slow_path: goto blackhole; } - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/core/dev.c b/net/core/dev.c index e1bb6d7856d9..85fe8138f3e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2160,7 +2160,7 @@ EXPORT_SYMBOL(net_disable_timestamp); static inline void net_timestamp_set(struct sk_buff *skb) { skb->tstamp = 0; - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; if (static_branch_unlikely(&netstamp_needed_key)) skb->tstamp = ktime_get_real(); } diff --git a/net/core/filter.c b/net/core/filter.c index 2510464692af..a3781a796da4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7730,13 +7730,13 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, if (!tstamp) return -EINVAL; skb->tstamp = tstamp; - skb->mono_delivery_time = 1; + skb->tstamp_type = SKB_CLOCK_MONOTONIC; break; case BPF_SKB_TSTAMP_UNSPEC: if (tstamp) return -EINVAL; skb->tstamp = 0; - skb->mono_delivery_time = 0; + skb->tstamp_type = SKB_CLOCK_REALTIME; break; default: return -EINVAL; @@ -9443,7 +9443,7 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); - /* skb->tc_at_ingress && skb->mono_delivery_time, + /* skb->tc_at_ingress && skb->tstamp_type, * read 0 as the (rcv) timestamp. */ *insn++ = BPF_MOV64_IMM(value_reg, 0); @@ -9468,7 +9468,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, * the bpf prog is aware the tstamp could have delivery time. * Thus, write skb->tstamp as is if tstamp_type_access is true. * Otherwise, writing at ingress will have to clear the - * mono_delivery_time bit also. + * skb->tstamp_type bit also. */ if (!prog->tstamp_type_access) { __u8 tmp_reg = BPF_REG_AX; @@ -9478,7 +9478,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); /* goto */ *insn++ = BPF_JMP_A(2); - /* : mono_delivery_time */ + /* : skb->tstamp_type */ *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 56ef873828f4..867d637d86f0 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -130,7 +130,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, goto err; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; if (frag_type == LOWPAN_DISPATCH_FRAG1) fq->q.flags |= INET_FRAG_FIRST_IN; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index faaec92a46ac..d179a2c84222 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -619,7 +619,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, skb_mark_not_on_list(head); head->prev = NULL; head->tstamp = q->stamp; - head->mono_delivery_time = q->mono_delivery_time; + head->tstamp_type = q->tstamp_type; if (sk) refcount_add(sum_truesize - head_truesize, &sk->sk_wmem_alloc); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 08e2c92e25ab..a92664a5ef2e 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -355,7 +355,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->iif = dev->ifindex; qp->q.stamp = skb->tstamp; - qp->q.mono_delivery_time = skb->mono_delivery_time; + qp->q.tstamp_type = skb->tstamp_type; qp->q.meat += skb->len; qp->ecn |= ecn; add_frag_mem_limit(qp->q.fqdir, skb->truesize); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9500031a1f55..fe86cadfa85b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -764,7 +764,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, { struct iphdr *iph; struct sk_buff *skb2; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; struct rtable *rt = skb_rtable(skb); unsigned int mtu, hlen, ll_rs; struct ip_fraglist_iter iter; @@ -856,7 +856,7 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, } } - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) @@ -912,7 +912,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, skb2); if (err) goto fail; @@ -1649,7 +1649,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, arg->csumoffset) = csum_fold(csum_add(nskb->csum, arg->csum)); nskb->ip_summed = CHECKSUM_NONE; - nskb->mono_delivery_time = !!transmit_time; + if (transmit_time) + nskb->tstamp_type = SKB_CLOCK_MONOTONIC; if (txhash) skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4); ip_push_pending_frames(sk, &fl4); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 95caf8aaa8be..d44371cfa6ec 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1301,7 +1301,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tp = tcp_sk(sk); prior_wstamp = tp->tcp_wstamp_ns; tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); if (clone_it) { oskb = skb; @@ -1655,7 +1655,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, skb_split(skb, buff, len); - skb_set_delivery_time(buff, skb->tstamp, true); + skb_set_delivery_time(buff, skb->tstamp, SKB_CLOCK_MONOTONIC); tcp_fragment_tstamp(skb, buff); old_factor = tcp_skb_pcount(skb); @@ -2764,7 +2764,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp_ns" is used as a start point for the retransmit timer */ tp->tcp_wstamp_ns = tp->tcp_clock_cache; - skb_set_delivery_time(skb, tp->tcp_wstamp_ns, true); + skb_set_delivery_time(skb, tp->tcp_wstamp_ns, SKB_CLOCK_MONOTONIC); list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); goto repair; /* Skip network transmission */ @@ -3752,11 +3752,11 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) skb_set_delivery_time(skb, cookie_init_timestamp(req, now), - true); + SKB_CLOCK_MONOTONIC); else #endif { - skb_set_delivery_time(skb, now, true); + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); if (!tcp_rsk(req)->snt_synack) /* Timestamp first SYNACK */ tcp_rsk(req)->snt_synack = tcp_skb_timestamp_us(skb); } @@ -3843,7 +3843,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, bpf_skops_write_hdr_opt((struct sock *)sk, skb, req, syn_skb, synack_type, &opts); - skb_set_delivery_time(skb, now, true); + skb_set_delivery_time(skb, now, SKB_CLOCK_MONOTONIC); tcp_add_tx_delay(skb, tp); return skb; @@ -4027,7 +4027,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); - skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, true); + skb_set_delivery_time(syn, syn_data->skb_mstamp_ns, SKB_CLOCK_MONOTONIC); /* Now full SYN+DATA was cloned and sent (or not), * remove the SYN from the original skb (syn_data) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 27d8725445e3..1ab0f23d37bf 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -859,7 +859,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? inet6_sk(skb->sk) : NULL; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; struct ip6_frag_state state; unsigned int mtu, hlen, nexthdr_offset; ktime_t tstamp = skb->tstamp; @@ -955,7 +955,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), @@ -1016,7 +1016,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - skb_set_delivery_time(frag, tstamp, mono_delivery_time); + skb_set_delivery_time(frag, tstamp, tstamp_type); err = output(net, sk, frag); if (err) goto fail; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 53d255838e6a..e0c2347b4dc6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -126,7 +126,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; - bool mono_delivery_time = skb->mono_delivery_time; + u8 tstamp_type = skb->tstamp_type; ktime_t tstamp = skb->tstamp; struct ip6_frag_state state; u8 *prevhdr, nexthdr = 0; @@ -192,7 +192,7 @@ int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (iter.frag) ip6_fraglist_prepare(skb, &iter); - skb_set_delivery_time(skb, tstamp, mono_delivery_time); + skb_set_delivery_time(skb, tstamp, tstamp_type); err = output(net, sk, data, skb); if (err || !iter.frag) break; @@ -225,7 +225,7 @@ slow_path: goto blackhole; } - skb_set_delivery_time(skb2, tstamp, mono_delivery_time); + skb_set_delivery_time(skb2, tstamp, tstamp_type); err = output(net, sk, data, skb2); if (err) goto blackhole; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 5e1b50c6a44d..6f0844c9315d 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -263,7 +263,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; if (payload_len > fq->q.max_size) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 327caca64257..a48be617a8ab 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -198,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->iif = dev->ifindex; fq->q.stamp = skb->tstamp; - fq->q.mono_delivery_time = skb->mono_delivery_time; + fq->q.tstamp_type = skb->tstamp_type; fq->q.meat += skb->len; fq->ecn |= ecn; add_frag_mem_limit(fq->q.fqdir, skb->truesize); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4c3605485b68..8333005c5c2e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -975,7 +975,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 mark = inet_twsk(sk)->tw_mark; else mark = READ_ONCE(sk->sk_mark); - skb_set_delivery_time(buff, tcp_transmit_time(sk), true); + skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); } if (txhash) { /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 0e3cf11ae5fc..396b576390d0 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -54,8 +54,8 @@ TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(filter, skb); } - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) - skb->mono_delivery_time = 0; + if (unlikely(!skb->tstamp && skb->tstamp_type)) + skb->tstamp_type = SKB_CLOCK_REALTIME; if (skb_sk_is_prefetched(skb) && filter_res != TC_ACT_OK) skb_orphan(skb); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 5e83e890f6a4..1941ebec23ff 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -104,8 +104,8 @@ TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, bpf_compute_data_pointers(skb); filter_res = bpf_prog_run(prog->filter, skb); } - if (unlikely(!skb->tstamp && skb->mono_delivery_time)) - skb->mono_delivery_time = 0; + if (unlikely(!skb->tstamp && skb->tstamp_type)) + skb->tstamp_type = SKB_CLOCK_REALTIME; if (prog->exts_integrated) { res->class = 0; -- cgit v1.2.3 From 1693c5db6ab8262e6f5263f9d211855959aa5acd Mon Sep 17 00:00:00 2001 From: Abhishek Chauhan Date: Thu, 9 May 2024 14:18:33 -0700 Subject: net: Add additional bit to support clockid_t timestamp type tstamp_type is now set based on actual clockid_t compressed into 2 bits. To make the design scalable for future needs this commit bring in the change to extend the tstamp_type:1 to tstamp_type:2 to support other clockid_t timestamp. We now support CLOCK_TAI as part of tstamp_type as part of this commit with existing support CLOCK_MONOTONIC and CLOCK_REALTIME. Signed-off-by: Abhishek Chauhan Reviewed-by: Willem de Bruijn Reviewed-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20240509211834.3235191-3-quic_abchauha@quicinc.com Signed-off-by: Martin KaFai Lau --- include/linux/skbuff.h | 18 +++++++++++------ include/uapi/linux/bpf.h | 15 +++++++++----- net/core/filter.c | 46 ++++++++++++++++++++++++------------------ net/ipv4/ip_output.c | 5 ++++- net/ipv4/raw.c | 2 +- net/ipv4/tcp_ipv4.c | 2 ++ net/ipv6/ip6_output.c | 5 ++++- net/ipv6/raw.c | 2 +- net/ipv6/tcp_ipv6.c | 10 +++++++-- net/packet/af_packet.c | 7 +++---- tools/include/uapi/linux/bpf.h | 15 +++++++++----- 11 files changed, 81 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3a721cc3b644..1e5c97daaa37 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -709,6 +709,8 @@ typedef unsigned char *sk_buff_data_t; enum skb_tstamp_type { SKB_CLOCK_REALTIME, SKB_CLOCK_MONOTONIC, + SKB_CLOCK_TAI, + __SKB_CLOCK_MAX = SKB_CLOCK_TAI, }; /** @@ -957,7 +959,7 @@ struct sk_buff { /* private: */ __u8 __mono_tc_offset[0]; /* public: */ - __u8 tstamp_type:1; /* See skb_tstamp_type */ + __u8 tstamp_type:2; /* See skb_tstamp_type */ #ifdef CONFIG_NET_XGRESS __u8 tc_at_ingress:1; /* See TC_AT_INGRESS_MASK */ __u8 tc_skip_classify:1; @@ -1087,15 +1089,16 @@ struct sk_buff { #endif #define PKT_TYPE_OFFSET offsetof(struct sk_buff, __pkt_type_offset) -/* if you move tc_at_ingress or mono_delivery_time +/* if you move tc_at_ingress or tstamp_type * around, you also must adapt these constants. */ #ifdef __BIG_ENDIAN_BITFIELD -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 7) -#define TC_AT_INGRESS_MASK (1 << 6) +#define SKB_TSTAMP_TYPE_MASK (3 << 6) +#define SKB_TSTAMP_TYPE_RSHIFT (6) +#define TC_AT_INGRESS_MASK (1 << 5) #else -#define SKB_MONO_DELIVERY_TIME_MASK (1 << 0) -#define TC_AT_INGRESS_MASK (1 << 1) +#define SKB_TSTAMP_TYPE_MASK (3) +#define TC_AT_INGRESS_MASK (1 << 2) #endif #define SKB_BF_MONO_TC_OFFSET offsetof(struct sk_buff, __mono_tc_offset) @@ -4216,6 +4219,9 @@ static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, case CLOCK_MONOTONIC: tstamp_type = SKB_CLOCK_MONOTONIC; break; + case CLOCK_TAI: + tstamp_type = SKB_CLOCK_TAI; + break; default: WARN_ON_ONCE(1); kt = 0; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; diff --git a/net/core/filter.c b/net/core/filter.c index a3781a796da4..c6edfe9f41bc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7726,17 +7726,21 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb, return -EOPNOTSUPP; switch (tstamp_type) { - case BPF_SKB_TSTAMP_DELIVERY_MONO: + case BPF_SKB_CLOCK_REALTIME: + skb->tstamp = tstamp; + skb->tstamp_type = SKB_CLOCK_REALTIME; + break; + case BPF_SKB_CLOCK_MONOTONIC: if (!tstamp) return -EINVAL; skb->tstamp = tstamp; skb->tstamp_type = SKB_CLOCK_MONOTONIC; break; - case BPF_SKB_TSTAMP_UNSPEC: - if (tstamp) + case BPF_SKB_CLOCK_TAI: + if (!tstamp) return -EINVAL; - skb->tstamp = 0; - skb->tstamp_type = SKB_CLOCK_REALTIME; + skb->tstamp = tstamp; + skb->tstamp_type = SKB_CLOCK_TAI; break; default: return -EINVAL; @@ -9387,16 +9391,17 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si, { __u8 value_reg = si->dst_reg; __u8 skb_reg = si->src_reg; - /* AX is needed because src_reg and dst_reg could be the same */ - __u8 tmp_reg = BPF_REG_AX; - - *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, - SKB_BF_MONO_TC_OFFSET); - *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, - SKB_MONO_DELIVERY_TIME_MASK, 2); - *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC); - *insn++ = BPF_JMP_A(1); - *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO); + BUILD_BUG_ON(__SKB_CLOCK_MAX != (int)BPF_SKB_CLOCK_TAI); + BUILD_BUG_ON(SKB_CLOCK_REALTIME != (int)BPF_SKB_CLOCK_REALTIME); + BUILD_BUG_ON(SKB_CLOCK_MONOTONIC != (int)BPF_SKB_CLOCK_MONOTONIC); + BUILD_BUG_ON(SKB_CLOCK_TAI != (int)BPF_SKB_CLOCK_TAI); + *insn++ = BPF_LDX_MEM(BPF_B, value_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); + *insn++ = BPF_ALU32_IMM(BPF_AND, value_reg, SKB_TSTAMP_TYPE_MASK); +#ifdef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_RSH, value_reg, SKB_TSTAMP_TYPE_RSHIFT); +#else + BUILD_BUG_ON(!(SKB_TSTAMP_TYPE_MASK & 0x1)); +#endif return insn; } @@ -9439,10 +9444,11 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog, __u8 tmp_reg = BPF_REG_AX; *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET); - *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK); - *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg, - TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2); + /* check if ingress mask bits is set */ + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1); + *insn++ = BPF_JMP_A(4); + *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, SKB_TSTAMP_TYPE_MASK, 1); + *insn++ = BPF_JMP_A(2); /* skb->tc_at_ingress && skb->tstamp_type, * read 0 as the (rcv) timestamp. */ @@ -9479,7 +9485,7 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog, /* goto */ *insn++ = BPF_JMP_A(2); /* : skb->tstamp_type */ - *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK); + *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TSTAMP_TYPE_MASK); *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET); } #endif diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fe86cadfa85b..b90d0f78ac80 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1457,7 +1457,10 @@ struct sk_buff *__ip_make_skb(struct sock *sk, skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); skb->mark = cork->mark; - skb->tstamp = cork->transmit_time; + if (sk_is_tcp(sk)) + skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC); + else + skb_set_delivery_type_by_clockid(skb, cork->transmit_time, sk->sk_clockid); /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec * on dst refcount diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 4cb43401e0e0..1a0953650356 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->protocol = htons(ETH_P_IP); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_dst_set(skb, &rt->dst); *rtp = NULL; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 30ef0c8f5e92..8f70b8d1d1e5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3625,6 +3625,8 @@ void __init tcp_v4_init(void) */ inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + sk->sk_clockid = CLOCK_MONOTONIC; + per_cpu(ipv4_tcp_sk, cpu) = sk; } if (register_pernet_subsys(&tcp_sk_ops)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1ab0f23d37bf..e7a19df3125e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1924,7 +1924,10 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, skb->priority = READ_ONCE(sk->sk_priority); skb->mark = cork->base.mark; - skb->tstamp = cork->base.transmit_time; + if (sk_is_tcp(sk)) + skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC); + else + skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid); ip6_cork_steal_dst(skb, cork); IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 2eedf255600b..f838366e8256 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->protocol = htons(ETH_P_IPV6); skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc->mark; - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_put(skb, length); skb_reset_network_header(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8333005c5c2e..750aa681779c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2387,8 +2387,14 @@ static struct inet_protosw tcpv6_protosw = { static int __net_init tcpv6_net_init(struct net *net) { - return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, - SOCK_RAW, IPPROTO_TCP, net); + int res; + + res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, + SOCK_RAW, IPPROTO_TCP, net); + if (!res) + net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; + + return res; } static void __net_exit tcpv6_net_exit(struct net *net) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ea3ebc160e25..fce390887591 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2056,8 +2056,7 @@ retry: skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = READ_ONCE(sk->sk_mark); - skb->tstamp = sockc.transmit_time; - + skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); skb_setup_tx_timestamp(skb, sockc.tsflags); if (unlikely(extra_len == 4)) @@ -2584,7 +2583,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb->dev = dev; skb->priority = READ_ONCE(po->sk.sk_priority); skb->mark = READ_ONCE(po->sk.sk_mark); - skb->tstamp = sockc->transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, po->sk.sk_clockid); skb_setup_tx_timestamp(skb, sockc->tsflags); skb_zcopy_set_nouarg(skb, ph.raw); @@ -3062,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->mark = sockc.mark; - skb->tstamp = sockc.transmit_time; + skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); if (unlikely(extra_len == 4)) skb->no_fcs = 1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 90706a47f6ff..25ea393cf084 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -6207,12 +6207,17 @@ union { \ __u64 :64; \ } __attribute__((aligned(8))) +/* The enum used in skb->tstamp_type. It specifies the clock type + * of the time stored in the skb->tstamp. + */ enum { - BPF_SKB_TSTAMP_UNSPEC, - BPF_SKB_TSTAMP_DELIVERY_MONO, /* tstamp has mono delivery time */ - /* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle, - * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC - * and try to deduce it by ingress, egress or skb->sk->sk_clockid. + BPF_SKB_TSTAMP_UNSPEC = 0, /* DEPRECATED */ + BPF_SKB_TSTAMP_DELIVERY_MONO = 1, /* DEPRECATED */ + BPF_SKB_CLOCK_REALTIME = 0, + BPF_SKB_CLOCK_MONOTONIC = 1, + BPF_SKB_CLOCK_TAI = 2, + /* For any future BPF_SKB_CLOCK_* that the bpf prog cannot handle, + * the bpf prog can try to deduce it by ingress/egress/skb->sk->sk_clockid. */ }; -- cgit v1.2.3 From 2c1713a8f1c94033a6e00aae4693ab03e8a3b9f1 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 18 May 2024 16:58:47 +0200 Subject: bpf: constify member bpf_sysctl_kern:: Table MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sysctl core is preparing to only expose instances of struct ctl_table as "const". This will also affect the ctl_table argument of sysctl handlers, for which bpf_sysctl_kern::table is also used. As the function prototype of all sysctl handlers throughout the tree needs to stay consistent that change will be done in one commit. To reduce the size of that final commit, switch this utility type which is not bound by "typedef proc_handler" to "const struct ctl_table". No functional change. Signed-off-by: Thomas Weißschuh Signed-off-by: Daniel Borkmann Reviewed-by: Joel Granados Link: https://lore.kernel.org/bpf/20240518-sysctl-const-handler-bpf-v1-1-f0d7186743c1@weissschuh.net --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0f12cf01070e..b02aea291b7e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1406,7 +1406,7 @@ struct bpf_sock_ops_kern { struct bpf_sysctl_kern { struct ctl_table_header *head; - struct ctl_table *table; + const struct ctl_table *table; void *cur_val; size_t cur_len; void *new_val; -- cgit v1.2.3 From 73e75e6fc352bdca08f7e0893d5b6bb37171bdd2 Mon Sep 17 00:00:00 2001 From: Michal Koutný Date: Tue, 21 May 2024 11:21:26 +0200 Subject: cgroup/pids: Separate semantics of pids.events related to pids.max MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, when pids.max limit is breached in the hierarchy, the event is counted and reported in the cgroup where the forking task resides. This decouples the limit and the notification caused by the limit making it hard to detect when the actual limit was effected. Redefine the pids.events:max as: the number of times the limit of the cgroup was hit. (Implementation differentiates also "forkfail" event but this is currently not exposed as it would better fit into pids.stat. It also differs from pids.events:max only when pids.max is configured on non-leaf cgroups.) Since it changes semantics of the original "max" event, introduce this change only in the v2 API of the controller and add a cgroup2 mount option to revert to the legacy behavior. Signed-off-by: Michal Koutný Signed-off-by: Tejun Heo --- Documentation/admin-guide/cgroup-v1/pids.rst | 3 +- Documentation/admin-guide/cgroup-v2.rst | 13 +++++--- include/linux/cgroup-defs.h | 7 ++++- kernel/cgroup/cgroup.c | 15 +++++++++- kernel/cgroup/pids.c | 44 +++++++++++++++++++++------- 5 files changed, 64 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/cgroup-v1/pids.rst b/Documentation/admin-guide/cgroup-v1/pids.rst index 6acebd9e72c8..0f9f9a7b1f6c 100644 --- a/Documentation/admin-guide/cgroup-v1/pids.rst +++ b/Documentation/admin-guide/cgroup-v1/pids.rst @@ -36,7 +36,8 @@ superset of parent/child/pids.current. The pids.events file contains event counters: - - max: Number of times fork failed because limit was hit. + - max: Number of times fork failed in the cgroup because limit was hit in + self or ancestors. Example ------- diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 8fbb0519d556..dfeb51c994e6 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -239,6 +239,10 @@ cgroup v2 currently supports the following mount options. will not be tracked by the memory controller (even if cgroup v2 is remounted later on). + pids_localevents + Represent fork failures inside cgroup's pids.events:max (v1 behavior), + not its limit being hit (v2 behavior). + Organizing Processes and Threads -------------------------------- @@ -2205,12 +2209,13 @@ PID Interface Files descendants has ever reached. pids.events - A read-only flat-keyed file which exists on non-root cgroups. The - following entries are defined. Unless specified otherwise, a value - change in this file generates a file modified event. + A read-only flat-keyed file which exists on non-root cgroups. Unless + specified otherwise, a value change in this file generates a file + modified event. The following entries are defined. max - Number of times fork failed because limit was hit. + The number of times the cgroup's number of processes hit the + limit (see also pids_localevents). Organisational operations are not blocked by cgroup policies, so it is possible to have pids.current > pids.max. This can be done by either diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ea48c861cd36..b36690ca0d3f 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -119,7 +119,12 @@ enum { /* * Enable hugetlb accounting for the memory controller. */ - CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), + CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), + + /* + * Enable legacy local pids.events. + */ + CGRP_ROOT_PIDS_LOCAL_EVENTS = (1 << 20), }; /* cftype->flags */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e32b6972c478..9c9943ea5f89 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1922,6 +1922,7 @@ enum cgroup2_param { Opt_memory_localevents, Opt_memory_recursiveprot, Opt_memory_hugetlb_accounting, + Opt_pids_localevents, nr__cgroup2_params }; @@ -1931,6 +1932,7 @@ static const struct fs_parameter_spec cgroup2_fs_parameters[] = { fsparam_flag("memory_localevents", Opt_memory_localevents), fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot), fsparam_flag("memory_hugetlb_accounting", Opt_memory_hugetlb_accounting), + fsparam_flag("pids_localevents", Opt_pids_localevents), {} }; @@ -1960,6 +1962,9 @@ static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param case Opt_memory_hugetlb_accounting: ctx->flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING; return 0; + case Opt_pids_localevents: + ctx->flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS; + return 0; } return -EINVAL; } @@ -1989,6 +1994,11 @@ static void apply_cgroup_root_flags(unsigned int root_flags) cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING; else cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING; + + if (root_flags & CGRP_ROOT_PIDS_LOCAL_EVENTS) + cgrp_dfl_root.flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS; + else + cgrp_dfl_root.flags &= ~CGRP_ROOT_PIDS_LOCAL_EVENTS; } } @@ -2004,6 +2014,8 @@ static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root seq_puts(seq, ",memory_recursiveprot"); if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING) seq_puts(seq, ",memory_hugetlb_accounting"); + if (cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS) + seq_puts(seq, ",pids_localevents"); return 0; } @@ -7062,7 +7074,8 @@ static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr, "favordynmods\n" "memory_localevents\n" "memory_recursiveprot\n" - "memory_hugetlb_accounting\n"); + "memory_hugetlb_accounting\n" + "pids_localevents\n"); } static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features); diff --git a/kernel/cgroup/pids.c b/kernel/cgroup/pids.c index 0e5ec7d59b4d..a557f5c8300b 100644 --- a/kernel/cgroup/pids.c +++ b/kernel/cgroup/pids.c @@ -38,6 +38,14 @@ #define PIDS_MAX (PID_MAX_LIMIT + 1ULL) #define PIDS_MAX_STR "max" +enum pidcg_event { + /* Fork failed in subtree because this pids_cgroup limit was hit. */ + PIDCG_MAX, + /* Fork failed in this pids_cgroup because ancestor limit was hit. */ + PIDCG_FORKFAIL, + NR_PIDCG_EVENTS, +}; + struct pids_cgroup { struct cgroup_subsys_state css; @@ -52,8 +60,7 @@ struct pids_cgroup { /* Handle for "pids.events" */ struct cgroup_file events_file; - /* Number of times fork failed because limit was hit. */ - atomic64_t events_limit; + atomic64_t events[NR_PIDCG_EVENTS]; }; static struct pids_cgroup *css_pids(struct cgroup_subsys_state *css) @@ -148,12 +155,13 @@ static void pids_charge(struct pids_cgroup *pids, int num) * pids_try_charge - hierarchically try to charge the pid count * @pids: the pid cgroup state * @num: the number of pids to charge + * @fail: storage of pid cgroup causing the fail * * This function follows the set limit. It will fail if the charge would cause * the new value to exceed the hierarchical limit. Returns 0 if the charge * succeeded, otherwise -EAGAIN. */ -static int pids_try_charge(struct pids_cgroup *pids, int num) +static int pids_try_charge(struct pids_cgroup *pids, int num, struct pids_cgroup **fail) { struct pids_cgroup *p, *q; @@ -166,9 +174,10 @@ static int pids_try_charge(struct pids_cgroup *pids, int num) * p->limit is %PIDS_MAX then we know that this test will never * fail. */ - if (new > limit) + if (new > limit) { + *fail = p; goto revert; - + } /* * Not technically accurate if we go over limit somewhere up * the hierarchy, but that's tolerable for the watermark. @@ -236,7 +245,7 @@ static void pids_cancel_attach(struct cgroup_taskset *tset) static int pids_can_fork(struct task_struct *task, struct css_set *cset) { struct cgroup_subsys_state *css; - struct pids_cgroup *pids; + struct pids_cgroup *pids, *pids_over_limit; int err; if (cset) @@ -244,15 +253,23 @@ static int pids_can_fork(struct task_struct *task, struct css_set *cset) else css = task_css_check(current, pids_cgrp_id, true); pids = css_pids(css); - err = pids_try_charge(pids, 1); + err = pids_try_charge(pids, 1, &pids_over_limit); if (err) { - /* Only log the first time events_limit is incremented. */ - if (atomic64_inc_return(&pids->events_limit) == 1) { + /* compatibility on v1 where events were notified in leaves. */ + if (!cgroup_subsys_on_dfl(pids_cgrp_subsys)) + pids_over_limit = pids; + + /* Only log the first time limit is hit. */ + if (atomic64_inc_return(&pids->events[PIDCG_FORKFAIL]) == 1) { pr_info("cgroup: fork rejected by pids controller in "); - pr_cont_cgroup_path(css->cgroup); + pr_cont_cgroup_path(pids->css.cgroup); pr_cont("\n"); } + atomic64_inc(&pids_over_limit->events[PIDCG_MAX]); + cgroup_file_notify(&pids->events_file); + if (pids_over_limit != pids) + cgroup_file_notify(&pids_over_limit->events_file); } return err; } @@ -340,8 +357,13 @@ static s64 pids_peak_read(struct cgroup_subsys_state *css, static int pids_events_show(struct seq_file *sf, void *v) { struct pids_cgroup *pids = css_pids(seq_css(sf)); + enum pidcg_event pe = PIDCG_MAX; + + if (!cgroup_subsys_on_dfl(pids_cgrp_subsys) || + cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS) + pe = PIDCG_FORKFAIL; - seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events_limit)); + seq_printf(sf, "max %lld\n", (s64)atomic64_read(&pids->events[pe])); return 0; } -- cgit v1.2.3 From 965cc040bf0698e81b0c0aef359ae650b42b428e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 20 May 2024 16:19:56 +0200 Subject: ASoC: Constify channel mapping array arguments in set_channel_map() There is no need for implementations of DAI set_channel_map() to modify contents of passed arrays with actual channel mapping. Additionally, the caller keeps full ownership of the array. Constify these pointer arguments so the code will be safer and easier to read (documenting the caller's ownership). Acked-by: Charles Keepax Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240520-asoc-x1e80100-4-channel-mapping-v4-1-f657159b4aad@linaro.org Signed-off-by: Mark Brown --- include/sound/cs35l41.h | 4 ++-- include/sound/soc-dai.h | 8 ++++---- sound/soc/codecs/adau7118.c | 6 ++++-- sound/soc/codecs/cs35l41-lib.c | 4 ++-- sound/soc/codecs/cs35l41.c | 3 ++- sound/soc/codecs/max98504.c | 6 ++++-- sound/soc/codecs/wcd9335.c | 6 ++++-- sound/soc/codecs/wcd934x.c | 6 ++++-- sound/soc/qcom/qdsp6/q6afe-dai.c | 16 ++++++++++------ sound/soc/qcom/qdsp6/q6apm-lpass-dais.c | 6 ++++-- sound/soc/soc-dai.c | 4 ++-- 11 files changed, 42 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/sound/cs35l41.h b/include/sound/cs35l41.h index bb70782d15d0..43c6a9ef8d9f 100644 --- a/include/sound/cs35l41.h +++ b/include/sound/cs35l41.h @@ -896,8 +896,8 @@ int cs35l41_test_key_lock(struct device *dev, struct regmap *regmap); int cs35l41_otp_unpack(struct device *dev, struct regmap *regmap); int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsigned int reg_revid); int cs35l41_set_channels(struct device *dev, struct regmap *reg, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot); + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot); int cs35l41_gpio_config(struct regmap *regmap, struct cs35l41_hw_cfg *hw_cfg); void cs35l41_configure_cs_dsp(struct device *dev, struct regmap *reg, struct cs_dsp *dsp); int cs35l41_set_cspl_mbox_cmd(struct device *dev, struct regmap *regmap, diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index adcd8719d343..15ef268c9845 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -188,8 +188,8 @@ int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width); int snd_soc_dai_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot); + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot); int snd_soc_dai_set_tristate(struct snd_soc_dai *dai, int tristate); @@ -305,8 +305,8 @@ struct snd_soc_dai_ops { unsigned int tx_mask, unsigned int rx_mask, int slots, int slot_width); int (*set_channel_map)(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot); + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot); int (*get_channel_map)(struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot); diff --git a/sound/soc/codecs/adau7118.c b/sound/soc/codecs/adau7118.c index a663d37e5776..abc4764697a5 100644 --- a/sound/soc/codecs/adau7118.c +++ b/sound/soc/codecs/adau7118.c @@ -121,8 +121,10 @@ static const struct snd_soc_dapm_widget adau7118_widgets[] = { }; static int adau7118_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot) + unsigned int tx_num, + const unsigned int *tx_slot, + unsigned int rx_num, + const unsigned int *rx_slot) { struct adau7118_data *st = snd_soc_component_get_drvdata(dai->component); diff --git a/sound/soc/codecs/cs35l41-lib.c b/sound/soc/codecs/cs35l41-lib.c index e9993a39f7d0..1702f26049d3 100644 --- a/sound/soc/codecs/cs35l41-lib.c +++ b/sound/soc/codecs/cs35l41-lib.c @@ -936,8 +936,8 @@ int cs35l41_register_errata_patch(struct device *dev, struct regmap *reg, unsign EXPORT_SYMBOL_GPL(cs35l41_register_errata_patch); int cs35l41_set_channels(struct device *dev, struct regmap *reg, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot) + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot) { unsigned int val, mask; int i; diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c index cb25c33cc9b9..1688c2c688f0 100644 --- a/sound/soc/codecs/cs35l41.c +++ b/sound/soc/codecs/cs35l41.c @@ -673,7 +673,8 @@ static const struct snd_soc_dapm_route cs35l41_audio_map[] = { }; static int cs35l41_set_channel_map(struct snd_soc_dai *dai, unsigned int tx_n, - unsigned int *tx_slot, unsigned int rx_n, unsigned int *rx_slot) + const unsigned int *tx_slot, + unsigned int rx_n, const unsigned int *rx_slot) { struct cs35l41_private *cs35l41 = snd_soc_component_get_drvdata(dai->component); diff --git a/sound/soc/codecs/max98504.c b/sound/soc/codecs/max98504.c index 93412b966b33..6b6a7ece4cec 100644 --- a/sound/soc/codecs/max98504.c +++ b/sound/soc/codecs/max98504.c @@ -220,8 +220,10 @@ static int max98504_set_tdm_slot(struct snd_soc_dai *dai, return 0; } static int max98504_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot) + unsigned int tx_num, + const unsigned int *tx_slot, + unsigned int rx_num, + const unsigned int *rx_slot) { struct max98504_priv *max98504 = snd_soc_dai_get_drvdata(dai); struct regmap *map = max98504->regmap; diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index deb15b95992d..42a99978fe5a 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -1983,8 +1983,10 @@ static int wcd9335_trigger(struct snd_pcm_substream *substream, int cmd, } static int wcd9335_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot) + unsigned int tx_num, + const unsigned int *tx_slot, + unsigned int rx_num, + const unsigned int *rx_slot) { struct wcd9335_codec *wcd; int i; diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index de870c7819ca..fcad2c9fba55 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -1923,8 +1923,10 @@ static int wcd934x_trigger(struct snd_pcm_substream *substream, int cmd, } static int wcd934x_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot) + unsigned int tx_num, + const unsigned int *tx_slot, + unsigned int rx_num, + const unsigned int *rx_slot) { struct wcd934x_codec *wcd; int i; diff --git a/sound/soc/qcom/qdsp6/q6afe-dai.c b/sound/soc/qcom/qdsp6/q6afe-dai.c index a9c4f896a7df..7d9628cda875 100644 --- a/sound/soc/qcom/qdsp6/q6afe-dai.c +++ b/sound/soc/qcom/qdsp6/q6afe-dai.c @@ -172,8 +172,8 @@ static int q6tdm_set_tdm_slot(struct snd_soc_dai *dai, } static int q6tdm_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot) + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot) { struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev); @@ -250,8 +250,10 @@ static int q6tdm_hw_params(struct snd_pcm_substream *substream, } static int q6dma_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_ch_mask, - unsigned int rx_num, unsigned int *rx_ch_mask) + unsigned int tx_num, + const unsigned int *tx_ch_mask, + unsigned int rx_num, + const unsigned int *rx_ch_mask) { struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev); @@ -407,8 +409,10 @@ static int q6afe_dai_prepare(struct snd_pcm_substream *substream, } static int q6slim_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot) + unsigned int tx_num, + const unsigned int *tx_slot, + unsigned int rx_num, + const unsigned int *rx_slot) { struct q6afe_dai_data *dai_data = dev_get_drvdata(dai->dev); struct q6afe_port_config *pcfg = &dai_data->port_config[dai->id]; diff --git a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c index 68a38f63a2db..6bfbb52345e1 100644 --- a/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c +++ b/sound/soc/qcom/qdsp6/q6apm-lpass-dais.c @@ -25,8 +25,10 @@ struct q6apm_lpass_dai_data { }; static int q6dma_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_ch_mask, - unsigned int rx_num, unsigned int *rx_ch_mask) + unsigned int tx_num, + const unsigned int *tx_ch_mask, + unsigned int rx_num, + const unsigned int *rx_ch_mask) { struct q6apm_lpass_dai_data *dai_data = dev_get_drvdata(dai->dev); diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index fefe394dce72..03afd5efb24c 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -304,8 +304,8 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_tdm_slot); * configure the relationship between channel number and TDM slot number. */ int snd_soc_dai_set_channel_map(struct snd_soc_dai *dai, - unsigned int tx_num, unsigned int *tx_slot, - unsigned int rx_num, unsigned int *rx_slot) + unsigned int tx_num, const unsigned int *tx_slot, + unsigned int rx_num, const unsigned int *rx_slot) { int ret = -ENOTSUPP; -- cgit v1.2.3 From 734447685ecc7c6328e40cb1bd4aaeeac03c1413 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 13 May 2024 19:37:20 +0200 Subject: ASoC: topology: Constify an argument of snd_soc_tplg_component_load() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit snd_soc_tplg_component_load() does not modify its "*ops" argument. It only read some values and stores it in "soc_tplg.ops". This argument and the ops field in "struct soc_tplg" can be made const. Signed-off-by: Christophe JAILLET Reviewed-by: Amadeusz Sławiński Link: https://msgid.link/r/f2f983e791d7f941a95556bb147f426a345d84d4.1715526069.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- include/sound/soc-topology.h | 2 +- sound/soc/soc-topology.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/soc-topology.h b/include/sound/soc-topology.h index f055c6917f6c..1eedd203ac29 100644 --- a/include/sound/soc-topology.h +++ b/include/sound/soc-topology.h @@ -178,7 +178,7 @@ static inline const void *snd_soc_tplg_get_data(struct snd_soc_tplg_hdr *hdr) /* Dynamic Object loading and removal for component drivers */ int snd_soc_tplg_component_load(struct snd_soc_component *comp, - struct snd_soc_tplg_ops *ops, const struct firmware *fw); + const struct snd_soc_tplg_ops *ops, const struct firmware *fw); int snd_soc_tplg_component_remove(struct snd_soc_component *comp); /* Binds event handlers to dynamic widgets */ diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 90ca37e008b3..b00ec01361c2 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -73,7 +73,7 @@ struct soc_tplg { int bytes_ext_ops_count; /* optional fw loading callbacks to component drivers */ - struct snd_soc_tplg_ops *ops; + const struct snd_soc_tplg_ops *ops; }; /* check we dont overflow the data for this control chunk */ @@ -2334,7 +2334,7 @@ static int soc_tplg_load(struct soc_tplg *tplg) /* load audio component topology from "firmware" file */ int snd_soc_tplg_component_load(struct snd_soc_component *comp, - struct snd_soc_tplg_ops *ops, const struct firmware *fw) + const struct snd_soc_tplg_ops *ops, const struct firmware *fw) { struct soc_tplg tplg; int ret; -- cgit v1.2.3 From 7a147670035ddec35f3fb2ace538ad56ad82c861 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 12 May 2024 13:11:21 +0200 Subject: regulator: consumer: Reorder fields in 'struct regulator_bulk_data' Based on pahole, 2 holes can be combined in 'struct regulator_bulk_data'. On x86_64 and allmodconfig, this shrinks the size of the structure from 32 to 24 bytes. This is usually a win, because this structure is often used for static global variables. As an example: Before: text data bss dec hex filename 3557 162 0 3719 e87 drivers/gpu/drm/msm/dsi/dsi_cfg.o After: text data bss dec hex filename 3477 162 0 3639 e37 drivers/gpu/drm/msm/dsi/dsi_cfg.o Signed-off-by: Christophe JAILLET Link: https://msgid.link/r/35c4edf2dbc6d4f24fb771341ded2989ae32f779.1715512259.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index 59d0b9a79e6e..e6f81fc1fb17 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -128,11 +128,11 @@ struct regulator; * * @supply: The name of the supply. Initialised by the user before * using the bulk regulator APIs. + * @consumer: The regulator consumer for the supply. This will be managed + * by the bulk API. * @init_load_uA: After getting the regulator, regulator_set_load() will be * called with this load. Initialised by the user before * using the bulk regulator APIs. - * @consumer: The regulator consumer for the supply. This will be managed - * by the bulk API. * * The regulator APIs provide a series of regulator_bulk_() API calls as * a convenience to consumers which require multiple supplies. This @@ -140,8 +140,8 @@ struct regulator; */ struct regulator_bulk_data { const char *supply; - int init_load_uA; struct regulator *consumer; + int init_load_uA; /* private: Internal use */ int ret; -- cgit v1.2.3 From f261172d39f358dcecce13c310690d3937e0cca6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 May 2024 22:40:20 +0300 Subject: spi: bitbang: Use typedef for txrx_*() callbacks With a typedef for the txrx_*() callbacks the code looks neater. Note that typedef for a function is okay to have. Signed-off-by: Andy Shevchenko Link: https://msgid.link/r/20240517194104.747328-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-bitbang.c | 31 +++++++++---------------------- include/linux/spi/spi_bitbang.h | 7 ++++--- 2 files changed, 13 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/spi/spi-bitbang.c b/drivers/spi/spi-bitbang.c index ca5cc67555c5..d88110acdc5f 100644 --- a/drivers/spi/spi-bitbang.c +++ b/drivers/spi/spi-bitbang.c @@ -38,26 +38,19 @@ * working quickly, or testing for differences that aren't speed related. */ +typedef unsigned int (*spi_bb_txrx_bufs_fn)(struct spi_device *, spi_bb_txrx_word_fn, + unsigned int, struct spi_transfer *, + unsigned int); + struct spi_bitbang_cs { unsigned nsecs; /* (clock cycle time)/2 */ - u32 (*txrx_word)(struct spi_device *spi, unsigned nsecs, - u32 word, u8 bits, unsigned flags); - unsigned (*txrx_bufs)(struct spi_device *, - u32 (*txrx_word)( - struct spi_device *spi, - unsigned nsecs, - u32 word, u8 bits, - unsigned flags), - unsigned, struct spi_transfer *, - unsigned); + spi_bb_txrx_word_fn txrx_word; + spi_bb_txrx_bufs_fn txrx_bufs; }; static unsigned bitbang_txrx_8( struct spi_device *spi, - u32 (*txrx_word)(struct spi_device *spi, - unsigned nsecs, - u32 word, u8 bits, - unsigned flags), + spi_bb_txrx_word_fn txrx_word, unsigned ns, struct spi_transfer *t, unsigned flags @@ -83,10 +76,7 @@ static unsigned bitbang_txrx_8( static unsigned bitbang_txrx_16( struct spi_device *spi, - u32 (*txrx_word)(struct spi_device *spi, - unsigned nsecs, - u32 word, u8 bits, - unsigned flags), + spi_bb_txrx_word_fn txrx_word, unsigned ns, struct spi_transfer *t, unsigned flags @@ -112,10 +102,7 @@ static unsigned bitbang_txrx_16( static unsigned bitbang_txrx_32( struct spi_device *spi, - u32 (*txrx_word)(struct spi_device *spi, - unsigned nsecs, - u32 word, u8 bits, - unsigned flags), + spi_bb_txrx_word_fn txrx_word, unsigned ns, struct spi_transfer *t, unsigned flags diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index b930eca2ef7b..7ca08b430ed5 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -4,6 +4,8 @@ #include +typedef u32 (*spi_bb_txrx_word_fn)(struct spi_device *, unsigned int, u32, u8, unsigned int); + struct spi_bitbang { struct mutex lock; u8 busy; @@ -28,9 +30,8 @@ struct spi_bitbang { int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t); /* txrx_word[SPI_MODE_*]() just looks like a shift register */ - u32 (*txrx_word[4])(struct spi_device *spi, - unsigned nsecs, - u32 word, u8 bits, unsigned flags); + spi_bb_txrx_word_fn txrx_word[4]; + int (*set_line_direction)(struct spi_device *spi, bool output); }; -- cgit v1.2.3 From b90cc232e2ce8c959b19dc4b183e23e7aec137ab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 May 2024 22:40:22 +0300 Subject: spi: bitbang: Replace hard coded number of SPI modes Instead of using hard coded number of modes, replace it with SPI_MODE_X_MASK + 1 to show relation to the SPI modes. Signed-off-by: Andy Shevchenko Link: https://msgid.link/r/20240517194104.747328-4-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 7ca08b430ed5..d4cb83195f7a 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -30,7 +30,7 @@ struct spi_bitbang { int (*txrx_bufs)(struct spi_device *spi, struct spi_transfer *t); /* txrx_word[SPI_MODE_*]() just looks like a shift register */ - spi_bb_txrx_word_fn txrx_word[4]; + spi_bb_txrx_word_fn txrx_word[SPI_MODE_X_MASK + 1]; int (*set_line_direction)(struct spi_device *spi, bool output); }; -- cgit v1.2.3 From 983095eaf6c161ef73d96152bfc1a99ca051cd57 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 25 May 2024 18:00:31 +0200 Subject: dma-buf/fence-array: Add flex array to struct dma_fence_array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an effort to get rid of all multiplications from allocation functions in order to prevent integer overflows [1][2]. The "struct dma_fence_array" can be refactored to add a flex array in order to have the "callback structures allocated behind the array" be more explicit. Do so: - makes the code more readable and safer. - allows using __counted_by() for additional checks - avoids some pointer arithmetic in dma_fence_array_enable_signaling() Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#open-coded-arithmetic-in-allocator-arguments [1] Link: https://github.com/KSPP/linux/issues/160 [2] Signed-off-by: Christophe JAILLET Reviewed-by: Kees Cook Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/8b4e556e07b5dd78bb8a39b67ea0a43b199083c8.1716652811.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christian König --- drivers/dma-buf/dma-fence-array.c | 10 ++++------ include/linux/dma-fence-array.h | 3 +++ 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index 9b3ce8948351..c74ac197d5fe 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -70,7 +70,7 @@ static void dma_fence_array_cb_func(struct dma_fence *f, static bool dma_fence_array_enable_signaling(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - struct dma_fence_array_cb *cb = (void *)(&array[1]); + struct dma_fence_array_cb *cb = array->callbacks; unsigned i; for (i = 0; i < array->num_fences; ++i) { @@ -168,22 +168,20 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, bool signal_on_any) { struct dma_fence_array *array; - size_t size = sizeof(*array); WARN_ON(!num_fences || !fences); - /* Allocate the callback structures behind the array. */ - size += num_fences * sizeof(struct dma_fence_array_cb); - array = kzalloc(size, GFP_KERNEL); + array = kzalloc(struct_size(array, callbacks, num_fences), GFP_KERNEL); if (!array) return NULL; + array->num_fences = num_fences; + spin_lock_init(&array->lock); dma_fence_init(&array->base, &dma_fence_array_ops, &array->lock, context, seqno); init_irq_work(&array->work, irq_dma_fence_array_work); - array->num_fences = num_fences; atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences); array->fences = fences; diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index ec7f25def392..29c5650c1038 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -33,6 +33,7 @@ struct dma_fence_array_cb { * @num_pending: fences in the array still pending * @fences: array of the fences * @work: internal irq_work function + * @callbacks: array of callback helpers */ struct dma_fence_array { struct dma_fence base; @@ -43,6 +44,8 @@ struct dma_fence_array { struct dma_fence **fences; struct irq_work work; + + struct dma_fence_array_cb callbacks[] __counted_by(num_fences); }; /** -- cgit v1.2.3 From 0edb555a65d1ef047a9805051c36922b52a38a9d Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Mon, 9 Oct 2023 12:37:26 +0200 Subject: platform: Make platform_driver::remove() return void MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct platform_driver::remove returning an integer made driver authors expect that returning an error code was proper error handling. However the driver core ignores the error and continues to remove the device because there is nothing the core could do anyhow and reentering the remove callback again is only calling for trouble. To prevent such wrong assumptions, change the return type of the remove callback to void. This was prepared by introducing an alternative remove callback returning void and converting all drivers to that. So .remove() can be changed without further changes in drivers. This corresponds to step b) of the plan outlined in commit 5c5a7680e67b ("platform: Provide a remove callback that returns no value"). Signed-off-by: Uwe Kleine-König --- drivers/base/platform.c | 10 ++-------- include/linux/platform_device.h | 15 +++++++-------- 2 files changed, 9 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 10c577963418..c8aa1be70526 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -1420,14 +1420,8 @@ static void platform_remove(struct device *_dev) struct platform_driver *drv = to_platform_driver(_dev->driver); struct platform_device *dev = to_platform_device(_dev); - if (drv->remove_new) { - drv->remove_new(dev); - } else if (drv->remove) { - int ret = drv->remove(dev); - - if (ret) - dev_warn(_dev, "remove callback returned a non-zero value. This will be ignored.\n"); - } + if (drv->remove) + drv->remove(dev); dev_pm_domain_detach(_dev, true); } diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 7a41c72c1959..d422db6eec63 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -237,15 +237,14 @@ struct platform_driver { int (*probe)(struct platform_device *); /* - * Traditionally the remove callback returned an int which however is - * ignored by the driver core. This led to wrong expectations by driver - * authors who thought returning an error code was a valid error - * handling strategy. To convert to a callback returning void, new - * drivers should implement .remove_new() until the conversion it done - * that eventually makes .remove() return void. + * .remove_new() is a relic from a prototype conversion of .remove(). + * New drivers are supposed to implement .remove(). Once all drivers are + * converted to not use .remove_new any more, it will be dropped. */ - int (*remove)(struct platform_device *); - void (*remove_new)(struct platform_device *); + union { + void (*remove)(struct platform_device *); + void (*remove_new)(struct platform_device *); + }; void (*shutdown)(struct platform_device *); int (*suspend)(struct platform_device *, pm_message_t state); -- cgit v1.2.3 From c80c4490c280a1678e47d34d2a335a58f1318615 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 May 2024 12:45:31 +0000 Subject: cleanup: Standardize the header guard define's name At some point during early development, the header must have been named , as evidenced by the header guard name: #ifndef __LINUX_GUARDS_H #define __LINUX_GUARDS_H It ended up being , but the old guard name for a file name that was never upstream never changed. Do that now - and while at it, also use the canonical _LINUX prefix, instead of the less common __LINUX prefix. Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Link: https://lore.kernel.org/r/171664113181.10875.8784434350512348496.tip-bot2@tip-bot2 --- include/linux/cleanup.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index c2d09bc4f976..cef68e8e09b6 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __LINUX_GUARDS_H -#define __LINUX_GUARDS_H +#ifndef _LINUX_CLEANUP_H +#define _LINUX_CLEANUP_H #include @@ -247,4 +247,4 @@ __DEFINE_LOCK_GUARD_0(_name, _lock) { return class_##_name##_lock_ptr(_T); } -#endif /* __LINUX_GUARDS_H */ +#endif /* _LINUX_CLEANUP_H */ -- cgit v1.2.3 From 5350f6ec55df381cd99db2a6bde283328fb3f75a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 3 May 2024 21:42:30 +0300 Subject: mtd: cfi: Get rid of redundant 'else' In the snippets like the following if (...) return / goto / break / continue ...; else ... the 'else' is redundant. Get rid of it. Signed-off-by: Andy Shevchenko Reviewed-by: Jeff Johnson Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20240503184230.2927283-1-andriy.shevchenko@linux.intel.com --- include/linux/mtd/cfi.h | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 947410faf9e2..35ca19ae21ae 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -308,32 +308,32 @@ static inline uint8_t cfi_read_query(struct map_info *map, uint32_t addr) { map_word val = map_read(map, addr); - if (map_bankwidth_is_1(map)) { + if (map_bankwidth_is_1(map)) return val.x[0]; - } else if (map_bankwidth_is_2(map)) { + if (map_bankwidth_is_2(map)) return cfi16_to_cpu(map, val.x[0]); - } else { - /* No point in a 64-bit byteswap since that would just be - swapping the responses from different chips, and we are - only interested in one chip (a representative sample) */ - return cfi32_to_cpu(map, val.x[0]); - } + /* + * No point in a 64-bit byteswap since that would just be + * swapping the responses from different chips, and we are + * only interested in one chip (a representative sample) + */ + return cfi32_to_cpu(map, val.x[0]); } static inline uint16_t cfi_read_query16(struct map_info *map, uint32_t addr) { map_word val = map_read(map, addr); - if (map_bankwidth_is_1(map)) { + if (map_bankwidth_is_1(map)) return val.x[0] & 0xff; - } else if (map_bankwidth_is_2(map)) { + if (map_bankwidth_is_2(map)) return cfi16_to_cpu(map, val.x[0]); - } else { - /* No point in a 64-bit byteswap since that would just be - swapping the responses from different chips, and we are - only interested in one chip (a representative sample) */ - return cfi32_to_cpu(map, val.x[0]); - } + /* + * No point in a 64-bit byteswap since that would just be + * swapping the responses from different chips, and we are + * only interested in one chip (a representative sample) + */ + return cfi32_to_cpu(map, val.x[0]); } void cfi_udelay(int us); -- cgit v1.2.3 From 56813b244e5f2ace887f04c4a69a2a0041992297 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Mon, 27 May 2024 11:27:47 +0200 Subject: w1: Add missing newline and fix typos in w1_bus_master comment - Add missing newline before @return - s/bytes/byte/ - s/handles/handle/ - s/exists/exist/ in dev_info() message Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20240527092746.263038-2-thorsten.blum@toblux.com [krzysztof: squash "w1: Fix typo in dev_info() message"] Signed-off-by: Krzysztof Kozlowski --- drivers/w1/w1.c | 2 +- include/linux/w1.h | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c index afb1cc4606c5..d82e86d3ddf6 100644 --- a/drivers/w1/w1.c +++ b/drivers/w1/w1.c @@ -504,7 +504,7 @@ static ssize_t w1_master_attribute_store_remove(struct device *dev, if (result == 0) result = count; } else { - dev_info(dev, "Device %02x-%012llx doesn't exists\n", rn.family, + dev_info(dev, "Device %02x-%012llx doesn't exist\n", rn.family, (unsigned long long)rn.id); result = -EINVAL; } diff --git a/include/linux/w1.h b/include/linux/w1.h index 9a2a0ef39018..064805bfae3f 100644 --- a/include/linux/w1.h +++ b/include/linux/w1.h @@ -85,7 +85,8 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64); * * @data: the first parameter in all the functions below * - * @read_bit: Sample the line level @return the level read (0 or 1) + * @read_bit: Sample the line level + * @return the level read (0 or 1) * * @write_bit: Sets the line level * @@ -95,7 +96,7 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64); * touch_bit(1) = write-1 / read cycle * @return the bit read (0 or 1) * - * @read_byte: Reads a bytes. Same as 8 touch_bit(1) calls. + * @read_byte: Reads a byte. Same as 8 touch_bit(1) calls. * @return the byte read * * @write_byte: Writes a byte. Same as 8 touch_bit(x) calls. @@ -114,7 +115,7 @@ typedef void (*w1_slave_found_callback)(struct w1_master *, u64); * @set_pullup: Put out a strong pull-up pulse of the specified duration. * @return -1=Error, 0=completed * - * @search: Really nice hardware can handles the different types of ROM search + * @search: Really nice hardware can handle the different types of ROM search * w1_master* is passed to the slave found callback. * u8 is search_type, W1_SEARCH or W1_ALARM_SEARCH * -- cgit v1.2.3 From 8cb3aeebcb86088e30232277c9bee9054837442b Mon Sep 17 00:00:00 2001 From: Mohan Kumar Date: Mon, 27 May 2024 12:56:06 +0000 Subject: ASoC: simple-card-utils: Split simple_fixup_sample_fmt func Split the simple_fixup_sample_fmt() into two functions by adding one more function named simple_util_get_sample_fmt() to return the sample format value. This is useful for drivers that wish to simply get the sample format without setting the mask. Signed-off-by: Mohan Kumar Signed-off-by: Sameer Pujar Link: https://msgid.link/r/20240527125608.2461300-2-spujar@nvidia.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 2 ++ sound/soc/generic/simple-card-utils.c | 26 ++++++++++++++++++++------ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index ad67957b7b48..2c2279d082ec 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -174,6 +174,8 @@ void simple_util_parse_convert(struct device_node *np, char *prefix, struct simple_util_data *data); bool simple_util_is_convert_required(const struct simple_util_data *data); +int simple_util_get_sample_fmt(struct simple_util_data *data); + int simple_util_parse_routing(struct snd_soc_card *card, char *prefix); int simple_util_parse_widgets(struct snd_soc_card *card, diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index b4876b4f259d..852989611362 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -13,12 +13,11 @@ #include #include -static void simple_fixup_sample_fmt(struct simple_util_data *data, - struct snd_pcm_hw_params *params) +int simple_util_get_sample_fmt(struct simple_util_data *data) { int i; - struct snd_mask *mask = hw_param_mask(params, - SNDRV_PCM_HW_PARAM_FORMAT); + int val = -EINVAL; + struct { char *fmt; u32 val; @@ -33,11 +32,26 @@ static void simple_fixup_sample_fmt(struct simple_util_data *data, for (i = 0; i < ARRAY_SIZE(of_sample_fmt_table); i++) { if (!strcmp(data->convert_sample_format, of_sample_fmt_table[i].fmt)) { - snd_mask_none(mask); - snd_mask_set(mask, of_sample_fmt_table[i].val); + val = of_sample_fmt_table[i].val; break; } } + return val; +} +EXPORT_SYMBOL_GPL(simple_util_get_sample_fmt); + +static void simple_fixup_sample_fmt(struct simple_util_data *data, + struct snd_pcm_hw_params *params) +{ + int val; + struct snd_mask *mask = hw_param_mask(params, + SNDRV_PCM_HW_PARAM_FORMAT); + + val = simple_util_get_sample_fmt(data); + if (val >= 0) { + snd_mask_none(mask); + snd_mask_set(mask, val); + } } void simple_util_parse_convert(struct device_node *np, -- cgit v1.2.3 From 3aa63a569c64e708df547a8913c84e64a06e7853 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 17 May 2024 20:08:40 -0400 Subject: fs: switch timespec64 fields in inode to discrete integers Adjacent struct timespec64's pack poorly. Switch them to discrete integer fields for the seconds and nanoseconds. This shaves 8 bytes off struct inode with a garden-variety Fedora Kconfig on x86_64, but that also moves the i_lock into the previous cacheline, away from the fields it protects. To remedy that, move i_generation above the i_lock, which moves the new 4-byte hole to just after the i_fsnotify_mask in my setup. Amir has plans to use that to expand the i_fsnotify_mask, so add a comment to that effect as well. Cc: Amir Goldstein Suggested-by: Linus Torvalds Signed-off-by: Jeff Layton Link: https://lore.kernel.org/r/20240517-amtime-v1-1-7b804ca4be8f@kernel.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- include/linux/fs.h | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..639885621608 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -660,9 +660,13 @@ struct inode { }; dev_t i_rdev; loff_t i_size; - struct timespec64 __i_atime; - struct timespec64 __i_mtime; - struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */ + time64_t i_atime_sec; + time64_t i_mtime_sec; + time64_t i_ctime_sec; + u32 i_atime_nsec; + u32 i_mtime_nsec; + u32 i_ctime_nsec; + u32 i_generation; spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ unsigned short i_bytes; u8 i_blkbits; @@ -719,10 +723,10 @@ struct inode { unsigned i_dir_seq; }; - __u32 i_generation; #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ + /* 32-bit hole reserved for expanding i_fsnotify_mask */ struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif @@ -1538,23 +1542,27 @@ struct timespec64 inode_set_ctime_current(struct inode *inode); static inline time64_t inode_get_atime_sec(const struct inode *inode) { - return inode->__i_atime.tv_sec; + return inode->i_atime_sec; } static inline long inode_get_atime_nsec(const struct inode *inode) { - return inode->__i_atime.tv_nsec; + return inode->i_atime_nsec; } static inline struct timespec64 inode_get_atime(const struct inode *inode) { - return inode->__i_atime; + struct timespec64 ts = { .tv_sec = inode_get_atime_sec(inode), + .tv_nsec = inode_get_atime_nsec(inode) }; + + return ts; } static inline struct timespec64 inode_set_atime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_atime = ts; + inode->i_atime_sec = ts.tv_sec; + inode->i_atime_nsec = ts.tv_nsec; return ts; } @@ -1563,28 +1571,32 @@ static inline struct timespec64 inode_set_atime(struct inode *inode, { struct timespec64 ts = { .tv_sec = sec, .tv_nsec = nsec }; + return inode_set_atime_to_ts(inode, ts); } static inline time64_t inode_get_mtime_sec(const struct inode *inode) { - return inode->__i_mtime.tv_sec; + return inode->i_mtime_sec; } static inline long inode_get_mtime_nsec(const struct inode *inode) { - return inode->__i_mtime.tv_nsec; + return inode->i_mtime_nsec; } static inline struct timespec64 inode_get_mtime(const struct inode *inode) { - return inode->__i_mtime; + struct timespec64 ts = { .tv_sec = inode_get_mtime_sec(inode), + .tv_nsec = inode_get_mtime_nsec(inode) }; + return ts; } static inline struct timespec64 inode_set_mtime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_mtime = ts; + inode->i_mtime_sec = ts.tv_sec; + inode->i_mtime_nsec = ts.tv_nsec; return ts; } @@ -1598,23 +1610,27 @@ static inline struct timespec64 inode_set_mtime(struct inode *inode, static inline time64_t inode_get_ctime_sec(const struct inode *inode) { - return inode->__i_ctime.tv_sec; + return inode->i_ctime_sec; } static inline long inode_get_ctime_nsec(const struct inode *inode) { - return inode->__i_ctime.tv_nsec; + return inode->i_ctime_nsec; } static inline struct timespec64 inode_get_ctime(const struct inode *inode) { - return inode->__i_ctime; + struct timespec64 ts = { .tv_sec = inode_get_ctime_sec(inode), + .tv_nsec = inode_get_ctime_nsec(inode) }; + + return ts; } static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode, struct timespec64 ts) { - inode->__i_ctime = ts; + inode->i_ctime_sec = ts.tv_sec; + inode->i_ctime_nsec = ts.tv_nsec; return ts; } -- cgit v1.2.3 From 447e140e66fd226350b3ce86cffc965eaae4c856 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 May 2024 13:17:01 +0300 Subject: gpio: Remove legacy API documentation In order to discourage people to use old and legacy GPIO APIs remove the respective documentation completely. It also helps further cleanups of the legacy GPIO API leftovers, which is ongoing task. Signed-off-by: Andy Shevchenko Reviewed-by: Hu Haowen <2023002089@link.tyut.edu.cn> Link: https://lore.kernel.org/r/20240508101703.830066-1-andriy.shevchenko@linux.intel.com Signed-off-by: Bartosz Golaszewski --- Documentation/driver-api/gpio/board.rst | 6 - Documentation/driver-api/gpio/consumer.rst | 4 +- Documentation/driver-api/gpio/index.rst | 1 - Documentation/driver-api/gpio/intro.rst | 12 - Documentation/driver-api/gpio/legacy.rst | 679 --------------------- .../translations/zh_CN/driver-api/gpio/index.rst | 2 - .../translations/zh_CN/driver-api/gpio/legacy.rst | 618 ------------------- Documentation/translations/zh_TW/gpio.txt | 574 ----------------- include/linux/gpio.h | 6 +- 9 files changed, 2 insertions(+), 1900 deletions(-) delete mode 100644 Documentation/driver-api/gpio/legacy.rst delete mode 100644 Documentation/translations/zh_CN/driver-api/gpio/legacy.rst delete mode 100644 Documentation/translations/zh_TW/gpio.txt (limited to 'include') diff --git a/Documentation/driver-api/gpio/board.rst b/Documentation/driver-api/gpio/board.rst index b33aa04f213f..4fd1cbd8296e 100644 --- a/Documentation/driver-api/gpio/board.rst +++ b/Documentation/driver-api/gpio/board.rst @@ -4,12 +4,6 @@ GPIO Mappings This document explains how GPIOs can be assigned to given devices and functions. -Note that it only applies to the new descriptor-based interface. For a -description of the deprecated integer-based GPIO interface please refer to -legacy.rst (actually, there is no real mapping possible with the old -interface; you just fetch an integer from somewhere and request the -corresponding GPIO). - All platforms can enable the GPIO library, but if the platform strictly requires GPIO functionality to be present, it needs to select GPIOLIB from its Kconfig. Then, how GPIOs are mapped depends on what the platform uses to diff --git a/Documentation/driver-api/gpio/consumer.rst b/Documentation/driver-api/gpio/consumer.rst index ab56ab0dd7a6..bb3366047fad 100644 --- a/Documentation/driver-api/gpio/consumer.rst +++ b/Documentation/driver-api/gpio/consumer.rst @@ -2,9 +2,7 @@ GPIO Descriptor Consumer Interface ================================== -This document describes the consumer interface of the GPIO framework. Note that -it describes the new descriptor-based interface. For a description of the -deprecated integer-based GPIO interface please refer to legacy.rst. +This document describes the consumer interface of the GPIO framework. Guidelines for GPIOs consumers diff --git a/Documentation/driver-api/gpio/index.rst b/Documentation/driver-api/gpio/index.rst index 1d48fe248f05..34b57cee3391 100644 --- a/Documentation/driver-api/gpio/index.rst +++ b/Documentation/driver-api/gpio/index.rst @@ -13,7 +13,6 @@ Contents: consumer board drivers-on-gpio - legacy bt8xxgpio Core diff --git a/Documentation/driver-api/gpio/intro.rst b/Documentation/driver-api/gpio/intro.rst index c9c19243b97f..5936a9c57df3 100644 --- a/Documentation/driver-api/gpio/intro.rst +++ b/Documentation/driver-api/gpio/intro.rst @@ -10,18 +10,6 @@ The documents in this directory give detailed instructions on how to access GPIOs in drivers, and how to write a driver for a device that provides GPIOs itself. -Due to the history of GPIO interfaces in the kernel, there are two different -ways to obtain and use GPIOs: - - - The descriptor-based interface is the preferred way to manipulate GPIOs, - and is described by all the files in this directory excepted legacy.rst. - - The legacy integer-based interface which is considered deprecated (but still - usable for compatibility reasons) is documented in legacy.rst. - -The remainder of this document applies to the new descriptor-based interface. -legacy.rst contains the same information applied to the legacy -integer-based interface. - What is a GPIO? =============== diff --git a/Documentation/driver-api/gpio/legacy.rst b/Documentation/driver-api/gpio/legacy.rst deleted file mode 100644 index 534dfe95d128..000000000000 --- a/Documentation/driver-api/gpio/legacy.rst +++ /dev/null @@ -1,679 +0,0 @@ -====================== -Legacy GPIO Interfaces -====================== - -This provides an overview of GPIO access conventions on Linux. - -These calls use the gpio_* naming prefix. No other calls should use that -prefix, or the related __gpio_* prefix. - - -What is a GPIO? -=============== -A "General Purpose Input/Output" (GPIO) is a flexible software-controlled -digital signal. They are provided from many kinds of chip, and are familiar -to Linux developers working with embedded and custom hardware. Each GPIO -represents a bit connected to a particular pin, or "ball" on Ball Grid Array -(BGA) packages. Board schematics show which external hardware connects to -which GPIOs. Drivers can be written generically, so that board setup code -passes such pin configuration data to drivers. - -System-on-Chip (SOC) processors heavily rely on GPIOs. In some cases, every -non-dedicated pin can be configured as a GPIO; and most chips have at least -several dozen of them. Programmable logic devices (like FPGAs) can easily -provide GPIOs; multifunction chips like power managers, and audio codecs -often have a few such pins to help with pin scarcity on SOCs; and there are -also "GPIO Expander" chips that connect using the I2C or SPI serial busses. -Most PC southbridges have a few dozen GPIO-capable pins (with only the BIOS -firmware knowing how they're used). - -The exact capabilities of GPIOs vary between systems. Common options: - - - Output values are writable (high=1, low=0). Some chips also have - options about how that value is driven, so that for example only one - value might be driven ... supporting "wire-OR" and similar schemes - for the other value (notably, "open drain" signaling). - - - Input values are likewise readable (1, 0). Some chips support readback - of pins configured as "output", which is very useful in such "wire-OR" - cases (to support bidirectional signaling). GPIO controllers may have - input de-glitch/debounce logic, sometimes with software controls. - - - Inputs can often be used as IRQ signals, often edge triggered but - sometimes level triggered. Such IRQs may be configurable as system - wakeup events, to wake the system from a low power state. - - - Usually a GPIO will be configurable as either input or output, as needed - by different product boards; single direction ones exist too. - - - Most GPIOs can be accessed while holding spinlocks, but those accessed - through a serial bus normally can't. Some systems support both types. - -On a given board each GPIO is used for one specific purpose like monitoring -MMC/SD card insertion/removal, detecting card writeprotect status, driving -a LED, configuring a transceiver, bitbanging a serial bus, poking a hardware -watchdog, sensing a switch, and so on. - - -GPIO conventions -================ -Note that this is called a "convention" because you don't need to do it this -way, and it's no crime if you don't. There **are** cases where portability -is not the main issue; GPIOs are often used for the kind of board-specific -glue logic that may even change between board revisions, and can't ever be -used on a board that's wired differently. Only least-common-denominator -functionality can be very portable. Other features are platform-specific, -and that can be critical for glue logic. - -Plus, this doesn't require any implementation framework, just an interface. -One platform might implement it as simple inline functions accessing chip -registers; another might implement it by delegating through abstractions -used for several very different kinds of GPIO controller. (There is some -optional code supporting such an implementation strategy, described later -in this document, but drivers acting as clients to the GPIO interface must -not care how it's implemented.) - -That said, if the convention is supported on their platform, drivers should -use it when possible. Platforms must select GPIOLIB if GPIO functionality -is strictly required. Drivers that can't work without -standard GPIO calls should have Kconfig entries which depend on GPIOLIB. The -GPIO calls are available, either as "real code" or as optimized-away stubs, -when drivers use the include file: - - #include - -If you stick to this convention then it'll be easier for other developers to -see what your code is doing, and help maintain it. - -Note that these operations include I/O barriers on platforms which need to -use them; drivers don't need to add them explicitly. - - -Identifying GPIOs ------------------ -GPIOs are identified by unsigned integers in the range 0..MAX_INT. That -reserves "negative" numbers for other purposes like marking signals as -"not available on this board", or indicating faults. Code that doesn't -touch the underlying hardware treats these integers as opaque cookies. - -Platforms define how they use those integers, and usually #define symbols -for the GPIO lines so that board-specific setup code directly corresponds -to the relevant schematics. In contrast, drivers should only use GPIO -numbers passed to them from that setup code, using platform_data to hold -board-specific pin configuration data (along with other board specific -data they need). That avoids portability problems. - -So for example one platform uses numbers 32-159 for GPIOs; while another -uses numbers 0..63 with one set of GPIO controllers, 64-79 with another -type of GPIO controller, and on one particular board 80-95 with an FPGA. -The numbers need not be contiguous; either of those platforms could also -use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. - -If you want to initialize a structure with an invalid GPIO number, use -some negative number (perhaps "-EINVAL"); that will never be valid. To -test if such number from such a structure could reference a GPIO, you -may use this predicate: - - int gpio_is_valid(int number); - -A number that's not valid will be rejected by calls which may request -or free GPIOs (see below). Other numbers may also be rejected; for -example, a number might be valid but temporarily unused on a given board. - -Whether a platform supports multiple GPIO controllers is a platform-specific -implementation issue, as are whether that support can leave "holes" in the space -of GPIO numbers, and whether new controllers can be added at runtime. Such issues -can affect things including whether adjacent GPIO numbers are both valid. - -Using GPIOs ------------ -The first thing a system should do with a GPIO is allocate it, using -the gpio_request() call; see later. - -One of the next things to do with a GPIO, often in board setup code when -setting up a platform_device using the GPIO, is mark its direction:: - - /* set as input or output, returning 0 or negative errno */ - int gpio_direction_input(unsigned gpio); - int gpio_direction_output(unsigned gpio, int value); - -The return value is zero for success, else a negative errno. It should -be checked, since the get/set calls don't have error returns and since -misconfiguration is possible. You should normally issue these calls from -a task context. However, for spinlock-safe GPIOs it's OK to use them -before tasking is enabled, as part of early board setup. - -For output GPIOs, the value provided becomes the initial output value. -This helps avoid signal glitching during system startup. - -For compatibility with legacy interfaces to GPIOs, setting the direction -of a GPIO implicitly requests that GPIO (see below) if it has not been -requested already. That compatibility is being removed from the optional -gpiolib framework. - -Setting the direction can fail if the GPIO number is invalid, or when -that particular GPIO can't be used in that mode. It's generally a bad -idea to rely on boot firmware to have set the direction correctly, since -it probably wasn't validated to do more than boot Linux. (Similarly, -that board setup code probably needs to multiplex that pin as a GPIO, -and configure pullups/pulldowns appropriately.) - - -Spinlock-Safe GPIO access -------------------------- -Most GPIO controllers can be accessed with memory read/write instructions. -Those don't need to sleep, and can safely be done from inside hard -(nonthreaded) IRQ handlers and similar contexts. - -Use the following calls to access such GPIOs:: - - /* GPIO INPUT: return zero or nonzero */ - int gpio_get_value(unsigned gpio); - - /* GPIO OUTPUT */ - void gpio_set_value(unsigned gpio, int value); - -The values are boolean, zero for low, nonzero for high. When reading the -value of an output pin, the value returned should be what's seen on the -pin ... that won't always match the specified output value, because of -issues including open-drain signaling and output latencies. - -The get/set calls have no error returns because "invalid GPIO" should have -been reported earlier from gpio_direction_*(). However, note that not all -platforms can read the value of output pins; those that can't should always -return zero. Also, using these calls for GPIOs that can't safely be accessed -without sleeping (see below) is an error. - -Platform-specific implementations are encouraged to optimize the two -calls to access the GPIO value in cases where the GPIO number (and for -output, value) are constant. It's normal for them to need only a couple -of instructions in such cases (reading or writing a hardware register), -and not to need spinlocks. Such optimized calls can make bitbanging -applications a lot more efficient (in both space and time) than spending -dozens of instructions on subroutine calls. - - -GPIO access that may sleep --------------------------- -Some GPIO controllers must be accessed using message based busses like I2C -or SPI. Commands to read or write those GPIO values require waiting to -get to the head of a queue to transmit a command and get its response. -This requires sleeping, which can't be done from inside IRQ handlers. -To access such GPIOs, a different set of accessors is defined:: - - /* GPIO INPUT: return zero or nonzero, might sleep */ - int gpio_get_value_cansleep(unsigned gpio); - - /* GPIO OUTPUT, might sleep */ - void gpio_set_value_cansleep(unsigned gpio, int value); - -Accessing such GPIOs requires a context which may sleep, for example -a threaded IRQ handler, and those accessors must be used instead of -spinlock-safe accessors without the cansleep() name suffix. - -Other than the fact that these accessors might sleep, and will work -on GPIOs that can't be accessed from hardIRQ handlers, these calls act -the same as the spinlock-safe calls. - -**IN ADDITION** calls to setup and configure such GPIOs must be made -from contexts which may sleep, since they may need to access the GPIO -controller chip too (These setup calls are usually made from board -setup or driver probe/teardown code, so this is an easy constraint.):: - - gpio_direction_input() - gpio_direction_output() - gpio_request() - - ## gpio_request_one() - - gpio_free() - - -Claiming and Releasing GPIOs ----------------------------- -To help catch system configuration errors, two calls are defined:: - - /* request GPIO, returning 0 or negative errno. - * non-null labels may be useful for diagnostics. - */ - int gpio_request(unsigned gpio, const char *label); - - /* release previously-claimed GPIO */ - void gpio_free(unsigned gpio); - -Passing invalid GPIO numbers to gpio_request() will fail, as will requesting -GPIOs that have already been claimed with that call. The return value of -gpio_request() must be checked. You should normally issue these calls from -a task context. However, for spinlock-safe GPIOs it's OK to request GPIOs -before tasking is enabled, as part of early board setup. - -These calls serve two basic purposes. One is marking the signals which -are actually in use as GPIOs, for better diagnostics; systems may have -several hundred potential GPIOs, but often only a dozen are used on any -given board. Another is to catch conflicts, identifying errors when -(a) two or more drivers wrongly think they have exclusive use of that -signal, or (b) something wrongly believes it's safe to remove drivers -needed to manage a signal that's in active use. That is, requesting a -GPIO can serve as a kind of lock. - -Some platforms may also use knowledge about what GPIOs are active for -power management, such as by powering down unused chip sectors and, more -easily, gating off unused clocks. - -For GPIOs that use pins known to the pinctrl subsystem, that subsystem should -be informed of their use; a gpiolib driver's .request() operation may call -pinctrl_gpio_request(), and a gpiolib driver's .free() operation may call -pinctrl_gpio_free(). The pinctrl subsystem allows a pinctrl_gpio_request() -to succeed concurrently with a pin or pingroup being "owned" by a device for -pin multiplexing. - -Any programming of pin multiplexing hardware that is needed to route the -GPIO signal to the appropriate pin should occur within a GPIO driver's -.direction_input() or .direction_output() operations, and occur after any -setup of an output GPIO's value. This allows a glitch-free migration from a -pin's special function to GPIO. This is sometimes required when using a GPIO -to implement a workaround on signals typically driven by a non-GPIO HW block. - -Some platforms allow some or all GPIO signals to be routed to different pins. -Similarly, other aspects of the GPIO or pin may need to be configured, such as -pullup/pulldown. Platform software should arrange that any such details are -configured prior to gpio_request() being called for those GPIOs, e.g. using -the pinctrl subsystem's mapping table, so that GPIO users need not be aware -of these details. - -Also note that it's your responsibility to have stopped using a GPIO -before you free it. - -Considering in most cases GPIOs are actually configured right after they -are claimed, three additional calls are defined:: - - /* request a single GPIO, with initial configuration specified by - * 'flags', identical to gpio_request() wrt other arguments and - * return value - */ - int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); - -where 'flags' is currently defined to specify the following properties: - - * GPIOF_DIR_IN - to configure direction as input - * GPIOF_DIR_OUT - to configure direction as output - - * GPIOF_INIT_LOW - as output, set initial level to LOW - * GPIOF_INIT_HIGH - as output, set initial level to HIGH - -since GPIOF_INIT_* are only valid when configured as output, so group valid -combinations as: - - * GPIOF_IN - configure as input - * GPIOF_OUT_INIT_LOW - configured as output, initial level LOW - * GPIOF_OUT_INIT_HIGH - configured as output, initial level HIGH - -Further more, to ease the claim/release of multiple GPIOs, 'struct gpio' is -introduced to encapsulate all three fields as:: - - struct gpio { - unsigned gpio; - unsigned long flags; - const char *label; - }; - -A typical example of usage:: - - static struct gpio leds_gpios[] = { - { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* default to ON */ - { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* default to OFF */ - { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* default to OFF */ - { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* default to OFF */ - { ... }, - }; - - err = gpio_request_one(31, GPIOF_IN, "Reset Button"); - if (err) - ... - - -GPIOs mapped to IRQs --------------------- -GPIO numbers are unsigned integers; so are IRQ numbers. These make up -two logically distinct namespaces (GPIO 0 need not use IRQ 0). You can -map between them using calls like:: - - /* map GPIO numbers to IRQ numbers */ - int gpio_to_irq(unsigned gpio); - -Those return either the corresponding number in the other namespace, or -else a negative errno code if the mapping can't be done. (For example, -some GPIOs can't be used as IRQs.) It is an unchecked error to use a GPIO -number that wasn't set up as an input using gpio_direction_input(), or -to use an IRQ number that didn't originally come from gpio_to_irq(). - -These two mapping calls are expected to cost on the order of a single -addition or subtraction. They're not allowed to sleep. - -Non-error values returned from gpio_to_irq() can be passed to request_irq() -or free_irq(). They will often be stored into IRQ resources for platform -devices, by the board-specific initialization code. Note that IRQ trigger -options are part of the IRQ interface, e.g. IRQF_TRIGGER_FALLING, as are -system wakeup capabilities. - - -Emulating Open Drain Signals ----------------------------- -Sometimes shared signals need to use "open drain" signaling, where only the -low signal level is actually driven. (That term applies to CMOS transistors; -"open collector" is used for TTL.) A pullup resistor causes the high signal -level. This is sometimes called a "wire-AND"; or more practically, from the -negative logic (low=true) perspective this is a "wire-OR". - -One common example of an open drain signal is a shared active-low IRQ line. -Also, bidirectional data bus signals sometimes use open drain signals. - -Some GPIO controllers directly support open drain outputs; many don't. When -you need open drain signaling but your hardware doesn't directly support it, -there's a common idiom you can use to emulate it with any GPIO pin that can -be used as either an input or an output: - - LOW: gpio_direction_output(gpio, 0) ... this drives the signal - and overrides the pullup. - - HIGH: gpio_direction_input(gpio) ... this turns off the output, - so the pullup (or some other device) controls the signal. - -If you are "driving" the signal high but gpio_get_value(gpio) reports a low -value (after the appropriate rise time passes), you know some other component -is driving the shared signal low. That's not necessarily an error. As one -common example, that's how I2C clocks are stretched: a slave that needs a -slower clock delays the rising edge of SCK, and the I2C master adjusts its -signaling rate accordingly. - - -GPIO controllers and the pinctrl subsystem ------------------------------------------- - -A GPIO controller on a SOC might be tightly coupled with the pinctrl -subsystem, in the sense that the pins can be used by other functions -together with an optional gpio feature. We have already covered the -case where e.g. a GPIO controller need to reserve a pin or set the -direction of a pin by calling any of:: - - pinctrl_gpio_request() - pinctrl_gpio_free() - pinctrl_gpio_direction_input() - pinctrl_gpio_direction_output() - -But how does the pin control subsystem cross-correlate the GPIO -numbers (which are a global business) to a certain pin on a certain -pin controller? - -This is done by registering "ranges" of pins, which are essentially -cross-reference tables. These are described in -Documentation/driver-api/pin-control.rst - -While the pin allocation is totally managed by the pinctrl subsystem, -gpio (under gpiolib) is still maintained by gpio drivers. It may happen -that different pin ranges in a SoC is managed by different gpio drivers. - -This makes it logical to let gpio drivers announce their pin ranges to -the pin ctrl subsystem before it will call 'pinctrl_gpio_request' in order -to request the corresponding pin to be prepared by the pinctrl subsystem -before any gpio usage. - -For this, the gpio controller can register its pin range with pinctrl -subsystem. There are two ways of doing it currently: with or without DT. - -For with DT support refer to Documentation/devicetree/bindings/gpio/gpio.txt. - -For non-DT support, user can call gpiochip_add_pin_range() with appropriate -parameters to register a range of gpio pins with a pinctrl driver. For this -exact name string of pinctrl device has to be passed as one of the -argument to this routine. - - -What do these conventions omit? -=============================== -One of the biggest things these conventions omit is pin multiplexing, since -this is highly chip-specific and nonportable. One platform might not need -explicit multiplexing; another might have just two options for use of any -given pin; another might have eight options per pin; another might be able -to route a given GPIO to any one of several pins. (Yes, those examples all -come from systems that run Linux today.) - -Related to multiplexing is configuration and enabling of the pullups or -pulldowns integrated on some platforms. Not all platforms support them, -or support them in the same way; and any given board might use external -pullups (or pulldowns) so that the on-chip ones should not be used. -(When a circuit needs 5 kOhm, on-chip 100 kOhm resistors won't do.) -Likewise drive strength (2 mA vs 20 mA) and voltage (1.8V vs 3.3V) is a -platform-specific issue, as are models like (not) having a one-to-one -correspondence between configurable pins and GPIOs. - -There are other system-specific mechanisms that are not specified here, -like the aforementioned options for input de-glitching and wire-OR output. -Hardware may support reading or writing GPIOs in gangs, but that's usually -configuration dependent: for GPIOs sharing the same bank. (GPIOs are -commonly grouped in banks of 16 or 32, with a given SOC having several such -banks.) Some systems can trigger IRQs from output GPIOs, or read values -from pins not managed as GPIOs. Code relying on such mechanisms will -necessarily be nonportable. - -Dynamic definition of GPIOs is not currently standard; for example, as -a side effect of configuring an add-on board with some GPIO expanders. - - -GPIO implementor's framework (OPTIONAL) -======================================= -As noted earlier, there is an optional implementation framework making it -easier for platforms to support different kinds of GPIO controller using -the same programming interface. This framework is called "gpiolib". - -As a debugging aid, if debugfs is available a /sys/kernel/debug/gpio file -will be found there. That will list all the controllers registered through -this framework, and the state of the GPIOs currently in use. - - -Controller Drivers: gpio_chip ------------------------------ -In this framework each GPIO controller is packaged as a "struct gpio_chip" -with information common to each controller of that type: - - - methods to establish GPIO direction - - methods used to access GPIO values - - flag saying whether calls to its methods may sleep - - optional debugfs dump method (showing extra state like pullup config) - - label for diagnostics - -There is also per-instance data, which may come from device.platform_data: -the number of its first GPIO, and how many GPIOs it exposes. - -The code implementing a gpio_chip should support multiple instances of the -controller, possibly using the driver model. That code will configure each -gpio_chip and issue gpiochip_add(). Removing a GPIO controller should be -rare; use gpiochip_remove() when it is unavoidable. - -Most often a gpio_chip is part of an instance-specific structure with state -not exposed by the GPIO interfaces, such as addressing, power management, -and more. Chips such as codecs will have complex non-GPIO state. - -Any debugfs dump method should normally ignore signals which haven't been -requested as GPIOs. They can use gpiochip_is_requested(), which returns -either NULL or the label associated with that GPIO when it was requested. - - -Platform Support ----------------- -To force-enable this framework, a platform's Kconfig will "select" GPIOLIB, -else it is up to the user to configure support for GPIO. - -If neither of these options are selected, the platform does not support -GPIOs through GPIO-lib and the code cannot be enabled by the user. - -Trivial implementations of those functions can directly use framework -code, which always dispatches through the gpio_chip:: - - #define gpio_get_value __gpio_get_value - #define gpio_set_value __gpio_set_value - -Fancier implementations could instead define those as inline functions with -logic optimizing access to specific SOC-based GPIOs. For example, if the -referenced GPIO is the constant "12", getting or setting its value could -cost as little as two or three instructions, never sleeping. When such an -optimization is not possible those calls must delegate to the framework -code, costing at least a few dozen instructions. For bitbanged I/O, such -instruction savings can be significant. - -For SOCs, platform-specific code defines and registers gpio_chip instances -for each bank of on-chip GPIOs. Those GPIOs should be numbered/labeled to -match chip vendor documentation, and directly match board schematics. They -may well start at zero and go up to a platform-specific limit. Such GPIOs -are normally integrated into platform initialization to make them always be -available, from arch_initcall() or earlier; they can often serve as IRQs. - - -Board Support -------------- -For external GPIO controllers -- such as I2C or SPI expanders, ASICs, multi -function devices, FPGAs or CPLDs -- most often board-specific code handles -registering controller devices and ensures that their drivers know what GPIO -numbers to use with gpiochip_add(). Their numbers often start right after -platform-specific GPIOs. - -For example, board setup code could create structures identifying the range -of GPIOs that chip will expose, and passes them to each GPIO expander chip -using platform_data. Then the chip driver's probe() routine could pass that -data to gpiochip_add(). - -Initialization order can be important. For example, when a device relies on -an I2C-based GPIO, its probe() routine should only be called after that GPIO -becomes available. That may mean the device should not be registered until -calls for that GPIO can work. One way to address such dependencies is for -such gpio_chip controllers to provide setup() and teardown() callbacks to -board specific code; those board specific callbacks would register devices -once all the necessary resources are available, and remove them later when -the GPIO controller device becomes unavailable. - - -Sysfs Interface for Userspace (OPTIONAL) -======================================== -Platforms which use the "gpiolib" implementors framework may choose to -configure a sysfs user interface to GPIOs. This is different from the -debugfs interface, since it provides control over GPIO direction and -value instead of just showing a gpio state summary. Plus, it could be -present on production systems without debugging support. - -Given appropriate hardware documentation for the system, userspace could -know for example that GPIO #23 controls the write protect line used to -protect boot loader segments in flash memory. System upgrade procedures -may need to temporarily remove that protection, first importing a GPIO, -then changing its output state, then updating the code before re-enabling -the write protection. In normal use, GPIO #23 would never be touched, -and the kernel would have no need to know about it. - -Again depending on appropriate hardware documentation, on some systems -userspace GPIO can be used to determine system configuration data that -standard kernels won't know about. And for some tasks, simple userspace -GPIO drivers could be all that the system really needs. - -Note that standard kernel drivers exist for common "LEDs and Buttons" -GPIO tasks: "leds-gpio" and "gpio_keys", respectively. Use those -instead of talking directly to the GPIOs; they integrate with kernel -frameworks better than your userspace code could. - - -Paths in Sysfs --------------- -There are three kinds of entry in /sys/class/gpio: - - - Control interfaces used to get userspace control over GPIOs; - - - GPIOs themselves; and - - - GPIO controllers ("gpio_chip" instances). - -That's in addition to standard files including the "device" symlink. - -The control interfaces are write-only: - - /sys/class/gpio/ - - "export" ... Userspace may ask the kernel to export control of - a GPIO to userspace by writing its number to this file. - - Example: "echo 19 > export" will create a "gpio19" node - for GPIO #19, if that's not requested by kernel code. - - "unexport" ... Reverses the effect of exporting to userspace. - - Example: "echo 19 > unexport" will remove a "gpio19" - node exported using the "export" file. - -GPIO signals have paths like /sys/class/gpio/gpio42/ (for GPIO #42) -and have the following read/write attributes: - - /sys/class/gpio/gpioN/ - - "direction" ... reads as either "in" or "out". This value may - normally be written. Writing as "out" defaults to - initializing the value as low. To ensure glitch free - operation, values "low" and "high" may be written to - configure the GPIO as an output with that initial value. - - Note that this attribute *will not exist* if the kernel - doesn't support changing the direction of a GPIO, or - it was exported by kernel code that didn't explicitly - allow userspace to reconfigure this GPIO's direction. - - "value" ... reads as either 0 (low) or 1 (high). If the GPIO - is configured as an output, this value may be written; - any nonzero value is treated as high. - - If the pin can be configured as interrupt-generating interrupt - and if it has been configured to generate interrupts (see the - description of "edge"), you can poll(2) on that file and - poll(2) will return whenever the interrupt was triggered. If - you use poll(2), set the events POLLPRI. If you use select(2), - set the file descriptor in exceptfds. After poll(2) returns, - either lseek(2) to the beginning of the sysfs file and read the - new value or close the file and re-open it to read the value. - - "edge" ... reads as either "none", "rising", "falling", or - "both". Write these strings to select the signal edge(s) - that will make poll(2) on the "value" file return. - - This file exists only if the pin can be configured as an - interrupt generating input pin. - - "active_low" ... reads as either 0 (false) or 1 (true). Write - any nonzero value to invert the value attribute both - for reading and writing. Existing and subsequent - poll(2) support configuration via the edge attribute - for "rising" and "falling" edges will follow this - setting. - -GPIO controllers have paths like /sys/class/gpio/gpiochip42/ (for the -controller implementing GPIOs starting at #42) and have the following -read-only attributes: - - /sys/class/gpio/gpiochipN/ - - "base" ... same as N, the first GPIO managed by this chip - - "label" ... provided for diagnostics (not always unique) - - "ngpio" ... how many GPIOs this manges (N to N + ngpio - 1) - -Board documentation should in most cases cover what GPIOs are used for -what purposes. However, those numbers are not always stable; GPIOs on -a daughtercard might be different depending on the base board being used, -or other cards in the stack. In such cases, you may need to use the -gpiochip nodes (possibly in conjunction with schematics) to determine -the correct GPIO number to use for a given signal. - - -API Reference -============= - -The functions listed in this section are deprecated. The GPIO descriptor based -API should be used in new code. - -.. kernel-doc:: drivers/gpio/gpiolib-legacy.c - :export: diff --git a/Documentation/translations/zh_CN/driver-api/gpio/index.rst b/Documentation/translations/zh_CN/driver-api/gpio/index.rst index 9a6a14162a6c..e4d54724a1b5 100644 --- a/Documentation/translations/zh_CN/driver-api/gpio/index.rst +++ b/Documentation/translations/zh_CN/driver-api/gpio/index.rst @@ -18,8 +18,6 @@ :caption: 目录 :maxdepth: 2 - legacy - Todolist: * intro diff --git a/Documentation/translations/zh_CN/driver-api/gpio/legacy.rst b/Documentation/translations/zh_CN/driver-api/gpio/legacy.rst deleted file mode 100644 index 0faf042001d2..000000000000 --- a/Documentation/translations/zh_CN/driver-api/gpio/legacy.rst +++ /dev/null @@ -1,618 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -.. include:: ../../disclaimer-zh_CN.rst - -:Original: Documentation/driver-api/gpio/legacy.rst - -:翻译: - - 傅炜 Fu Wei - 司延腾 Yanteng Si - -:校译: - - -传统GPIO接口 -============ - -本文档概述了Linux下的GPIO访问公约。 - -这些函数以 gpio_* 作为前缀。其他的函数不允许使用这样的前缀或相关的 -__gpio_* 前缀。 - - -什么是GPIO? -============ -"通用输入/输出口"(GPIO)是一个灵活的由软件控制的数字信号。他们可 -由多种芯片提供,且对于从事嵌入式和定制硬件的 Linux 开发者来说是 -比较熟悉。每个GPIO 都代表一个连接到特定引脚或球栅阵列(BGA)封装中 -“球珠”的一个位。电路板原理图显示了 GPIO 与外部硬件的连接关系。 -驱动可以编写成通用代码,以使板级启动代码可传递引脚配置数据给驱动。 - -片上系统 (SOC) 处理器对 GPIO 有很大的依赖。在某些情况下,每个 -非专用引脚都可配置为 GPIO,且大多数芯片都最少有一些 GPIO。 -可编程逻辑器件(类似 FPGA) 可以方便地提供 GPIO。像电源管理和 -音频编解码器这样的多功能芯片经常留有一些这样的引脚来帮助那些引脚 -匮乏的 SOC。同时还有通过 I2C 或 SPI 串行总线连接的“GPIO扩展器” -芯片。大多数 PC 的南桥有一些拥有 GPIO 能力的引脚 (只有BIOS -固件才知道如何使用他们)。 - -GPIO 的实际功能因系统而异。通常用法有: - - - 输出值可写 (高电平=1,低电平=0)。一些芯片也有如何驱动这些值的选项, - 例如只允许输出一个值、支持“线与”及其他取值类似的模式(值得注意的是 - “开漏”信号) - - - 输入值可读(1、0)。一些芯片支持引脚在配置为“输出”时回读,这对于类似 - “线与”的情况(以支持双向信号)是非常有用的。GPIO 控制器可能有输入 - 去毛刺/消抖逻辑,这有时需要软件控制。 - - - 输入通常可作为 IRQ 信号,一般是沿触发,但有时是电平触发。这样的 IRQ - 可能配置为系统唤醒事件,以将系统从低功耗状态下唤醒。 - - - 通常一个 GPIO 根据不同产品电路板的需求,可以配置为输入或输出,也有仅 - 支持单向的。 - - - 大部分 GPIO 可以在持有自旋锁时访问,但是通常由串行总线扩展的 GPIO - 不允许持有自旋锁。但某些系统也支持这种类型。 - -对于给定的电路板,每个 GPIO 都用于某个特定的目的,如监控 MMC/SD 卡的 -插入/移除、检测卡的写保护状态、驱动 LED、配置收发器、模拟串行总线、 -复位硬件看门狗、感知开关状态等等。 - - -GPIO 公约 -========= -注意,这个叫做“公约”,因为这不是强制性的,不遵循这个公约是无伤大雅的, -因为此时可移植性并不重要。GPIO 常用于板级特定的电路逻辑,甚至可能 -随着电路板的版本而改变,且不可能在不同走线的电路板上使用。仅有在少数 -功能上才具有可移植性,其他功能是平台特定。这也是由于“胶合”的逻辑造成的。 - -此外,这不需要任何的执行框架,只是一个接口。某个平台可能通过一个简单地 -访问芯片寄存器的内联函数来实现它,其他平台可能通过委托一系列不同的GPIO -控制器的抽象函数来实现它。(有一些可选的代码能支持这种策略的实现,本文档 -后面会介绍,但作为 GPIO 接口的客户端驱动程序必须与它的实现无关。) - -也就是说,如果在他们的平台上支持这个公约,驱动应尽可能的使用它。同时,平台 -必须在 Kconfig 中选择 ARCH_REQUIRE_GPIOLIB 或者 ARCH_WANT_OPTIONAL_GPIOLIB -选项。那些调用标准 GPIO 函数的驱动应该在 Kconfig 入口中声明依赖GENERIC_GPIO。 -当驱动包含文件: - - #include - -则 GPIO 函数是可用,无论是“真实代码”还是经优化过的语句。如果你遵守 -这个公约,当你的代码完成后,对其他的开发者来说会更容易看懂和维护。 - -注意,这些操作包含所用平台的 I/O 屏障代码,驱动无须显式地调用他们。 - - -标识 GPIO ---------- - -GPIO 是通过无符号整型来标识的,范围是 0 到 MAX_INT。保留“负”数 -用于其他目的,例如标识信号“在这个板子上不可用”或指示错误。未接触底层 -硬件的代码会忽略这些整数。 - -平台会定义这些整数的用法,且通常使用 #define 来定义 GPIO,这样 -板级特定的启动代码可以直接关联相应的原理图。相对来说,驱动应该仅使用 -启动代码传递过来的 GPIO 编号,使用 platform_data 保存板级特定 -引脚配置数据 (同时还有其他须要的板级特定数据),避免可能出现的问题。 - -例如一个平台使用编号 32-159 来标识 GPIO,而在另一个平台使用编号0-63 -标识一组 GPIO 控制器,64-79标识另一类 GPIO 控制器,且在一个含有 -FPGA 的特定板子上使用 80-95。编号不一定要连续,那些平台中,也可以 -使用编号2000-2063来标识一个 I2C 接口的 GPIO 扩展器中的 GPIO。 - -如果你要初始化一个带有无效 GPIO 编号的结构体,可以使用一些负编码 -(如"-EINVAL"),那将使其永远不会是有效。来测试这样一个结构体中的编号 -是否关联一个 GPIO,你可使用以下断言:: - - int gpio_is_valid(int number); - -如果编号不存在,则请求和释放 GPIO 的函数将拒绝执行相关操作(见下文)。 -其他编号也可能被拒绝,比如一个编号可能存在,但暂时在给定的电路上不可用。 - -一个平台是否支持多个 GPIO 控制器为平台特定的实现问题,就像是否可以 -在 GPIO 编号空间中有“空洞”和是否可以在运行时添加新的控制器一样。 -这些问题会影响其他事情,包括相邻的 GPIO 编号是否存在等。 - -使用 GPIO ---------- - -对于一个 GPIO,系统应该做的第一件事情就是通过 gpio_request() -函数分配它,见下文。 - -接下来是设置I/O方向,这通常是在板级启动代码中为所使用的 GPIO 设置 -platform_device 时完成:: - - /* 设置为输入或输出, 返回 0 或负的错误代码 */ - int gpio_direction_input(unsigned gpio); - int gpio_direction_output(unsigned gpio, int value); - -返回值为零代表成功,否则返回一个负的错误代码。这个返回值需要检查,因为 -get/set(获取/设置)函数调用没法返回错误,且有可能是配置错误。通常, -你应该在进程上下文中调用这些函数。然而,对于自旋锁安全的 GPIO,在板子 -启动的早期、进程启动前使用他们也是可以的。 - -对于作为输出的 GPIO,为其提供初始输出值,对于避免在系统启动期间出现 -信号毛刺是很有帮助的。 - -为了与传统的 GPIO 接口兼容, 在设置一个 GPIO 方向时,如果它还未被申请, -则隐含了申请那个 GPIO 的操作(见下文)。这种兼容性正在从可选的 gpiolib -框架中移除。 - -如果这个 GPIO 编码不存在,或者特定的 GPIO 不能用于那种模式,则方向 -设置可能失败。依赖启动固件来正确地设置方向通常是一个坏主意,因为它可能 -除了启动Linux,并没有做更多的验证工作。(同理, 板子的启动代码可能需要 -将这个复用的引脚设置为 GPIO,并正确地配置上拉/下拉电阻。) - - -访问自旋锁安全的 GPIO ---------------------- - -大多数 GPIO 控制器可以通过内存读/写指令来访问。这些指令不会休眠,可以 -安全地在硬(非线程)中断例程和类似的上下文中完成。 - -对于那些 GPIO,使用以下的函数访问:: - - /* GPIO 输入:返回零或非零 */ - int gpio_get_value(unsigned gpio); - - /* GPIO 输出 */ - void gpio_set_value(unsigned gpio, int value); - -GPIO值是布尔值,零表示低电平,非零表示高电平。当读取一个输出引脚的值时, -返回值应该是引脚上的值。这个值不总是和输出值相符,因为存在开漏输出信号和 -输出延迟问题。 - -以上的 get/set 函数无错误返回值,因为之前 gpio_direction_*()应已检查过 -其是否为“无效GPIO”。此外,还需要注意的是并不是所有平台都可以从输出引脚 -中读取数据,对于不能读取的引脚应总返回零。另外,对那些在原子上下文中无法 -安全访问的 GPIO (译者注:因为访问可能导致休眠)使用这些函数是不合适的 -(见下文)。 - -在 GPIO 编号(还有输出、值)为常数的情况下,鼓励通过平台特定的实现来优化 -这两个函数来访问 GPIO 值。这种情况(读写一个硬件寄存器)下只需要几条指令 -是很正常的,且无须自旋锁。这种优化函数比起那些在子程序上花费许多指令的 -函数可以使得模拟接口(译者注:例如 GPIO 模拟 I2C、1-wire 或 SPI)的 -应用(在空间和时间上都)更具效率。 - - -访问可能休眠的 GPIO -------------------- - -某些 GPIO 控制器必须通过基于总线(如 I2C 或 SPI)的消息访问。读或写这些 -GPIO 值的命令需要等待其信息排到队首才发送命令,再获得其反馈。期间需要 -休眠,这不能在 IRQ 例程(中断上下文)中执行。 - -为了访问这种 GPIO,内核定义了一套不同的函数:: - - /* GPIO 输入:返回零或非零 ,可能会休眠 */ - int gpio_get_value_cansleep(unsigned gpio); - - /* GPIO 输出,可能会休眠 */ - void gpio_set_value_cansleep(unsigned gpio, int value); - -访问这样的 GPIO 需要一个允许休眠的上下文,例如线程 IRQ 处理例程,并用以上的 -访问函数替换那些没有 cansleep()后缀的自旋锁安全访问函数。 - -除了这些访问函数可能休眠,且它们操作的 GPIO 不能在硬件 IRQ 处理例程中访问的 -事实,这些处理例程实际上和自旋锁安全的函数是一样的。 - -** 除此之外 ** 调用设置和配置此类 GPIO 的函数也必须在允许休眠的上下文中, -因为它们可能也需要访问 GPIO 控制器芯片 (这些设置函数通常在板级启动代码或者 -驱动探测/断开代码中,所以这是一个容易满足的约束条件。) :: - - gpio_direction_input() - gpio_direction_output() - gpio_request() - - ## gpio_request_one() - - gpio_free() - - - -声明和释放 GPIO ----------------- - -为了有助于捕获系统配置错误,定义了两个函数:: - - /* 申请 GPIO, 返回 0 或负的错误代码. - * 非空标签可能有助于诊断. - */ - int gpio_request(unsigned gpio, const char *label); - - /* 释放之前声明的 GPIO */ - void gpio_free(unsigned gpio); - -将无效的 GPIO 编码传递给 gpio_request()会导致失败,申请一个已使用这个 -函数声明过的 GPIO 也会失败。gpio_request()的返回值必须检查。你应该在 -进程上下文中调用这些函数。然而,对于自旋锁安全的 GPIO,在板子启动的早期、 -进入进程之前是可以申请的。 - -这个函数完成两个基本的目标。一是标识那些实际上已作为 GPIO 使用的信号线, -这样便于更好地诊断;系统可能需要服务几百个可用的 GPIO,但是对于任何一个 -给定的电路板通常只有一些被使用。另一个目的是捕获冲突,查明错误:如两个或 -更多驱动错误地认为他们已经独占了某个信号线,或是错误地认为移除一个管理着 -某个已激活信号的驱动是安全的。也就是说,申请 GPIO 的作用类似一种锁机制。 - -某些平台可能也使用 GPIO 作为电源管理激活信号(例如通过关闭未使用芯片区和 -简单地关闭未使用时钟)。 - -对于 GPIO 使用引脚控制子系统已知的引脚,子系统应该被告知其使用情况; -一个 gpiolib 驱动的 .request()操作应调用 pinctrl_gpio_request(), -而 gpiolib 驱动的 .free()操作应调用 pinctrl_gpio_free()。引脚控制 -子系统允许 pinctrl_gpio_request()在某个引脚或引脚组以复用形式“属于” -一个设备时都成功返回。 - -任何须将 GPIO 信号导向适当引脚的引脚复用硬件的编程应该发生在 GPIO -驱动的 .direction_input()或 .direction_output()函数中,以及 -任何输出 GPIO 值的设置之后。这样可使从引脚特殊功能到 GPIO 的转换 -不会在引脚产生毛刺波形。有时当用一个 GPIO 实现其信号驱动一个非 GPIO -硬件模块的解决方案时,就需要这种机制。 - -某些平台允许部分或所有 GPIO 信号使用不同的引脚。类似的,GPIO 或引脚的 -其他方面也需要配置,如上拉/下拉。平台软件应该在对这些 GPIO 调用 -gpio_request()前将这类细节配置好,例如使用引脚控制子系统的映射表, -使得 GPIO 的用户无须关注这些细节。 - -还有一个值得注意的是在释放 GPIO 前,你必须停止使用它。 - - -注意:申请一个 GPIO 并没有以任何方式配置它,只不过标识那个 GPIO 处于使用 -状态。必须有另外的代码来处理引脚配置(如控制 GPIO 使用的引脚、上拉/下拉)。 -考虑到大多数情况下声明 GPIO 之后就会立即配置它们,所以定义了以下三个辅助函数:: - - /* 申请一个 GPIO 信号, 同时通过特定的'flags'初始化配置, - * 其他和 gpio_request()的参数和返回值相同 - * - */ - int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); - -这里 'flags' 当前定义可指定以下属性: - - * GPIOF_DIR_IN - 配置方向为输入 - * GPIOF_DIR_OUT - 配置方向为输出 - - * GPIOF_INIT_LOW - 在作为输出时,初始值为低电平 - * GPIOF_INIT_HIGH - 在作为输出时,初始值为高电平 - -因为 GPIOF_INIT_* 仅有在配置为输出的时候才存在,所以有效的组合为: - - * GPIOF_IN - 配置为输入 - * GPIOF_OUT_INIT_LOW - 配置为输出,并初始化为低电平 - * GPIOF_OUT_INIT_HIGH - 配置为输出,并初始化为高电平 - -更进一步,为了更简单地声明/释放多个 GPIO,'struct gpio'被引进来封装所有 -这三个领域:: - - struct gpio { - unsigned gpio; - unsigned long flags; - const char *label; - }; - -一个典型的用例:: - - static struct gpio leds_gpios[] = { - { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* 默认开启 */ - { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* 默认关闭 */ - { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* 默认关闭 */ - { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* 默认关闭 */ - { ... }, - }; - - err = gpio_request_one(31, GPIOF_IN, "Reset Button"); - if (err) - ... - - -GPIO 映射到 IRQ ----------------- - -GPIO 编号是无符号整数;IRQ 编号也是。这些构成了两个逻辑上不同的命名空间 -(GPIO 0 不一定使用 IRQ 0)。你可以通过以下函数在它们之间实现映射:: - - /* 映射 GPIO 编号到 IRQ 编号 */ - int gpio_to_irq(unsigned gpio); - -它们的返回值为对应命名空间的相关编号,或是负的错误代码(如果无法映射)。 -(例如,某些 GPIO 无法做为 IRQ 使用。)以下的编号错误是未经检测的:使用一个 -未通过 gpio_direction_input()配置为输入的 GPIO 编号,或者使用一个 -并非来源于gpio_to_irq()的 IRQ 编号。 - -这两个映射函数可能会在信号编号的加减计算过程上花些时间。它们不可休眠。 - -gpio_to_irq()返回的非错误值可以传递给 request_irq()或者 free_irq()。 -它们通常通过板级特定的初始化代码存放到平台设备的 IRQ 资源中。注意:IRQ -触发选项是 IRQ 接口的一部分,如 IRQF_TRIGGER_FALLING,系统唤醒能力 -也是如此。 - - -模拟开漏信号 ------------- - -有时在只有低电平信号作为实际驱动结果(译者注:多个输出连接于一点,逻辑电平 -结果为所有输出的逻辑与)的时候,共享的信号线需要使用“开漏”信号。(该术语 -适用于 CMOS 管;而 TTL 用“集电极开路”。)一个上拉电阻使信号为高电平。这 -有时被称为“线与”。实际上,从负逻辑(低电平为真)的角度来看,这是一个“线或”。 - -一个开漏信号的常见例子是共享的低电平使能 IRQ 信号线。此外,有时双向数据总线 -信号也使用漏极开路信号。 - -某些 GPIO 控制器直接支持开漏输出,还有许多不支持。当你需要开漏信号,但 -硬件又不直接支持的时候,一个常用的方法是用任何即可作输入也可作输出的 GPIO -引脚来模拟: - - LOW: gpio_direction_output(gpio, 0) ... 这代码驱动信号并覆盖 - 上拉配置。 - - HIGH: gpio_direction_input(gpio) ... 这代码关闭输出,所以上拉电阻 - (或其他的一些器件)控制了信号。 - -如果你将信号线“驱动”为高电平,但是 gpio_get_value(gpio)报告了一个 -低电平(在适当的上升时间后),你就可以知道是其他的一些组件将共享信号线拉低了。 -这不一定是错误的。一个常见的例子就是 I2C 时钟的延长:一个需要较慢时钟的 -从设备延迟 SCK 的上升沿,而 I2C 主设备相应地调整其信号传输速率。 - -GPIO控制器和引脚控制子系统 --------------------------- - -SOC上的GPIO控制器可能与引脚控制子系统紧密结合,即引脚可以与可选的gpio功 -能一起被其他功能使用。我们已经涵盖了这样的情况,例如一个GPIO控制器需要保 -留一个引脚或通过调用以下任何一个引脚来设置其方向:: - - pinctrl_gpio_request() - pinctrl_gpio_free() - pinctrl_gpio_direction_input() - pinctrl_gpio_direction_output() - -但是,引脚控制子系统是如何将GPIO号码(这是一个全局事项)与某个引脚控制器 -上的某个引脚交叉关联的? - -这是通过注册引脚的“范围”来实现的,这基本上是交叉参考表。这些描述是在 -Documentation/driver-api/pin-control.rst - -虽然引脚分配完全由引脚控制子系统管理,但gpio(在gpiolib下)仍由gpio驱动 -维护。可能发生的情况是,SoC中的不同引脚范围由不同的gpio驱动器管理。 - -这使得在调用 "pinctrl_gpio_request" 之前,让gpio驱动向pin ctrl子系 -统宣布它们的引脚范围是合理的,以便在使用任何gpio之前要求引脚控制子系统准 -备相应的引脚。 - -为此,gpio控制器可以用引脚控制子系统注册其引脚范围。目前有两种方法:有或 -无DT。 - -关于对DT的支持,请参考 Documentation/devicetree/bindings/gpio/gpio.txt. - -对于非DT支持,用户可以用适当的参数调用gpiochip_add_pin_range(),将一 -系列的gpio引脚注册到引脚控制驱动上。为此,必须将引脚控制设备的名称字符串 -作为参数之一传给这个程序。 - - -这些公约忽略了什么? -==================== - -这些公约忽略的最大一件事就是引脚复用,因为这属于高度芯片特定的属性且 -没有可移植性。某个平台可能不需要明确的复用信息;有的对于任意给定的引脚 -可能只有两个功能选项;有的可能每个引脚有八个功能选项;有的可能可以将 -几个引脚中的任何一个作为给定的 GPIO。(是的,这些例子都来自于当前运行 -Linux 的系统。) - -在某些系统中,与引脚复用相关的是配置和使能集成的上、下拉模式。并不是所有 -平台都支持这种模式,或者不会以相同的方式来支持这种模式;且任何给定的电路板 -可能使用外置的上拉(或下拉)电阻,这时芯片上的就不应该使用。(当一个电路需要 -5kOhm 的拉动电阻,芯片上的 100 kOhm 电阻就不能做到。)同样的,驱动能力 -(2 mA vs 20 mA)和电压(1.8V vs 3.3V)是平台特定问题,就像模型一样在 -可配置引脚和 GPIO 之间(没)有一一对应的关系。 - -还有其他一些系统特定的机制没有在这里指出,例如上述的输入去毛刺和线与输出 -选项。硬件可能支持批量读或写 GPIO,但是那一般是配置相关的:对于处于同一 -块区(bank)的GPIO。(GPIO 通常以 16 或 32 个组成一个区块,一个给定的 -片上系统一般有几个这样的区块。)某些系统可以通过输出 GPIO 触发 IRQ, -或者从并非以 GPIO 管理的引脚取值。这些机制的相关代码没有必要具有可移植性。 - -当前,动态定义 GPIO 并不是标准的,例如作为配置一个带有某些 GPIO 扩展器的 -附加电路板的副作用。 - -GPIO 实现者的框架(可选) -========================= - -前面提到了,有一个可选的实现框架,让平台使用相同的编程接口,更加简单地支持 -不同种类的 GPIO 控制器。这个框架称为"gpiolib"。 - -作为一个辅助调试功能,如果 debugfs 可用,就会有一个 /sys/kernel/debug/gpio -文件。通过这个框架,它可以列出所有注册的控制器,以及当前正在使用中的 GPIO -的状态。 - - -控制器驱动: gpio_chip ---------------------- - -在框架中每个 GPIO 控制器都包装为一个 "struct gpio_chip",他包含了 -该类型的每个控制器的常用信息: - - - 设置 GPIO 方向的方法 - - 用于访问 GPIO 值的方法 - - 告知调用其方法是否可能休眠的标志 - - 可选的 debugfs 信息导出方法 (显示类似上拉配置一样的额外状态) - - 诊断标签 - -也包含了来自 device.platform_data 的每个实例的数据:它第一个 GPIO 的 -编号和它可用的 GPIO 的数量。 - -实现 gpio_chip 的代码应支持多控制器实例,这可能使用驱动模型。那些代码要 -配置每个 gpio_chip,并发起gpiochip_add()。卸载一个 GPIO 控制器很少见, -但在必要的时候可以使用 gpiochip_remove()。 - -大部分 gpio_chip 是一个实例特定结构体的一部分,而并不将 GPIO 接口单独 -暴露出来,比如编址、电源管理等。类似编解码器这样的芯片会有复杂的非 GPIO -状态。 - -任何一个 debugfs 信息导出方法通常应该忽略还未申请作为 GPIO 的信号线。 -他们可以使用 gpiochip_is_requested()测试,当这个 GPIO 已经申请过了 -就返回相关的标签,否则返回 NULL。 - - -平台支持 --------- - -为了支持这个框架,一个平台的 Kconfig 文件将会 "select"(选择) -ARCH_REQUIRE_GPIOLIB 或 ARCH_WANT_OPTIONAL_GPIOLIB,并让它的 - 包含 ,同时定义两个方法: -gpio_get_value()、gpio_set_value()。 - -它也应提供一个 ARCH_NR_GPIOS 的定义值,这样可以更好地反映该平台 GPIO -的实际数量,节省静态表的空间。(这个定义值应该包含片上系统内建 GPIO 和 -GPIO 扩展器中的数据。) - -ARCH_REQUIRE_GPIOLIB 意味着 gpiolib 核心在这个构架中将总是编译进内核。 - -ARCH_WANT_OPTIONAL_GPIOLIB 意味着 gpiolib 核心默认关闭,且用户可以 -使能它,并将其编译进内核(可选)。 - -如果这些选项都没被选择,该平台就不通过 GPIO-lib 支持 GPIO,且代码不可以 -被用户使能。 - -以下这些方法的实现可以直接使用框架代码,并总是通过 gpio_chip 调度:: - - #define gpio_get_value __gpio_get_value - #define gpio_set_value __gpio_set_value - -这些定义可以用更理想的实现方法替代,那就是使用经过逻辑优化的内联函数来访问 -基于特定片上系统的 GPIO。例如,若引用的 GPIO (寄存器位偏移)是常量“12”, -读取或设置它可能只需少则两或三个指令,且不会休眠。当这样的优化无法实现时, -那些函数必须使用框架提供的代码,那就至少要几十条指令才可以实现。对于用 GPIO -模拟的 I/O 接口, 如此精简指令是很有意义的。 - -对于片上系统,平台特定代码为片上 GPIO 每个区(bank)定义并注册 gpio_chip -实例。那些 GPIO 应该根据芯片厂商的文档进行编码/标签,并直接和电路板原理图 -对应。他们应该开始于零并终止于平台特定的限制。这些 GPIO(代码)通常从 -arch_initcall()或者更早的地方集成进平台初始化代码,使这些 GPIO 总是可用, -且他们通常可以作为 IRQ 使用。 - -板级支持 --------- - -对于外部 GPIO 控制器(例如 I2C 或 SPI 扩展器、专用芯片、多功能器件、FPGA -或 CPLD),大多数常用板级特定代码都可以注册控制器设备,并保证他们的驱动知道 -gpiochip_add()所使用的 GPIO 编号。他们的起始编号通常跟在平台特定的 GPIO -编号之后。 - -例如板级启动代码应该创建结构体指明芯片公开的 GPIO 范围,并使用 platform_data -将其传递给每个 GPIO 扩展器芯片。然后芯片驱动中的 probe()例程可以将这个 -数据传递给 gpiochip_add()。 - -初始化顺序很重要。例如,如果一个设备依赖基于 I2C 的(扩展)GPIO,那么它的 -probe()例程就应该在那个 GPIO 有效以后才可以被调用。这意味着设备应该在 -GPIO 可以工作之后才可被注册。解决这类依赖的的一种方法是让这种 gpio_chip -控制器向板级特定代码提供 setup()和 teardown()回调函数。一旦所有必须的 -资源可用之后,这些板级特定的回调函数将会注册设备,并可以在这些 GPIO 控制器 -设备变成无效时移除它们。 - - -用户空间的 Sysfs 接口(可选) -============================= - -使用“gpiolib”实现框架的平台可以选择配置一个 GPIO 的 sysfs 用户接口。 -这不同于 debugfs 接口,因为它提供的是对 GPIO方向和值的控制,而不只显示 -一个GPIO 的状态摘要。此外,它可以出现在没有调试支持的产品级系统中。 - -例如,通过适当的系统硬件文档,用户空间可以知道 GIOP #23 控制 Flash -存储器的写保护(用于保护其中 Bootloader 分区)。产品的系统升级可能需要 -临时解除这个保护:首先导入一个 GPIO,改变其输出状态,然后在重新使能写保护 -前升级代码。通常情况下,GPIO #23 是不会被触及的,并且内核也不需要知道他。 - -根据适当的硬件文档,某些系统的用户空间 GPIO 可以用于确定系统配置数据, -这些数据是标准内核不知道的。在某些任务中,简单的用户空间 GPIO 驱动可能是 -系统真正需要的。 - -注意:标准内核驱动中已经存在通用的“LED 和按键”GPIO 任务,分别是: -"leds-gpio" 和 "gpio_keys"。请使用这些来替代直接访问 GPIO,因为集成在 -内核框架中的这类驱动比你在用户空间的代码更好。 - - -Sysfs 中的路径 --------------- - -在/sys/class/gpio 中有 3 类入口: - - - 用于在用户空间控制 GPIO 的控制接口; - - - GPIOs 本身;以及 - - - GPIO 控制器 ("gpio_chip" 实例)。 - -除了这些标准的文件,还包含“device”符号链接。 - -控制接口是只写的: - - /sys/class/gpio/ - - "export" ... 用户空间可以通过写其编号到这个文件,要求内核导出 - 一个 GPIO 的控制到用户空间。 - - 例如: 如果内核代码没有申请 GPIO #19,"echo 19 > export" - 将会为 GPIO #19 创建一个 "gpio19" 节点。 - - "unexport" ... 导出到用户空间的逆操作。 - - 例如: "echo 19 > unexport" 将会移除使用"export"文件导出的 - "gpio19" 节点。 - -GPIO 信号的路径类似 /sys/class/gpio/gpio42/ (对于 GPIO #42 来说), -并有如下的读/写属性: - - /sys/class/gpio/gpioN/ - - "direction" ... 读取得到 "in" 或 "out"。这个值通常运行写入。 - 写入"out" 时,其引脚的默认输出为低电平。为了确保无故障运行, - "low" 或 "high" 的电平值应该写入 GPIO 的配置,作为初始输出值。 - - 注意:如果内核不支持改变 GPIO 的方向,或者在导出时内核代码没有 - 明确允许用户空间可以重新配置 GPIO 方向,那么这个属性将不存在。 - - "value" ... 读取得到 0 (低电平) 或 1 (高电平)。如果 GPIO 配置为 - 输出,这个值允许写操作。任何非零值都以高电平看待。 - - 如果引脚可以配置为中断信号,且如果已经配置了产生中断的模式 - (见"edge"的描述),你可以对这个文件使用轮询操作(poll(2)), - 且轮询操作会在任何中断触发时返回。如果你使用轮询操作(poll(2)), - 请在 events 中设置 POLLPRI 和 POLLERR。如果你使用轮询操作 - (select(2)),请在 exceptfds 设置你期望的文件描述符。在 - 轮询操作(poll(2))返回之后,既可以通过 lseek(2)操作读取 - sysfs 文件的开始部分,也可以关闭这个文件并重新打开它来读取数据。 - - "edge" ... 读取得到“none”、“rising”、“falling”或者“both”。 - 将这些字符串写入这个文件可以选择沿触发模式,会使得轮询操作 - (select(2))在"value"文件中返回。 - - 这个文件仅有在这个引脚可以配置为可产生中断输入引脚时,才存在。 - - "active_low" ... 读取得到 0 (假) 或 1 (真)。写入任何非零值可以 - 翻转这个属性的(读写)值。已存在或之后通过"edge"属性设置了"rising" - 和 "falling" 沿触发模式的轮询操作(poll(2))将会遵循这个设置。 - -GPIO 控制器的路径类似 /sys/class/gpio/gpiochip42/ (对于从#42 GPIO -开始实现控制的控制器),并有着以下只读属性: - - /sys/class/gpio/gpiochipN/ - - "base" ... 与以上的 N 相同,代表此芯片管理的第一个 GPIO 的编号 - - "label" ... 用于诊断 (并不总是只有唯一值) - - "ngpio" ... 此控制器所管理的 GPIO 数量(而 GPIO 编号从 N 到 - N + ngpio - 1) - -大多数情况下,电路板的文档应当标明每个 GPIO 的使用目的。但是那些编号并不总是 -固定的,例如在扩展卡上的 GPIO会根据所使用的主板或所在堆叠架构中其他的板子而 -有所不同。在这种情况下,你可能需要使用 gpiochip 节点(尽可能地结合电路图)来 -确定给定信号所用的 GPIO 编号。 - - -API参考 -======= - -本节中列出的函数已被废弃。在新的代码中应该使用基于GPIO描述符的API。 diff --git a/Documentation/translations/zh_TW/gpio.txt b/Documentation/translations/zh_TW/gpio.txt deleted file mode 100644 index 77d69d381316..000000000000 --- a/Documentation/translations/zh_TW/gpio.txt +++ /dev/null @@ -1,574 +0,0 @@ -Chinese translated version of Documentation/admin-guide/gpio - -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. - -Maintainer: Grant Likely - Linus Walleij -Traditional Chinese maintainer: Hu Haowen <2023002089@link.tyut.edu.cn> ---------------------------------------------------------------------- -Documentation/admin-guide/gpio 的繁體中文翻譯 - -如果想評論或更新本文的內容,請直接聯繫原文檔的維護者。如果你使用英文 -交流有困難的話,也可以向繁體中文版維護者求助。如果本翻譯更新不及時或 -者翻譯存在問題,請聯繫繁體中文版維護者。 - -英文版維護者: Grant Likely - Linus Walleij -繁體中文版維護者: 胡皓文 Hu Haowen <2023002089@link.tyut.edu.cn> -繁體中文版翻譯者: 胡皓文 Hu Haowen <2023002089@link.tyut.edu.cn> -繁體中文版校譯者: 胡皓文 Hu Haowen <2023002089@link.tyut.edu.cn> - -以下爲正文 ---------------------------------------------------------------------- -GPIO 接口 - -本文檔提供了一個在Linux下訪問GPIO的公約概述。 - -這些函數以 gpio_* 作爲前綴。其他的函數不允許使用這樣的前綴或相關的 -__gpio_* 前綴。 - - -什麼是GPIO? -========== -"通用輸入/輸出口"(GPIO)是一個靈活的由軟體控制的數位訊號。他們可 -由多種晶片提供,且對於從事嵌入式和定製硬體的 Linux 開發者來說是 -比較熟悉。每個GPIO 都代表一個連接到特定引腳或球柵陣列(BGA)封裝中 -「球珠」的一個位。電路板原理圖顯示了 GPIO 與外部硬體的連接關係。 -驅動可以編寫成通用代碼,以使板級啓動代碼可傳遞引腳配置數據給驅動。 - -片上系統 (SOC) 處理器對 GPIO 有很大的依賴。在某些情況下,每個 -非專用引腳都可配置爲 GPIO,且大多數晶片都最少有一些 GPIO。 -可編程邏輯器件(類似 FPGA) 可以方便地提供 GPIO。像電源管理和 -音頻編解碼器這樣的多功能晶片經常留有一些這樣的引腳來幫助那些引腳 -匱乏的 SOC。同時還有通過 I2C 或 SPI 串行總線連接的「GPIO擴展器」 -晶片。大多數 PC 的南橋有一些擁有 GPIO 能力的引腳 (只有BIOS -固件才知道如何使用他們)。 - -GPIO 的實際功能因系統而異。通常用法有: - - - 輸出值可寫 (高電平=1,低電平=0)。一些晶片也有如何驅動這些值的選項, - 例如只允許輸出一個值、支持「線與」及其他取值類似的模式(值得注意的是 - 「開漏」信號) - - - 輸入值可讀(1、0)。一些晶片支持引腳在配置爲「輸出」時回讀,這對於類似 - 「線與」的情況(以支持雙向信號)是非常有用的。GPIO 控制器可能有輸入 - 去毛刺/消抖邏輯,這有時需要軟體控制。 - - - 輸入通常可作爲 IRQ 信號,一般是沿觸發,但有時是電平觸發。這樣的 IRQ - 可能配置爲系統喚醒事件,以將系統從低功耗狀態下喚醒。 - - - 通常一個 GPIO 根據不同產品電路板的需求,可以配置爲輸入或輸出,也有僅 - 支持單向的。 - - - 大部分 GPIO 可以在持有自旋鎖時訪問,但是通常由串行總線擴展的 GPIO - 不允許持有自旋鎖。但某些系統也支持這種類型。 - -對於給定的電路板,每個 GPIO 都用於某個特定的目的,如監控 MMC/SD 卡的 -插入/移除、檢測卡的防寫狀態、驅動 LED、配置收發器、模擬串行總線、 -復位硬體看門狗、感知開關狀態等等。 - - -GPIO 公約 -========= -注意,這個叫做「公約」,因爲這不是強制性的,不遵循這個公約是無傷大雅的, -因爲此時可移植性並不重要。GPIO 常用於板級特定的電路邏輯,甚至可能 -隨著電路板的版本而改變,且不可能在不同走線的電路板上使用。僅有在少數 -功能上才具有可移植性,其他功能是平台特定。這也是由於「膠合」的邏輯造成的。 - -此外,這不需要任何的執行框架,只是一個接口。某個平台可能通過一個簡單地 -訪問晶片寄存器的內聯函數來實現它,其他平台可能通過委託一系列不同的GPIO -控制器的抽象函數來實現它。(有一些可選的代碼能支持這種策略的實現,本文檔 -後面會介紹,但作爲 GPIO 接口的客戶端驅動程序必須與它的實現無關。) - -也就是說,如果在他們的平台上支持這個公約,驅動應儘可能的使用它。同時,平台 -必須在 Kconfig 中選擇 ARCH_REQUIRE_GPIOLIB 或者 ARCH_WANT_OPTIONAL_GPIOLIB -選項。那些調用標準 GPIO 函數的驅動應該在 Kconfig 入口中聲明依賴GENERIC_GPIO。 -當驅動包含文件: - - #include - -則 GPIO 函數是可用,無論是「真實代碼」還是經優化過的語句。如果你遵守 -這個公約,當你的代碼完成後,對其他的開發者來說會更容易看懂和維護。 - -注意,這些操作包含所用平台的 I/O 屏障代碼,驅動無須顯式地調用他們。 - - -標識 GPIO ---------- -GPIO 是通過無符號整型來標識的,範圍是 0 到 MAX_INT。保留「負」數 -用於其他目的,例如標識信號「在這個板子上不可用」或指示錯誤。未接觸底層 -硬體的代碼會忽略這些整數。 - -平台會定義這些整數的用法,且通常使用 #define 來定義 GPIO,這樣 -板級特定的啓動代碼可以直接關聯相應的原理圖。相對來說,驅動應該僅使用 -啓動代碼傳遞過來的 GPIO 編號,使用 platform_data 保存板級特定 -引腳配置數據 (同時還有其他須要的板級特定數據),避免可能出現的問題。 - -例如一個平台使用編號 32-159 來標識 GPIO,而在另一個平台使用編號0-63 -標識一組 GPIO 控制器,64-79標識另一類 GPIO 控制器,且在一個含有 -FPGA 的特定板子上使用 80-95。編號不一定要連續,那些平台中,也可以 -使用編號2000-2063來標識一個 I2C 接口的 GPIO 擴展器中的 GPIO。 - -如果你要初始化一個帶有無效 GPIO 編號的結構體,可以使用一些負編碼 -(如"-EINVAL"),那將使其永遠不會是有效。來測試這樣一個結構體中的編號 -是否關聯一個 GPIO,你可使用以下斷言: - - int gpio_is_valid(int number); - -如果編號不存在,則請求和釋放 GPIO 的函數將拒絕執行相關操作(見下文)。 -其他編號也可能被拒絕,比如一個編號可能存在,但暫時在給定的電路上不可用。 - -一個平台是否支持多個 GPIO 控制器爲平台特定的實現問題,就像是否可以 -在 GPIO 編號空間中有「空洞」和是否可以在運行時添加新的控制器一樣。 -這些問題會影響其他事情,包括相鄰的 GPIO 編號是否存在等。 - -使用 GPIO ---------- -對於一個 GPIO,系統應該做的第一件事情就是通過 gpio_request() -函數分配它,見下文。 - -接下來是設置I/O方向,這通常是在板級啓動代碼中爲所使用的 GPIO 設置 -platform_device 時完成。 - - /* 設置爲輸入或輸出, 返回 0 或負的錯誤代碼 */ - int gpio_direction_input(unsigned gpio); - int gpio_direction_output(unsigned gpio, int value); - -返回值爲零代表成功,否則返回一個負的錯誤代碼。這個返回值需要檢查,因爲 -get/set(獲取/設置)函數調用沒法返回錯誤,且有可能是配置錯誤。通常, -你應該在進程上下文中調用這些函數。然而,對於自旋鎖安全的 GPIO,在板子 -啓動的早期、進程啓動前使用他們也是可以的。 - -對於作爲輸出的 GPIO,爲其提供初始輸出值,對於避免在系統啓動期間出現 -信號毛刺是很有幫助的。 - -爲了與傳統的 GPIO 接口兼容, 在設置一個 GPIO 方向時,如果它還未被申請, -則隱含了申請那個 GPIO 的操作(見下文)。這種兼容性正在從可選的 gpiolib -框架中移除。 - -如果這個 GPIO 編碼不存在,或者特定的 GPIO 不能用於那種模式,則方向 -設置可能失敗。依賴啓動固件來正確地設置方向通常是一個壞主意,因爲它可能 -除了啓動Linux,並沒有做更多的驗證工作。(同理, 板子的啓動代碼可能需要 -將這個復用的引腳設置爲 GPIO,並正確地配置上拉/下拉電阻。) - - -訪問自旋鎖安全的 GPIO -------------------- -大多數 GPIO 控制器可以通過內存讀/寫指令來訪問。這些指令不會休眠,可以 -安全地在硬(非線程)中斷例程和類似的上下文中完成。 - -對於那些 GPIO,使用以下的函數訪問: - - /* GPIO 輸入:返回零或非零 */ - int gpio_get_value(unsigned gpio); - - /* GPIO 輸出 */ - void gpio_set_value(unsigned gpio, int value); - -GPIO值是布爾值,零表示低電平,非零表示高電平。當讀取一個輸出引腳的值時, -返回值應該是引腳上的值。這個值不總是和輸出值相符,因爲存在開漏輸出信號和 -輸出延遲問題。 - -以上的 get/set 函數無錯誤返回值,因爲之前 gpio_direction_*()應已檢查過 -其是否爲「無效GPIO」。此外,還需要注意的是並不是所有平台都可以從輸出引腳 -中讀取數據,對於不能讀取的引腳應總返回零。另外,對那些在原子上下文中無法 -安全訪問的 GPIO (譯者註:因爲訪問可能導致休眠)使用這些函數是不合適的 -(見下文)。 - -在 GPIO 編號(還有輸出、值)爲常數的情況下,鼓勵通過平台特定的實現來優化 -這兩個函數來訪問 GPIO 值。這種情況(讀寫一個硬體寄存器)下只需要幾條指令 -是很正常的,且無須自旋鎖。這種優化函數比起那些在子程序上花費許多指令的 -函數可以使得模擬接口(譯者注:例如 GPIO 模擬 I2C、1-wire 或 SPI)的 -應用(在空間和時間上都)更具效率。 - - -訪問可能休眠的 GPIO ------------------ -某些 GPIO 控制器必須通過基於總線(如 I2C 或 SPI)的消息訪問。讀或寫這些 -GPIO 值的命令需要等待其信息排到隊首才發送命令,再獲得其反饋。期間需要 -休眠,這不能在 IRQ 例程(中斷上下文)中執行。 - -爲了訪問這種 GPIO,內核定義了一套不同的函數: - - /* GPIO 輸入:返回零或非零 ,可能會休眠 */ - int gpio_get_value_cansleep(unsigned gpio); - - /* GPIO 輸出,可能會休眠 */ - void gpio_set_value_cansleep(unsigned gpio, int value); - -訪問這樣的 GPIO 需要一個允許休眠的上下文,例如線程 IRQ 處理例程,並用以上的 -訪問函數替換那些沒有 cansleep()後綴的自旋鎖安全訪問函數。 - -除了這些訪問函數可能休眠,且它們操作的 GPIO 不能在硬體 IRQ 處理例程中訪問的 -事實,這些處理例程實際上和自旋鎖安全的函數是一樣的。 - -** 除此之外 ** 調用設置和配置此類 GPIO 的函數也必須在允許休眠的上下文中, -因爲它們可能也需要訪問 GPIO 控制器晶片: (這些設置函數通常在板級啓動代碼或者 -驅動探測/斷開代碼中,所以這是一個容易滿足的約束條件。) - - gpio_direction_input() - gpio_direction_output() - gpio_request() - -## gpio_request_one() - - gpio_free() - - -聲明和釋放 GPIO ----------------------------- -爲了有助於捕獲系統配置錯誤,定義了兩個函數。 - - /* 申請 GPIO, 返回 0 或負的錯誤代碼. - * 非空標籤可能有助於診斷. - */ - int gpio_request(unsigned gpio, const char *label); - - /* 釋放之前聲明的 GPIO */ - void gpio_free(unsigned gpio); - -將無效的 GPIO 編碼傳遞給 gpio_request()會導致失敗,申請一個已使用這個 -函數聲明過的 GPIO 也會失敗。gpio_request()的返回值必須檢查。你應該在 -進程上下文中調用這些函數。然而,對於自旋鎖安全的 GPIO,在板子啓動的早期、 -進入進程之前是可以申請的。 - -這個函數完成兩個基本的目標。一是標識那些實際上已作爲 GPIO 使用的信號線, -這樣便於更好地診斷;系統可能需要服務幾百個可用的 GPIO,但是對於任何一個 -給定的電路板通常只有一些被使用。另一個目的是捕獲衝突,查明錯誤:如兩個或 -更多驅動錯誤地認爲他們已經獨占了某個信號線,或是錯誤地認爲移除一個管理著 -某個已激活信號的驅動是安全的。也就是說,申請 GPIO 的作用類似一種鎖機制。 - -某些平台可能也使用 GPIO 作爲電源管理激活信號(例如通過關閉未使用晶片區和 -簡單地關閉未使用時鐘)。 - -對於 GPIO 使用 pinctrl 子系統已知的引腳,子系統應該被告知其使用情況; -一個 gpiolib 驅動的 .request()操作應調用 pinctrl_gpio_request(), -而 gpiolib 驅動的 .free()操作應調用 pinctrl_gpio_free()。pinctrl -子系統允許 pinctrl_gpio_request()在某個引腳或引腳組以復用形式「屬於」 -一個設備時都成功返回。 - -任何須將 GPIO 信號導向適當引腳的引腳復用硬體的編程應該發生在 GPIO -驅動的 .direction_input()或 .direction_output()函數中,以及 -任何輸出 GPIO 值的設置之後。這樣可使從引腳特殊功能到 GPIO 的轉換 -不會在引腳產生毛刺波形。有時當用一個 GPIO 實現其信號驅動一個非 GPIO -硬體模塊的解決方案時,就需要這種機制。 - -某些平台允許部分或所有 GPIO 信號使用不同的引腳。類似的,GPIO 或引腳的 -其他方面也需要配置,如上拉/下拉。平台軟體應該在對這些 GPIO 調用 -gpio_request()前將這類細節配置好,例如使用 pinctrl 子系統的映射表, -使得 GPIO 的用戶無須關注這些細節。 - -還有一個值得注意的是在釋放 GPIO 前,你必須停止使用它。 - - -注意:申請一個 GPIO 並沒有以任何方式配置它,只不過標識那個 GPIO 處於使用 -狀態。必須有另外的代碼來處理引腳配置(如控制 GPIO 使用的引腳、上拉/下拉)。 -考慮到大多數情況下聲明 GPIO 之後就會立即配置它們,所以定義了以下三個輔助函數: - - /* 申請一個 GPIO 信號, 同時通過特定的'flags'初始化配置, - * 其他和 gpio_request()的參數和返回值相同 - * - */ - int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); - -這裡 'flags' 當前定義可指定以下屬性: - - * GPIOF_DIR_IN - 配置方向爲輸入 - * GPIOF_DIR_OUT - 配置方向爲輸出 - - * GPIOF_INIT_LOW - 在作爲輸出時,初始值爲低電平 - * GPIOF_INIT_HIGH - 在作爲輸出時,初始值爲高電平 - -因爲 GPIOF_INIT_* 僅有在配置爲輸出的時候才存在,所以有效的組合爲: - - * GPIOF_IN - 配置爲輸入 - * GPIOF_OUT_INIT_LOW - 配置爲輸出,並初始化爲低電平 - * GPIOF_OUT_INIT_HIGH - 配置爲輸出,並初始化爲高電平 - -更進一步,爲了更簡單地聲明/釋放多個 GPIO,'struct gpio'被引進來封裝所有 -這三個領域: - - struct gpio { - unsigned gpio; - unsigned long flags; - const char *label; - }; - -一個典型的用例: - - static struct gpio leds_gpios[] = { - { 32, GPIOF_OUT_INIT_HIGH, "Power LED" }, /* 默認開啓 */ - { 33, GPIOF_OUT_INIT_LOW, "Green LED" }, /* 默認關閉 */ - { 34, GPIOF_OUT_INIT_LOW, "Red LED" }, /* 默認關閉 */ - { 35, GPIOF_OUT_INIT_LOW, "Blue LED" }, /* 默認關閉 */ - { ... }, - }; - - err = gpio_request_one(31, GPIOF_IN, "Reset Button"); - if (err) - ... - - -GPIO 映射到 IRQ --------------------- -GPIO 編號是無符號整數;IRQ 編號也是。這些構成了兩個邏輯上不同的命名空間 -(GPIO 0 不一定使用 IRQ 0)。你可以通過以下函數在它們之間實現映射: - - /* 映射 GPIO 編號到 IRQ 編號 */ - int gpio_to_irq(unsigned gpio); - -它們的返回值爲對應命名空間的相關編號,或是負的錯誤代碼(如果無法映射)。 -(例如,某些 GPIO 無法做爲 IRQ 使用。)以下的編號錯誤是未經檢測的:使用一個 -未通過 gpio_direction_input()配置爲輸入的 GPIO 編號,或者使用一個 -並非來源於gpio_to_irq()的 IRQ 編號。 - -這兩個映射函數可能會在信號編號的加減計算過程上花些時間。它們不可休眠。 - -gpio_to_irq()返回的非錯誤值可以傳遞給 request_irq()或者 free_irq()。 -它們通常通過板級特定的初始化代碼存放到平台設備的 IRQ 資源中。注意:IRQ -觸發選項是 IRQ 接口的一部分,如 IRQF_TRIGGER_FALLING,系統喚醒能力 -也是如此。 - - -模擬開漏信號 ----------------------------- -有時在只有低電平信號作爲實際驅動結果(譯者注:多個輸出連接於一點,邏輯電平 -結果爲所有輸出的邏輯與)的時候,共享的信號線需要使用「開漏」信號。(該術語 -適用於 CMOS 管;而 TTL 用「集電極開路」。)一個上拉電阻使信號爲高電平。這 -有時被稱爲「線與」。實際上,從負邏輯(低電平爲真)的角度來看,這是一個「線或」。 - -一個開漏信號的常見例子是共享的低電平使能 IRQ 信號線。此外,有時雙向數據總線 -信號也使用漏極開路信號。 - -某些 GPIO 控制器直接支持開漏輸出,還有許多不支持。當你需要開漏信號,但 -硬體又不直接支持的時候,一個常用的方法是用任何即可作輸入也可作輸出的 GPIO -引腳來模擬: - - LOW: gpio_direction_output(gpio, 0) ... 這代碼驅動信號並覆蓋 - 上拉配置。 - - HIGH: gpio_direction_input(gpio) ... 這代碼關閉輸出,所以上拉電阻 - (或其他的一些器件)控制了信號。 - -如果你將信號線「驅動」爲高電平,但是 gpio_get_value(gpio)報告了一個 -低電平(在適當的上升時間後),你就可以知道是其他的一些組件將共享信號線拉低了。 -這不一定是錯誤的。一個常見的例子就是 I2C 時鐘的延長:一個需要較慢時鐘的 -從設備延遲 SCK 的上升沿,而 I2C 主設備相應地調整其信號傳輸速率。 - - -這些公約忽略了什麼? -================ -這些公約忽略的最大一件事就是引腳復用,因爲這屬於高度晶片特定的屬性且 -沒有可移植性。某個平台可能不需要明確的復用信息;有的對於任意給定的引腳 -可能只有兩個功能選項;有的可能每個引腳有八個功能選項;有的可能可以將 -幾個引腳中的任何一個作爲給定的 GPIO。(是的,這些例子都來自於當前運行 -Linux 的系統。) - -在某些系統中,與引腳復用相關的是配置和使能集成的上、下拉模式。並不是所有 -平台都支持這種模式,或者不會以相同的方式來支持這種模式;且任何給定的電路板 -可能使用外置的上拉(或下拉)電阻,這時晶片上的就不應該使用。(當一個電路需要 -5kOhm 的拉動電阻,晶片上的 100 kOhm 電阻就不能做到。)同樣的,驅動能力 -(2 mA vs 20 mA)和電壓(1.8V vs 3.3V)是平台特定問題,就像模型一樣在 -可配置引腳和 GPIO 之間(沒)有一一對應的關係。 - -還有其他一些系統特定的機制沒有在這裡指出,例如上述的輸入去毛刺和線與輸出 -選項。硬體可能支持批量讀或寫 GPIO,但是那一般是配置相關的:對於處於同一 -塊區(bank)的GPIO。(GPIO 通常以 16 或 32 個組成一個區塊,一個給定的 -片上系統一般有幾個這樣的區塊。)某些系統可以通過輸出 GPIO 觸發 IRQ, -或者從並非以 GPIO 管理的引腳取值。這些機制的相關代碼沒有必要具有可移植性。 - -當前,動態定義 GPIO 並不是標準的,例如作爲配置一個帶有某些 GPIO 擴展器的 -附加電路板的副作用。 - -GPIO 實現者的框架 (可選) -===================== -前面提到了,有一個可選的實現框架,讓平台使用相同的編程接口,更加簡單地支持 -不同種類的 GPIO 控制器。這個框架稱爲"gpiolib"。 - -作爲一個輔助調試功能,如果 debugfs 可用,就會有一個 /sys/kernel/debug/gpio -文件。通過這個框架,它可以列出所有註冊的控制器,以及當前正在使用中的 GPIO -的狀態。 - - -控制器驅動: gpio_chip -------------------- -在框架中每個 GPIO 控制器都包裝爲一個 "struct gpio_chip",他包含了 -該類型的每個控制器的常用信息: - - - 設置 GPIO 方向的方法 - - 用於訪問 GPIO 值的方法 - - 告知調用其方法是否可能休眠的標誌 - - 可選的 debugfs 信息導出方法 (顯示類似上拉配置一樣的額外狀態) - - 診斷標籤 - -也包含了來自 device.platform_data 的每個實例的數據:它第一個 GPIO 的 -編號和它可用的 GPIO 的數量。 - -實現 gpio_chip 的代碼應支持多控制器實例,這可能使用驅動模型。那些代碼要 -配置每個 gpio_chip,並發起gpiochip_add()。卸載一個 GPIO 控制器很少見, -但在必要的時候可以使用 gpiochip_remove()。 - -大部分 gpio_chip 是一個實例特定結構體的一部分,而並不將 GPIO 接口單獨 -暴露出來,比如編址、電源管理等。類似編解碼器這樣的晶片會有複雜的非 GPIO -狀態。 - -任何一個 debugfs 信息導出方法通常應該忽略還未申請作爲 GPIO 的信號線。 -他們可以使用 gpiochip_is_requested()測試,當這個 GPIO 已經申請過了 -就返回相關的標籤,否則返回 NULL。 - - -平台支持 -------- -爲了支持這個框架,一個平台的 Kconfig 文件將會 "select"(選擇) -ARCH_REQUIRE_GPIOLIB 或 ARCH_WANT_OPTIONAL_GPIOLIB,並讓它的 - 包含 ,同時定義二個方法: -gpio_get_value()、gpio_set_value()。 - -它也應提供一個 ARCH_NR_GPIOS 的定義值,這樣可以更好地反映該平台 GPIO -的實際數量,節省靜態表的空間。(這個定義值應該包含片上系統內建 GPIO 和 -GPIO 擴展器中的數據。) - -ARCH_REQUIRE_GPIOLIB 意味著 gpiolib 核心在這個構架中將總是編譯進內核。 - -ARCH_WANT_OPTIONAL_GPIOLIB 意味著 gpiolib 核心默認關閉,且用戶可以 -使能它,並將其編譯進內核(可選)。 - -如果這些選項都沒被選擇,該平台就不通過 GPIO-lib 支持 GPIO,且代碼不可以 -被用戶使能。 - -以下這些方法的實現可以直接使用框架代碼,並總是通過 gpio_chip 調度: - - #define gpio_get_value __gpio_get_value - #define gpio_set_value __gpio_set_value - -這些定義可以用更理想的實現方法替代,那就是使用經過邏輯優化的內聯函數來訪問 -基於特定片上系統的 GPIO。例如,若引用的 GPIO (寄存器位偏移)是常量「12」, -讀取或設置它可能只需少則兩或三個指令,且不會休眠。當這樣的優化無法實現時, -那些函數必須使用框架提供的代碼,那就至少要幾十條指令才可以實現。對於用 GPIO -模擬的 I/O 接口, 如此精簡指令是很有意義的。 - -對於片上系統,平台特定代碼爲片上 GPIO 每個區(bank)定義並註冊 gpio_chip -實例。那些 GPIO 應該根據晶片廠商的文檔進行編碼/標籤,並直接和電路板原理圖 -對應。他們應該開始於零並終止於平台特定的限制。這些 GPIO(代碼)通常從 -arch_initcall()或者更早的地方集成進平台初始化代碼,使這些 GPIO 總是可用, -且他們通常可以作爲 IRQ 使用。 - -板級支持 -------- -對於外部 GPIO 控制器(例如 I2C 或 SPI 擴展器、專用晶片、多功能器件、FPGA -或 CPLD),大多數常用板級特定代碼都可以註冊控制器設備,並保證他們的驅動知道 -gpiochip_add()所使用的 GPIO 編號。他們的起始編號通常跟在平台特定的 GPIO -編號之後。 - -例如板級啓動代碼應該創建結構體指明晶片公開的 GPIO 範圍,並使用 platform_data -將其傳遞給每個 GPIO 擴展器晶片。然後晶片驅動中的 probe()例程可以將這個 -數據傳遞給 gpiochip_add()。 - -初始化順序很重要。例如,如果一個設備依賴基於 I2C 的(擴展)GPIO,那麼它的 -probe()例程就應該在那個 GPIO 有效以後才可以被調用。這意味著設備應該在 -GPIO 可以工作之後才可被註冊。解決這類依賴的的一種方法是讓這種 gpio_chip -控制器向板級特定代碼提供 setup()和 teardown()回調函數。一旦所有必須的 -資源可用之後,這些板級特定的回調函數將會註冊設備,並可以在這些 GPIO 控制器 -設備變成無效時移除它們。 - - -用戶空間的 Sysfs 接口(可選) -======================== -使用「gpiolib」實現框架的平台可以選擇配置一個 GPIO 的 sysfs 用戶接口。 -這不同於 debugfs 接口,因爲它提供的是對 GPIO方向和值的控制,而不只顯示 -一個GPIO 的狀態摘要。此外,它可以出現在沒有調試支持的產品級系統中。 - -例如,通過適當的系統硬體文檔,用戶空間可以知道 GIOP #23 控制 Flash -存儲器的防寫(用於保護其中 Bootloader 分區)。產品的系統升級可能需要 -臨時解除這個保護:首先導入一個 GPIO,改變其輸出狀態,然後在重新使能防寫 -前升級代碼。通常情況下,GPIO #23 是不會被觸及的,並且內核也不需要知道他。 - -根據適當的硬體文檔,某些系統的用戶空間 GPIO 可以用於確定系統配置數據, -這些數據是標準內核不知道的。在某些任務中,簡單的用戶空間 GPIO 驅動可能是 -系統真正需要的。 - -注意:標準內核驅動中已經存在通用的「LED 和按鍵」GPIO 任務,分別是: -"leds-gpio" 和 "gpio_keys"。請使用這些來替代直接訪問 GPIO,因爲集成在 -內核框架中的這類驅動比你在用戶空間的代碼更好。 - - -Sysfs 中的路徑 --------------- -在/sys/class/gpio 中有 3 類入口: - - - 用於在用戶空間控制 GPIO 的控制接口; - - - GPIOs 本身;以及 - - - GPIO 控制器 ("gpio_chip" 實例)。 - -除了這些標準的文件,還包含「device」符號連結。 - -控制接口是只寫的: - - /sys/class/gpio/ - - "export" ... 用戶空間可以通過寫其編號到這個文件,要求內核導出 - 一個 GPIO 的控制到用戶空間。 - - 例如: 如果內核代碼沒有申請 GPIO #19,"echo 19 > export" - 將會爲 GPIO #19 創建一個 "gpio19" 節點。 - - "unexport" ... 導出到用戶空間的逆操作。 - - 例如: "echo 19 > unexport" 將會移除使用"export"文件導出的 - "gpio19" 節點。 - -GPIO 信號的路徑類似 /sys/class/gpio/gpio42/ (對於 GPIO #42 來說), -並有如下的讀/寫屬性: - - /sys/class/gpio/gpioN/ - - "direction" ... 讀取得到 "in" 或 "out"。這個值通常運行寫入。 - 寫入"out" 時,其引腳的默認輸出爲低電平。爲了確保無故障運行, - "low" 或 "high" 的電平值應該寫入 GPIO 的配置,作爲初始輸出值。 - - 注意:如果內核不支持改變 GPIO 的方向,或者在導出時內核代碼沒有 - 明確允許用戶空間可以重新配置 GPIO 方向,那麼這個屬性將不存在。 - - "value" ... 讀取得到 0 (低電平) 或 1 (高電平)。如果 GPIO 配置爲 - 輸出,這個值允許寫操作。任何非零值都以高電平看待。 - - 如果引腳可以配置爲中斷信號,且如果已經配置了產生中斷的模式 - (見"edge"的描述),你可以對這個文件使用輪詢操作(poll(2)), - 且輪詢操作會在任何中斷觸發時返回。如果你使用輪詢操作(poll(2)), - 請在 events 中設置 POLLPRI 和 POLLERR。如果你使用輪詢操作 - (select(2)),請在 exceptfds 設置你期望的文件描述符。在 - 輪詢操作(poll(2))返回之後,既可以通過 lseek(2)操作讀取 - sysfs 文件的開始部分,也可以關閉這個文件並重新打開它來讀取數據。 - - "edge" ... 讀取得到「none」、「rising」、「falling」或者「both」。 - 將這些字符串寫入這個文件可以選擇沿觸發模式,會使得輪詢操作 - (select(2))在"value"文件中返回。 - - 這個文件僅有在這個引腳可以配置爲可產生中斷輸入引腳時,才存在。 - - "active_low" ... 讀取得到 0 (假) 或 1 (真)。寫入任何非零值可以 - 翻轉這個屬性的(讀寫)值。已存在或之後通過"edge"屬性設置了"rising" - 和 "falling" 沿觸發模式的輪詢操作(poll(2))將會遵循這個設置。 - -GPIO 控制器的路徑類似 /sys/class/gpio/gpiochip42/ (對於從#42 GPIO -開始實現控制的控制器),並有著以下只讀屬性: - - /sys/class/gpio/gpiochipN/ - - "base" ... 與以上的 N 相同,代表此晶片管理的第一個 GPIO 的編號 - - "label" ... 用於診斷 (並不總是只有唯一值) - - "ngpio" ... 此控制器所管理的 GPIO 數量(而 GPIO 編號從 N 到 - N + ngpio - 1) - -大多數情況下,電路板的文檔應當標明每個 GPIO 的使用目的。但是那些編號並不總是 -固定的,例如在擴展卡上的 GPIO會根據所使用的主板或所在堆疊架構中其他的板子而 -有所不同。在這種情況下,你可能需要使用 gpiochip 節點(儘可能地結合電路圖)來 -確定給定信號所用的 GPIO 編號。 diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 56ac7e7a2889..063f71b18a7c 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * + * NOTE: This header *must not* be included. * * This is the LEGACY GPIO bulk include file, including legacy APIs. It is * used for GPIO drivers still referencing the global GPIO numberspace, @@ -16,8 +16,6 @@ struct device; -/* see Documentation/driver-api/gpio/legacy.rst */ - /* make these flag values available regardless of GPIO kconfig options */ #define GPIOF_DIR_OUT (0 << 0) #define GPIOF_DIR_IN (1 << 0) @@ -121,8 +119,6 @@ static inline int gpio_to_irq(unsigned gpio) int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); -/* CONFIG_GPIOLIB: bindings for managed devices that want to request gpios */ - int devm_gpio_request(struct device *dev, unsigned gpio, const char *label); int devm_gpio_request_one(struct device *dev, unsigned gpio, unsigned long flags, const char *label); -- cgit v1.2.3 From ca3a91063acc3abc0fb233591d8cda4b37dc39ac Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Sun, 5 May 2024 23:10:33 +0300 Subject: dt-bindings: clock: qcom: Add SM7150 DISPCC clocks Add device tree bindings for the display clock controller on Qualcomm SM7150 platform. Co-developed-by: David Wronek Signed-off-by: David Wronek Signed-off-by: Danila Tikhonov Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20240505201038.276047-4-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- .../bindings/clock/qcom,sm7150-dispcc.yaml | 75 ++++++++++++++++++++++ include/dt-bindings/clock/qcom,sm7150-dispcc.h | 59 +++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml create mode 100644 include/dt-bindings/clock/qcom,sm7150-dispcc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml new file mode 100644 index 000000000000..b8d6e1d05ce2 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sm7150-dispcc.yaml @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sm7150-dispcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Display Clock & Reset Controller for SM7150 + +maintainers: + - Danila Tikhonov + - David Wronek + - Jens Reidel + +description: | + Qualcomm display clock control module provides the clocks, resets and power + domains on SM7150. + + See also:: include/dt-bindings/clock/qcom,sm7150-dispcc.h + +properties: + compatible: + const: qcom,sm7150-dispcc + + clocks: + items: + - description: Board XO source + - description: Board Always On XO source + - description: GPLL0 source from GCC + - description: Sleep clock source + - description: Byte clock from MDSS DSI PHY0 + - description: Pixel clock from MDSS DSI PHY0 + - description: Byte clock from MDSS DSI PHY1 + - description: Pixel clock from MDSS DSI PHY1 + - description: Link clock from DP PHY + - description: VCO DIV clock from DP PHY + + power-domains: + maxItems: 1 + description: + CX power domain. + +required: + - compatible + - clocks + - power-domains + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + clock-controller@af00000 { + compatible = "qcom,sm7150-dispcc"; + reg = <0x0af00000 0x200000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&rpmhcc RPMH_CXO_CLK_A>, + <&gcc GCC_DISP_GPLL0_CLK_SRC>, + <&sleep_clk>, + <&mdss_dsi0_phy 0>, + <&mdss_dsi0_phy 1>, + <&mdss_dsi1_phy 0>, + <&mdss_dsi1_phy 1>, + <&dp_phy 0>, + <&dp_phy 1>; + power-domains = <&rpmhpd RPMHPD_CX>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/include/dt-bindings/clock/qcom,sm7150-dispcc.h b/include/dt-bindings/clock/qcom,sm7150-dispcc.h new file mode 100644 index 000000000000..fc1fefe8fd72 --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm7150-dispcc.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Danila Tikhonov + * Copyright (c) 2024, David Wronek + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_DISPCC_SM7150_H +#define _DT_BINDINGS_CLK_QCOM_DISPCC_SM7150_H + +/* DISPCC clock registers */ +#define DISPCC_PLL0 0 +#define DISPCC_MDSS_AHB_CLK 1 +#define DISPCC_MDSS_AHB_CLK_SRC 2 +#define DISPCC_MDSS_BYTE0_CLK 3 +#define DISPCC_MDSS_BYTE0_CLK_SRC 4 +#define DISPCC_MDSS_BYTE0_DIV_CLK_SRC 5 +#define DISPCC_MDSS_BYTE0_INTF_CLK 6 +#define DISPCC_MDSS_BYTE1_CLK 7 +#define DISPCC_MDSS_BYTE1_CLK_SRC 8 +#define DISPCC_MDSS_BYTE1_DIV_CLK_SRC 9 +#define DISPCC_MDSS_BYTE1_INTF_CLK 10 +#define DISPCC_MDSS_DP_AUX_CLK 11 +#define DISPCC_MDSS_DP_AUX_CLK_SRC 12 +#define DISPCC_MDSS_DP_CRYPTO_CLK 13 +#define DISPCC_MDSS_DP_CRYPTO_CLK_SRC 14 +#define DISPCC_MDSS_DP_LINK_CLK 15 +#define DISPCC_MDSS_DP_LINK_CLK_SRC 16 +#define DISPCC_MDSS_DP_LINK_INTF_CLK 17 +#define DISPCC_MDSS_DP_PIXEL1_CLK 18 +#define DISPCC_MDSS_DP_PIXEL1_CLK_SRC 19 +#define DISPCC_MDSS_DP_PIXEL_CLK 20 +#define DISPCC_MDSS_DP_PIXEL_CLK_SRC 21 +#define DISPCC_MDSS_ESC0_CLK 22 +#define DISPCC_MDSS_ESC0_CLK_SRC 23 +#define DISPCC_MDSS_ESC1_CLK 24 +#define DISPCC_MDSS_ESC1_CLK_SRC 25 +#define DISPCC_MDSS_MDP_CLK 26 +#define DISPCC_MDSS_MDP_CLK_SRC 27 +#define DISPCC_MDSS_MDP_LUT_CLK 28 +#define DISPCC_MDSS_NON_GDSC_AHB_CLK 29 +#define DISPCC_MDSS_PCLK0_CLK 30 +#define DISPCC_MDSS_PCLK0_CLK_SRC 31 +#define DISPCC_MDSS_PCLK1_CLK 32 +#define DISPCC_MDSS_PCLK1_CLK_SRC 33 +#define DISPCC_MDSS_ROT_CLK 34 +#define DISPCC_MDSS_ROT_CLK_SRC 35 +#define DISPCC_MDSS_RSCC_AHB_CLK 36 +#define DISPCC_MDSS_RSCC_VSYNC_CLK 37 +#define DISPCC_MDSS_VSYNC_CLK 38 +#define DISPCC_MDSS_VSYNC_CLK_SRC 39 +#define DISPCC_XO_CLK_SRC 40 +#define DISPCC_SLEEP_CLK 41 +#define DISPCC_SLEEP_CLK_SRC 42 + +/* DISPCC GDSCR */ +#define MDSS_GDSC 0 + +#endif -- cgit v1.2.3 From 0fd2a048368ea99feccd7dfd6a5f42f6d011f10f Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Sun, 5 May 2024 23:10:35 +0300 Subject: dt-bindings: clock: qcom: Add SM7150 CAMCC clocks Add device tree bindings for the camera clock controller on Qualcomm SM7150 platform. Signed-off-by: Danila Tikhonov Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20240505201038.276047-6-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- .../bindings/clock/qcom,sm7150-camcc.yaml | 60 +++++++++++ include/dt-bindings/clock/qcom,sm7150-camcc.h | 113 +++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml create mode 100644 include/dt-bindings/clock/qcom,sm7150-camcc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml new file mode 100644 index 000000000000..7be4b10c430c --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sm7150-camcc.yaml @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sm7150-camcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Camera Clock & Reset Controller on SM7150 + +maintainers: + - Danila Tikhonov + - David Wronek + - Jens Reidel + +description: | + Qualcomm camera clock control module provides the clocks, resets and power + domains on SM7150. + + See also:: include/dt-bindings/clock/qcom,sm7150-camcc.h + +properties: + compatible: + const: qcom,sm7150-camcc + + clocks: + items: + - description: Board XO source + - description: Board XO Active-Only source + - description: Sleep clock source + + power-domains: + maxItems: 1 + description: + CX power domain. + +required: + - compatible + - clocks + - power-domains + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + clock-controller@ad00000 { + compatible = "qcom,sm7150-camcc"; + reg = <0xad00000 0x10000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&rpmhcc RPMH_CXO_CLK_A>, + <&sleep_clk>; + power-domains = <&rpmhpd RPMHPD_CX>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/include/dt-bindings/clock/qcom,sm7150-camcc.h b/include/dt-bindings/clock/qcom,sm7150-camcc.h new file mode 100644 index 000000000000..ce73ef0fe95d --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm7150-camcc.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Danila Tikhonov + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_CAMCC_SM7150_H +#define _DT_BINDINGS_CLK_QCOM_CAMCC_SM7150_H + +/* Hardware clocks */ +#define CAMCC_PLL0_OUT_EVEN 0 +#define CAMCC_PLL0_OUT_ODD 1 +#define CAMCC_PLL1_OUT_EVEN 2 +#define CAMCC_PLL2_OUT_EARLY 3 +#define CAMCC_PLL3_OUT_EVEN 4 +#define CAMCC_PLL4_OUT_EVEN 5 + +/* CAMCC clock registers */ +#define CAMCC_PLL0 6 +#define CAMCC_PLL1 7 +#define CAMCC_PLL2 8 +#define CAMCC_PLL2_OUT_AUX 9 +#define CAMCC_PLL2_OUT_MAIN 10 +#define CAMCC_PLL3 11 +#define CAMCC_PLL4 12 +#define CAMCC_BPS_AHB_CLK 13 +#define CAMCC_BPS_AREG_CLK 14 +#define CAMCC_BPS_AXI_CLK 15 +#define CAMCC_BPS_CLK 16 +#define CAMCC_BPS_CLK_SRC 17 +#define CAMCC_CAMNOC_AXI_CLK 18 +#define CAMCC_CAMNOC_AXI_CLK_SRC 19 +#define CAMCC_CAMNOC_DCD_XO_CLK 20 +#define CAMCC_CCI_0_CLK 21 +#define CAMCC_CCI_0_CLK_SRC 22 +#define CAMCC_CCI_1_CLK 23 +#define CAMCC_CCI_1_CLK_SRC 24 +#define CAMCC_CORE_AHB_CLK 25 +#define CAMCC_CPAS_AHB_CLK 26 +#define CAMCC_CPHY_RX_CLK_SRC 27 +#define CAMCC_CSI0PHYTIMER_CLK 28 +#define CAMCC_CSI0PHYTIMER_CLK_SRC 29 +#define CAMCC_CSI1PHYTIMER_CLK 30 +#define CAMCC_CSI1PHYTIMER_CLK_SRC 31 +#define CAMCC_CSI2PHYTIMER_CLK 32 +#define CAMCC_CSI2PHYTIMER_CLK_SRC 33 +#define CAMCC_CSI3PHYTIMER_CLK 34 +#define CAMCC_CSI3PHYTIMER_CLK_SRC 35 +#define CAMCC_CSIPHY0_CLK 36 +#define CAMCC_CSIPHY1_CLK 37 +#define CAMCC_CSIPHY2_CLK 38 +#define CAMCC_CSIPHY3_CLK 39 +#define CAMCC_FAST_AHB_CLK_SRC 40 +#define CAMCC_FD_CORE_CLK 41 +#define CAMCC_FD_CORE_CLK_SRC 42 +#define CAMCC_FD_CORE_UAR_CLK 43 +#define CAMCC_ICP_AHB_CLK 44 +#define CAMCC_ICP_CLK 45 +#define CAMCC_ICP_CLK_SRC 46 +#define CAMCC_IFE_0_AXI_CLK 47 +#define CAMCC_IFE_0_CLK 48 +#define CAMCC_IFE_0_CLK_SRC 49 +#define CAMCC_IFE_0_CPHY_RX_CLK 50 +#define CAMCC_IFE_0_CSID_CLK 51 +#define CAMCC_IFE_0_CSID_CLK_SRC 52 +#define CAMCC_IFE_0_DSP_CLK 53 +#define CAMCC_IFE_1_AXI_CLK 54 +#define CAMCC_IFE_1_CLK 55 +#define CAMCC_IFE_1_CLK_SRC 56 +#define CAMCC_IFE_1_CPHY_RX_CLK 57 +#define CAMCC_IFE_1_CSID_CLK 58 +#define CAMCC_IFE_1_CSID_CLK_SRC 59 +#define CAMCC_IFE_1_DSP_CLK 60 +#define CAMCC_IFE_LITE_CLK 61 +#define CAMCC_IFE_LITE_CLK_SRC 62 +#define CAMCC_IFE_LITE_CPHY_RX_CLK 63 +#define CAMCC_IFE_LITE_CSID_CLK 64 +#define CAMCC_IFE_LITE_CSID_CLK_SRC 65 +#define CAMCC_IPE_0_AHB_CLK 66 +#define CAMCC_IPE_0_AREG_CLK 67 +#define CAMCC_IPE_0_AXI_CLK 68 +#define CAMCC_IPE_0_CLK 69 +#define CAMCC_IPE_0_CLK_SRC 70 +#define CAMCC_IPE_1_AHB_CLK 71 +#define CAMCC_IPE_1_AREG_CLK 72 +#define CAMCC_IPE_1_AXI_CLK 73 +#define CAMCC_IPE_1_CLK 74 +#define CAMCC_JPEG_CLK 75 +#define CAMCC_JPEG_CLK_SRC 76 +#define CAMCC_LRME_CLK 77 +#define CAMCC_LRME_CLK_SRC 78 +#define CAMCC_MCLK0_CLK 79 +#define CAMCC_MCLK0_CLK_SRC 80 +#define CAMCC_MCLK1_CLK 81 +#define CAMCC_MCLK1_CLK_SRC 82 +#define CAMCC_MCLK2_CLK 83 +#define CAMCC_MCLK2_CLK_SRC 84 +#define CAMCC_MCLK3_CLK 85 +#define CAMCC_MCLK3_CLK_SRC 86 +#define CAMCC_SLEEP_CLK 87 +#define CAMCC_SLEEP_CLK_SRC 88 +#define CAMCC_SLOW_AHB_CLK_SRC 89 +#define CAMCC_XO_CLK_SRC 90 + +/* CAMCC GDSCRs */ +#define BPS_GDSC 0 +#define IFE_0_GDSC 1 +#define IFE_1_GDSC 2 +#define IPE_0_GDSC 3 +#define IPE_1_GDSC 4 +#define TITAN_TOP_GDSC 5 + +#endif -- cgit v1.2.3 From a4be1860b9319e9e55eaa9e28e35e7b19128060c Mon Sep 17 00:00:00 2001 From: Danila Tikhonov Date: Sun, 5 May 2024 23:10:37 +0300 Subject: dt-bindings: clock: qcom: Add SM7150 VIDEOCC clocks Add device tree bindings for the video clock controller on Qualcomm SM7150 platform. Signed-off-by: Danila Tikhonov Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20240505201038.276047-8-danila@jiaxyga.com Signed-off-by: Bjorn Andersson --- .../bindings/clock/qcom,sm7150-videocc.yaml | 58 ++++++++++++++++++++++ include/dt-bindings/clock/qcom,sm7150-videocc.h | 28 +++++++++++ 2 files changed, 86 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml create mode 100644 include/dt-bindings/clock/qcom,sm7150-videocc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml new file mode 100644 index 000000000000..037ffc71e70e --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,sm7150-videocc.yaml @@ -0,0 +1,58 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,sm7150-videocc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Video Clock & Reset Controller on SM7150 + +maintainers: + - Danila Tikhonov + - David Wronek + - Jens Reidel + +description: | + Qualcomm video clock control module provides the clocks, resets and power + domains on SM7150. + + See also:: include/dt-bindings/clock/qcom,videocc-sm7150.h + +properties: + compatible: + const: qcom,sm7150-videocc + + clocks: + items: + - description: Board XO source + - description: Board Always On XO source + + power-domains: + maxItems: 1 + description: + CX power domain. + +required: + - compatible + - clocks + - power-domains + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + videocc: clock-controller@ab00000 { + compatible = "qcom,sm7150-videocc"; + reg = <0x0ab00000 0x10000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&rpmhcc RPMH_CXO_CLK_A>; + power-domains = <&rpmhpd RPMHPD_CX>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/include/dt-bindings/clock/qcom,sm7150-videocc.h b/include/dt-bindings/clock/qcom,sm7150-videocc.h new file mode 100644 index 000000000000..d86e0fbb159a --- /dev/null +++ b/include/dt-bindings/clock/qcom,sm7150-videocc.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Danila Tikhonov + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_VIDEOCC_SM7150_H +#define _DT_BINDINGS_CLK_QCOM_VIDEOCC_SM7150_H + +#define VIDEOCC_PLL0 0 +#define VIDEOCC_IRIS_AHB_CLK 1 +#define VIDEOCC_IRIS_CLK_SRC 2 +#define VIDEOCC_MVS0_AXI_CLK 3 +#define VIDEOCC_MVS0_CORE_CLK 4 +#define VIDEOCC_MVS1_AXI_CLK 5 +#define VIDEOCC_MVS1_CORE_CLK 6 +#define VIDEOCC_MVSC_CORE_CLK 7 +#define VIDEOCC_MVSC_CTL_AXI_CLK 8 +#define VIDEOCC_VENUS_AHB_CLK 9 +#define VIDEOCC_XO_CLK 10 +#define VIDEOCC_XO_CLK_SRC 11 + +/* VIDEOCC GDSCRs */ +#define VENUS_GDSC 0 +#define VCODEC0_GDSC 1 +#define VCODEC1_GDSC 2 + +#endif -- cgit v1.2.3 From 27c42e925323b975a64429e313b0cf5c0c02a411 Mon Sep 17 00:00:00 2001 From: Kathiravan Thirumoorthy Date: Mon, 25 Mar 2024 21:19:48 +0530 Subject: dt-bindings: arm: qcom,ids: Add SoC ID for IPQ5321 Add the ID for the Qualcomm IPQ5321 SoC. Acked-by: Krzysztof Kozlowski Reviewed-by: Mukesh Ojha Signed-off-by: Kathiravan Thirumoorthy Link: https://lore.kernel.org/r/20240325-ipq5321-sku-support-v2-1-f30ce244732f@quicinc.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index d040033dc8ee..a2958952a9ec 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -272,6 +272,7 @@ #define QCOM_ID_QCS8550 603 #define QCOM_ID_QCM8550 604 #define QCOM_ID_IPQ5300 624 +#define QCOM_ID_IPQ5321 650 /* * The board type and revision information, used by Qualcomm bootloaders and -- cgit v1.2.3 From 3fefd9b594aa6be9f120733f5bb57b07d47871a9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 17 Apr 2024 18:56:57 +0100 Subject: fs: Remove i_blocks_per_page The last caller has been converted to i_blocks_per_folio() so we can remove this wrapper. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Dave Kleikamp --- include/linux/pagemap.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 3d69589c00a4..63f2f3602a7f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -1536,10 +1536,4 @@ unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) { return folio_size(folio) >> inode->i_blkbits; } - -static inline -unsigned int i_blocks_per_page(struct inode *inode, struct page *page) -{ - return i_blocks_per_folio(inode, page_folio(page)); -} #endif /* _LINUX_PAGEMAP_H */ -- cgit v1.2.3 From a0a44d9175b349df2462089140fb7f292100bd7c Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 27 May 2024 11:01:28 +0200 Subject: mm, slab: don't wrap internal functions with alloc_hooks() The functions __kmalloc_noprof(), kmalloc_large_noprof(), kmalloc_trace_noprof() and their _node variants are all internal to the implementations of kmalloc_noprof() and kmalloc_node_noprof() and are only declared in the "public" slab.h and exported so that those implementations can be static inline and distinguish the build-time constant size variants. The only other users for some of the internal functions are slub_kunit and fortify_kunit tests which make very short-lived allocations. Therefore we can stop wrapping them with the alloc_hooks() macro. Instead add a __ prefix to all of them and a comment documenting these as internal. Also rename __kmalloc_trace() to __kmalloc_cache() which is more descriptive - it is a variant of __kmalloc() where the exact kmalloc cache has been already determined. The usage in fortify_kunit can be removed completely, as the internal functions should be tested already through kmalloc() tests in the test variant that passes non-constant allocation size. Reported-by: Kent Overstreet Cc: Suren Baghdasaryan Cc: Kees Cook Reviewed-by: Kent Overstreet Acked-by: David Rientjes Signed-off-by: Vlastimil Babka --- include/linux/slab.h | 48 ++++++++++++++++++++++++------------------------ lib/fortify_kunit.c | 5 ----- lib/slub_kunit.c | 2 +- mm/slub.c | 26 +++++++++++++------------- 4 files changed, 38 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7247e217e21b..ed6bee5ec2b6 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -528,9 +528,6 @@ static_assert(PAGE_SHIFT <= 20); #include -void *__kmalloc_noprof(size_t size, gfp_t flags) __assume_kmalloc_alignment __alloc_size(1); -#define __kmalloc(...) alloc_hooks(__kmalloc_noprof(__VA_ARGS__)) - /** * kmem_cache_alloc - Allocate an object * @cachep: The cache to allocate from. @@ -568,31 +565,34 @@ static __always_inline void kfree_bulk(size_t size, void **p) kmem_cache_free_bulk(NULL, size, p); } -void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment - __alloc_size(1); -#define __kmalloc_node(...) alloc_hooks(__kmalloc_node_noprof(__VA_ARGS__)) - void *kmem_cache_alloc_node_noprof(struct kmem_cache *s, gfp_t flags, int node) __assume_slab_alignment __malloc; #define kmem_cache_alloc_node(...) alloc_hooks(kmem_cache_alloc_node_noprof(__VA_ARGS__)) -void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t flags, size_t size) - __assume_kmalloc_alignment __alloc_size(3); +/* + * The following functions are not to be used directly and are intended only + * for internal use from kmalloc() and kmalloc_node() + * with the exception of kunit tests + */ + +void *__kmalloc_noprof(size_t size, gfp_t flags) + __assume_kmalloc_alignment __alloc_size(1); + +void *__kmalloc_node_noprof(size_t size, gfp_t flags, int node) + __assume_kmalloc_alignment __alloc_size(1); -void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) __assume_kmalloc_alignment - __alloc_size(4); -#define kmalloc_trace(...) alloc_hooks(kmalloc_trace_noprof(__VA_ARGS__)) +void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t flags, size_t size) + __assume_kmalloc_alignment __alloc_size(3); -#define kmalloc_node_trace(...) alloc_hooks(kmalloc_node_trace_noprof(__VA_ARGS__)) +void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) + __assume_kmalloc_alignment __alloc_size(4); -void *kmalloc_large_noprof(size_t size, gfp_t flags) __assume_page_alignment - __alloc_size(1); -#define kmalloc_large(...) alloc_hooks(kmalloc_large_noprof(__VA_ARGS__)) +void *__kmalloc_large_noprof(size_t size, gfp_t flags) + __assume_page_alignment __alloc_size(1); -void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) __assume_page_alignment - __alloc_size(1); -#define kmalloc_large_node(...) alloc_hooks(kmalloc_large_node_noprof(__VA_ARGS__)) +void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) + __assume_page_alignment __alloc_size(1); /** * kmalloc - allocate kernel memory @@ -654,10 +654,10 @@ static __always_inline __alloc_size(1) void *kmalloc_noprof(size_t size, gfp_t f unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large_noprof(size, flags); + return __kmalloc_large_noprof(size, flags); index = kmalloc_index(size); - return kmalloc_trace_noprof( + return __kmalloc_cache_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, size); } @@ -671,10 +671,10 @@ static __always_inline __alloc_size(1) void *kmalloc_node_noprof(size_t size, gf unsigned int index; if (size > KMALLOC_MAX_CACHE_SIZE) - return kmalloc_large_node_noprof(size, flags, node); + return __kmalloc_large_node_noprof(size, flags, node); index = kmalloc_index(size); - return kmalloc_node_trace_noprof( + return __kmalloc_cache_node_noprof( kmalloc_caches[kmalloc_type(flags, _RET_IP_)][index], flags, node, size); } diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c index 39da5b3bc649..044f409ef856 100644 --- a/lib/fortify_kunit.c +++ b/lib/fortify_kunit.c @@ -233,11 +233,6 @@ static void fortify_test_alloc_size_##allocator##_dynamic(struct kunit *test) \ kfree(p)); \ checker(expected_size, \ kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE), \ - kfree(p)); \ - checker(expected_size, __kmalloc(alloc_size, gfp), \ - kfree(p)); \ - checker(expected_size, \ - __kmalloc_node(alloc_size, gfp, NUMA_NO_NODE), \ kfree(p)); \ \ orig = kmalloc(alloc_size, gfp); \ diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 4ce960438806..e6667a28c014 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -140,7 +140,7 @@ static void test_kmalloc_redzone_access(struct kunit *test) { struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32, SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE); - u8 *p = kmalloc_trace(s, GFP_KERNEL, 18); + u8 *p = __kmalloc_cache_noprof(s, GFP_KERNEL, 18); kasan_disable_current(); diff --git a/mm/slub.c b/mm/slub.c index 0809760cf789..95e0a3332c44 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4053,7 +4053,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_node_noprof); * directly to the page allocator. We use __GFP_COMP, because we will need to * know the allocation order to free the pages properly in kfree. */ -static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) +static void *___kmalloc_large_node(size_t size, gfp_t flags, int node) { struct folio *folio; void *ptr = NULL; @@ -4078,25 +4078,25 @@ static void *__kmalloc_large_node(size_t size, gfp_t flags, int node) return ptr; } -void *kmalloc_large_noprof(size_t size, gfp_t flags) +void *__kmalloc_large_noprof(size_t size, gfp_t flags) { - void *ret = __kmalloc_large_node(size, flags, NUMA_NO_NODE); + void *ret = ___kmalloc_large_node(size, flags, NUMA_NO_NODE); trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), flags, NUMA_NO_NODE); return ret; } -EXPORT_SYMBOL(kmalloc_large_noprof); +EXPORT_SYMBOL(__kmalloc_large_noprof); -void *kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) +void *__kmalloc_large_node_noprof(size_t size, gfp_t flags, int node) { - void *ret = __kmalloc_large_node(size, flags, node); + void *ret = ___kmalloc_large_node(size, flags, node); trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << get_order(size), flags, node); return ret; } -EXPORT_SYMBOL(kmalloc_large_node_noprof); +EXPORT_SYMBOL(__kmalloc_large_node_noprof); static __always_inline void *__do_kmalloc_node(size_t size, gfp_t flags, int node, @@ -4106,7 +4106,7 @@ void *__do_kmalloc_node(size_t size, gfp_t flags, int node, void *ret; if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) { - ret = __kmalloc_large_node(size, flags, node); + ret = __kmalloc_large_node_noprof(size, flags, node); trace_kmalloc(caller, ret, size, PAGE_SIZE << get_order(size), flags, node); return ret; @@ -4142,7 +4142,7 @@ void *kmalloc_node_track_caller_noprof(size_t size, gfp_t flags, } EXPORT_SYMBOL(kmalloc_node_track_caller_noprof); -void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) +void *__kmalloc_cache_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) { void *ret = slab_alloc_node(s, NULL, gfpflags, NUMA_NO_NODE, _RET_IP_, size); @@ -4152,10 +4152,10 @@ void *kmalloc_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, size_t size) ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; } -EXPORT_SYMBOL(kmalloc_trace_noprof); +EXPORT_SYMBOL(__kmalloc_cache_noprof); -void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size) +void *__kmalloc_cache_node_noprof(struct kmem_cache *s, gfp_t gfpflags, + int node, size_t size) { void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, size); @@ -4164,7 +4164,7 @@ void *kmalloc_node_trace_noprof(struct kmem_cache *s, gfp_t gfpflags, ret = kasan_kmalloc(s, ret, size, gfpflags); return ret; } -EXPORT_SYMBOL(kmalloc_node_trace_noprof); +EXPORT_SYMBOL(__kmalloc_cache_node_noprof); static noinline void free_to_partial_list( struct kmem_cache *s, struct slab *slab, -- cgit v1.2.3 From 582d79f34330db4ccee85620cf95ee7ccb9a0d01 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:50 +0200 Subject: drm/connector: Introduce an HDMI connector initialization function A lot of the various HDMI drivers duplicate some logic that depends on the HDMI spec itself and not really a particular hardware implementation. Output BPC or format selection, infoframe generation are good examples of such areas. This creates a lot of boilerplate, with a lot of variations, which makes it hard for userspace to rely on, and makes it difficult to get it right for drivers. In the next patches, we'll add a lot of infrastructure around the drm_connector and drm_connector_state structures, which will allow to abstract away the duplicated logic. This infrastructure comes with a few requirements though, and thus we need a new initialization function. Hopefully, this will make drivers simpler to handle, and their behaviour more consistent. Reviewed-by: Dave Stevenson Reviewed-by: Sui Jingfeng Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-1-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/drm_connector.c | 39 +++++++++++++++++++++++++++++++++++++++ include/drm/drm_connector.h | 5 +++++ 2 files changed, 44 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 4d2df7f64dc5..8c471bdfbfff 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -452,6 +452,45 @@ int drmm_connector_init(struct drm_device *dev, } EXPORT_SYMBOL(drmm_connector_init); +/** + * drmm_connector_hdmi_init - Init a preallocated HDMI connector + * @dev: DRM device + * @connector: A pointer to the HDMI connector to init + * @funcs: callbacks for this connector + * @connector_type: user visible type of the connector + * @ddc: optional pointer to the associated ddc adapter + * + * Initialises a preallocated HDMI connector. Connectors can be + * subclassed as part of driver connector objects. + * + * Cleanup is automatically handled with a call to + * drm_connector_cleanup() in a DRM-managed action. + * + * The connector structure should be allocated with drmm_kzalloc(). + * + * Returns: + * Zero on success, error code on failure. + */ +int drmm_connector_hdmi_init(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc) +{ + int ret; + + if (!(connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector_type == DRM_MODE_CONNECTOR_HDMIB)) + return -EINVAL; + + ret = drmm_connector_init(dev, connector, funcs, connector_type, ddc); + if (ret) + return ret; + + return 0; +} +EXPORT_SYMBOL(drmm_connector_hdmi_init); + /** * drm_connector_attach_edid_property - attach edid property. * @connector: the connector diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 58ee9adf9091..7ef1ff83d885 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1908,6 +1908,11 @@ int drmm_connector_init(struct drm_device *dev, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc); +int drmm_connector_hdmi_init(struct drm_device *dev, + struct drm_connector *connector, + const struct drm_connector_funcs *funcs, + int connector_type, + struct i2c_adapter *ddc); void drm_connector_attach_edid_property(struct drm_connector *connector); int drm_connector_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); -- cgit v1.2.3 From 54cb39e2293b1e221708d3ac157ecc59086e1b46 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:52 +0200 Subject: drm/connector: hdmi: Create an HDMI sub-state The next features we will need to share across drivers will need to store some parameters for drivers to use, such as the selected output format. Let's create a new connector sub-state dedicated to HDMI controllers, that will eventually store everything we need. Reviewed-by: Dave Stevenson Reviewed-by: Sui Jingfeng Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-3-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/Kconfig | 7 +++++ drivers/gpu/drm/display/Makefile | 2 ++ drivers/gpu/drm/display/drm_hdmi_state_helper.c | 41 +++++++++++++++++++++++++ include/drm/display/drm_hdmi_state_helper.h | 16 ++++++++++ include/drm/drm_connector.h | 7 +++++ 5 files changed, 73 insertions(+) create mode 100644 drivers/gpu/drm/display/drm_hdmi_state_helper.c create mode 100644 include/drm/display/drm_hdmi_state_helper.h (limited to 'include') diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index 864a6488bfdf..14114b597ef4 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -70,3 +70,10 @@ config DRM_DISPLAY_HDMI_HELPER depends on DRM_DISPLAY_HELPER help DRM display helpers for HDMI. + +config DRM_DISPLAY_HDMI_STATE_HELPER + bool + depends on DRM_DISPLAY_HELPER + depends on DRM_DISPLAY_HDMI_HELPER + help + DRM KMS state helpers for HDMI. diff --git a/drivers/gpu/drm/display/Makefile b/drivers/gpu/drm/display/Makefile index 17d2cc73ff56..629df2f4d322 100644 --- a/drivers/gpu/drm/display/Makefile +++ b/drivers/gpu/drm/display/Makefile @@ -14,6 +14,8 @@ drm_display_helper-$(CONFIG_DRM_DISPLAY_HDCP_HELPER) += drm_hdcp_helper.o drm_display_helper-$(CONFIG_DRM_DISPLAY_HDMI_HELPER) += \ drm_hdmi_helper.o \ drm_scdc_helper.o +drm_display_helper-$(CONFIG_DRM_DISPLAY_HDMI_STATE_HELPER) += \ + drm_hdmi_state_helper.o drm_display_helper-$(CONFIG_DRM_DISPLAY_DP_AUX_CHARDEV) += drm_dp_aux_dev.o drm_display_helper-$(CONFIG_DRM_DISPLAY_DP_AUX_CEC) += drm_dp_cec.o diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c new file mode 100644 index 000000000000..1e92c1108d23 --- /dev/null +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: MIT + +#include +#include + +#include + +/** + * __drm_atomic_helper_connector_hdmi_reset() - Initializes all HDMI @drm_connector_state resources + * @connector: DRM connector + * @new_conn_state: connector state to reset + * + * Initializes all HDMI resources from a @drm_connector_state without + * actually allocating it. This is useful for HDMI drivers, in + * combination with __drm_atomic_helper_connector_reset() or + * drm_atomic_helper_connector_reset(). + */ +void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, + struct drm_connector_state *new_conn_state) +{ +} +EXPORT_SYMBOL(__drm_atomic_helper_connector_hdmi_reset); + +/** + * drm_atomic_helper_connector_hdmi_check() - Helper to check HDMI connector atomic state + * @connector: DRM Connector + * @state: the DRM State object + * + * Provides a default connector state check handler for HDMI connectors. + * Checks that a desired connector update is valid, and updates various + * fields of derived state. + * + * RETURNS: + * Zero on success, or an errno code otherwise. + */ +int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, + struct drm_atomic_state *state) +{ + return 0; +} +EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_check); diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h new file mode 100644 index 000000000000..837899db315a --- /dev/null +++ b/include/drm/display/drm_hdmi_state_helper.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ + +#ifndef DRM_HDMI_STATE_HELPER_H_ +#define DRM_HDMI_STATE_HELPER_H_ + +struct drm_atomic_state; +struct drm_connector; +struct drm_connector_state; + +void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, + struct drm_connector_state *new_conn_state); + +int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, + struct drm_atomic_state *state); + +#endif // DRM_HDMI_STATE_HELPER_H_ diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 7ef1ff83d885..5fe3f6a519cd 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1031,6 +1031,13 @@ struct drm_connector_state { * DRM blob property for HDR output metadata */ struct drm_property_blob *hdr_output_metadata; + + /** + * @hdmi: HDMI-related variable and properties. Filled by + * @drm_atomic_helper_connector_hdmi_check(). + */ + struct { + } hdmi; }; /** -- cgit v1.2.3 From aadb3e16b8f30cc1c1efdfe162f400e026385bfb Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:53 +0200 Subject: drm/connector: hdmi: Add output BPC to the connector state We'll add automatic selection of the output BPC in a following patch, but let's add it to the HDMI connector state already. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-4-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/drm_hdmi_state_helper.c | 20 ++++++++++++++++++++ drivers/gpu/drm/drm_atomic.c | 5 +++++ drivers/gpu/drm/drm_connector.c | 20 +++++++++++++++++++- drivers/gpu/drm/tests/drm_connector_test.c | 12 ++++++++---- include/drm/drm_connector.h | 12 +++++++++++- 5 files changed, 63 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 1e92c1108d23..82293d93b5f8 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -18,6 +18,10 @@ void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, struct drm_connector_state *new_conn_state) { + unsigned int max_bpc = connector->max_bpc; + + new_conn_state->max_bpc = max_bpc; + new_conn_state->max_requested_bpc = max_bpc; } EXPORT_SYMBOL(__drm_atomic_helper_connector_hdmi_reset); @@ -36,6 +40,22 @@ EXPORT_SYMBOL(__drm_atomic_helper_connector_hdmi_reset); int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, struct drm_atomic_state *state) { + struct drm_connector_state *old_conn_state = + drm_atomic_get_old_connector_state(state, connector); + struct drm_connector_state *new_conn_state = + drm_atomic_get_new_connector_state(state, connector); + + if (old_conn_state->hdmi.output_bpc != new_conn_state->hdmi.output_bpc) { + struct drm_crtc *crtc = new_conn_state->crtc; + struct drm_crtc_state *crtc_state; + + crtc_state = drm_atomic_get_crtc_state(state, crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + crtc_state->mode_changed = true; + } + return 0; } EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_check); diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index a91737adf8e7..4e11cfb4518b 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1143,6 +1143,11 @@ static void drm_atomic_connector_print_state(struct drm_printer *p, drm_printf(p, "\tmax_requested_bpc=%d\n", state->max_requested_bpc); drm_printf(p, "\tcolorspace=%s\n", drm_get_colorspace_name(state->colorspace)); + if (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || + connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) { + drm_printf(p, "\toutput_bpc=%u\n", state->hdmi.output_bpc); + } + if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) if (state->writeback_job && state->writeback_job->fb) drm_printf(p, "\tfb=%d\n", state->writeback_job->fb->base.id); diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 8c471bdfbfff..e5c840987854 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -459,6 +459,7 @@ EXPORT_SYMBOL(drmm_connector_init); * @funcs: callbacks for this connector * @connector_type: user visible type of the connector * @ddc: optional pointer to the associated ddc adapter + * @max_bpc: Maximum bits per char the HDMI connector supports * * Initialises a preallocated HDMI connector. Connectors can be * subclassed as part of driver connector objects. @@ -475,7 +476,8 @@ int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, - struct i2c_adapter *ddc) + struct i2c_adapter *ddc, + unsigned int max_bpc) { int ret; @@ -483,10 +485,26 @@ int drmm_connector_hdmi_init(struct drm_device *dev, connector_type == DRM_MODE_CONNECTOR_HDMIB)) return -EINVAL; + if (!(max_bpc == 8 || max_bpc == 10 || max_bpc == 12)) + return -EINVAL; + ret = drmm_connector_init(dev, connector, funcs, connector_type, ddc); if (ret) return ret; + /* + * drm_connector_attach_max_bpc_property() requires the + * connector to have a state. + */ + if (connector->funcs->reset) + connector->funcs->reset(connector); + + drm_connector_attach_max_bpc_property(connector, 8, max_bpc); + connector->max_bpc = max_bpc; + + if (max_bpc > 8) + drm_connector_attach_hdr_output_metadata_property(connector); + return 0; } EXPORT_SYMBOL(drmm_connector_hdmi_init); diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c index 261d4109946d..2661eb64a5cd 100644 --- a/drivers/gpu/drm/tests/drm_connector_test.c +++ b/drivers/gpu/drm/tests/drm_connector_test.c @@ -184,7 +184,8 @@ static void drm_test_connector_hdmi_init_valid(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, - &priv->ddc); + &priv->ddc, + 8); KUNIT_EXPECT_EQ(test, ret, 0); } @@ -200,7 +201,8 @@ static void drm_test_connector_hdmi_init_null_ddc(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, - NULL); + NULL, + 8); KUNIT_EXPECT_EQ(test, ret, 0); } @@ -217,7 +219,8 @@ static void drm_test_connector_hdmi_init_type_valid(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, connector_type, - &priv->ddc); + &priv->ddc, + 8); KUNIT_EXPECT_EQ(test, ret, 0); } @@ -248,7 +251,8 @@ static void drm_test_connector_hdmi_init_type_invalid(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, connector_type, - &priv->ddc); + &priv->ddc, + 8); KUNIT_EXPECT_LT(test, ret, 0); } diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 5fe3f6a519cd..4ddb438ea2dd 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1037,6 +1037,10 @@ struct drm_connector_state { * @drm_atomic_helper_connector_hdmi_check(). */ struct { + /** + * @output_bpc: Bits per color channel to output. + */ + unsigned int output_bpc; } hdmi; }; @@ -1686,6 +1690,11 @@ struct drm_connector { */ struct drm_property_blob *path_blob_ptr; + /** + * @max_bpc: Maximum bits per color channel the connector supports. + */ + unsigned int max_bpc; + /** * @max_bpc_property: Default connector property for the max bpc to be * driven out of the connector. @@ -1919,7 +1928,8 @@ int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, int connector_type, - struct i2c_adapter *ddc); + struct i2c_adapter *ddc, + unsigned int max_bpc); void drm_connector_attach_edid_property(struct drm_connector *connector); int drm_connector_register(struct drm_connector *connector); void drm_connector_unregister(struct drm_connector *connector); -- cgit v1.2.3 From 948f01d5e5595023c2e7cfc0184a322be00ef214 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:56 +0200 Subject: drm/connector: hdmi: Add support for output format Just like BPC, we'll add support for automatic selection of the output format for HDMI connectors. Let's add the needed defaults and fields for now. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-7-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/drm_hdmi_state_helper.c | 3 ++- drivers/gpu/drm/drm_atomic.c | 2 ++ drivers/gpu/drm/drm_connector.c | 31 ++++++++++++++++++++++ drivers/gpu/drm/tests/drm_connector_test.c | 9 +++++++ drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c | 22 +++++++++++---- include/drm/drm_connector.h | 20 ++++++++++++++ 6 files changed, 81 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 82293d93b5f8..f6cd0612ea2c 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -45,7 +45,8 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, struct drm_connector_state *new_conn_state = drm_atomic_get_new_connector_state(state, connector); - if (old_conn_state->hdmi.output_bpc != new_conn_state->hdmi.output_bpc) { + if (old_conn_state->hdmi.output_bpc != new_conn_state->hdmi.output_bpc || + old_conn_state->hdmi.output_format != new_conn_state->hdmi.output_format) { struct drm_crtc *crtc = new_conn_state->crtc; struct drm_crtc_state *crtc_state; diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 4e11cfb4518b..8730137baa86 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1146,6 +1146,8 @@ static void drm_atomic_connector_print_state(struct drm_printer *p, if (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) { drm_printf(p, "\toutput_bpc=%u\n", state->hdmi.output_bpc); + drm_printf(p, "\toutput_format=%s\n", + drm_hdmi_connector_get_output_format_name(state->hdmi.output_format)); } if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index e5c840987854..f554bf703e13 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -459,6 +459,7 @@ EXPORT_SYMBOL(drmm_connector_init); * @funcs: callbacks for this connector * @connector_type: user visible type of the connector * @ddc: optional pointer to the associated ddc adapter + * @supported_formats: Bitmask of @hdmi_colorspace listing supported output formats * @max_bpc: Maximum bits per char the HDMI connector supports * * Initialises a preallocated HDMI connector. Connectors can be @@ -477,6 +478,7 @@ int drmm_connector_hdmi_init(struct drm_device *dev, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc, + unsigned long supported_formats, unsigned int max_bpc) { int ret; @@ -485,6 +487,9 @@ int drmm_connector_hdmi_init(struct drm_device *dev, connector_type == DRM_MODE_CONNECTOR_HDMIB)) return -EINVAL; + if (!supported_formats || !(supported_formats & BIT(HDMI_COLORSPACE_RGB))) + return -EINVAL; + if (!(max_bpc == 8 || max_bpc == 10 || max_bpc == 12)) return -EINVAL; @@ -492,6 +497,8 @@ int drmm_connector_hdmi_init(struct drm_device *dev, if (ret) return ret; + connector->hdmi.supported_formats = supported_formats; + /* * drm_connector_attach_max_bpc_property() requires the * connector to have a state. @@ -1201,6 +1208,30 @@ static const u32 dp_colorspaces = BIT(DRM_MODE_COLORIMETRY_BT2020_CYCC) | BIT(DRM_MODE_COLORIMETRY_BT2020_YCC); +static const char * const output_format_str[] = { + [HDMI_COLORSPACE_RGB] = "RGB", + [HDMI_COLORSPACE_YUV420] = "YUV 4:2:0", + [HDMI_COLORSPACE_YUV422] = "YUV 4:2:2", + [HDMI_COLORSPACE_YUV444] = "YUV 4:4:4", +}; + +/* + * drm_hdmi_connector_get_output_format_name() - Return a string for HDMI connector output format + * @fmt: Output format to compute name of + * + * Returns: the name of the output format, or NULL if the type is not + * valid. + */ +const char * +drm_hdmi_connector_get_output_format_name(enum hdmi_colorspace fmt) +{ + if (fmt >= ARRAY_SIZE(output_format_str)) + return NULL; + + return output_format_str[fmt]; +} +EXPORT_SYMBOL(drm_hdmi_connector_get_output_format_name); + /** * DOC: standard connector properties * diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c index 9465fa33f3b6..37e6efd46d7e 100644 --- a/drivers/gpu/drm/tests/drm_connector_test.c +++ b/drivers/gpu/drm/tests/drm_connector_test.c @@ -187,6 +187,7 @@ static void drm_test_connector_hdmi_init_valid(struct kunit *test) &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, + BIT(HDMI_COLORSPACE_RGB), 8); KUNIT_EXPECT_EQ(test, ret, 0); } @@ -204,6 +205,7 @@ static void drm_test_connector_hdmi_init_null_ddc(struct kunit *test) &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, NULL, + BIT(HDMI_COLORSPACE_RGB), 8); KUNIT_EXPECT_EQ(test, ret, 0); } @@ -221,6 +223,7 @@ static void drm_test_connector_hdmi_init_bpc_invalid(struct kunit *test) &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, + BIT(HDMI_COLORSPACE_RGB), 9); KUNIT_EXPECT_LT(test, ret, 0); } @@ -238,6 +241,7 @@ static void drm_test_connector_hdmi_init_bpc_null(struct kunit *test) &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, + BIT(HDMI_COLORSPACE_RGB), 0); KUNIT_EXPECT_LT(test, ret, 0); } @@ -260,6 +264,7 @@ static void drm_test_connector_hdmi_init_bpc_8(struct kunit *test) &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, + BIT(HDMI_COLORSPACE_RGB), 8); KUNIT_EXPECT_EQ(test, ret, 0); @@ -298,6 +303,7 @@ static void drm_test_connector_hdmi_init_bpc_10(struct kunit *test) &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, + BIT(HDMI_COLORSPACE_RGB), 10); KUNIT_EXPECT_EQ(test, ret, 0); @@ -336,6 +342,7 @@ static void drm_test_connector_hdmi_init_bpc_12(struct kunit *test) &dummy_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, + BIT(HDMI_COLORSPACE_RGB), 12); KUNIT_EXPECT_EQ(test, ret, 0); @@ -370,6 +377,7 @@ static void drm_test_connector_hdmi_init_type_valid(struct kunit *test) &dummy_funcs, connector_type, &priv->ddc, + BIT(HDMI_COLORSPACE_RGB), 8); KUNIT_EXPECT_EQ(test, ret, 0); } @@ -402,6 +410,7 @@ static void drm_test_connector_hdmi_init_type_invalid(struct kunit *test) &dummy_funcs, connector_type, &priv->ddc, + BIT(HDMI_COLORSPACE_RGB), 8); KUNIT_EXPECT_LT(test, ret, 0); } diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index a82f2a52ae0f..69ffaa4507c1 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -149,6 +149,7 @@ static const struct drm_connector_funcs dummy_connector_funcs = { static struct drm_atomic_helper_connector_hdmi_priv * drm_atomic_helper_connector_hdmi_init(struct kunit *test, + unsigned int formats, unsigned int max_bpc) { struct drm_atomic_helper_connector_hdmi_priv *priv; @@ -192,6 +193,7 @@ drm_atomic_helper_connector_hdmi_init(struct kunit *test, &dummy_connector_funcs, DRM_MODE_CONNECTOR_HDMIA, NULL, + formats, max_bpc); KUNIT_ASSERT_EQ(test, ret, 0); @@ -227,7 +229,9 @@ static void drm_test_check_output_bpc_crtc_mode_changed(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, 10); + priv = drm_atomic_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 10); KUNIT_ASSERT_NOT_NULL(test, priv); ctx = drm_kunit_helper_acquire_ctx_alloc(test); @@ -294,7 +298,9 @@ static void drm_test_check_output_bpc_crtc_mode_not_changed(struct kunit *test) struct drm_crtc *crtc; int ret; - priv = drm_atomic_helper_connector_hdmi_init(test, 10); + priv = drm_atomic_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 10); KUNIT_ASSERT_NOT_NULL(test, priv); ctx = drm_kunit_helper_acquire_ctx_alloc(test); @@ -363,7 +369,9 @@ static void drm_test_check_bpc_8_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, 8); + priv = drm_atomic_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 8); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -385,7 +393,9 @@ static void drm_test_check_bpc_10_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, 10); + priv = drm_atomic_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 10); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; @@ -407,7 +417,9 @@ static void drm_test_check_bpc_12_value(struct kunit *test) struct drm_connector_state *conn_state; struct drm_connector *conn; - priv = drm_atomic_helper_connector_hdmi_init(test, 12); + priv = drm_atomic_helper_connector_hdmi_init(test, + BIT(HDMI_COLORSPACE_RGB), + 12); KUNIT_ASSERT_NOT_NULL(test, priv); conn = &priv->connector; diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 4ddb438ea2dd..4b6b1117c7d0 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -368,6 +368,9 @@ enum drm_panel_orientation { DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +const char * +drm_hdmi_connector_get_output_format_name(enum hdmi_colorspace fmt); + /** * struct drm_monitor_range_info - Panel's Monitor range in EDID for * &drm_display_info @@ -1041,6 +1044,11 @@ struct drm_connector_state { * @output_bpc: Bits per color channel to output. */ unsigned int output_bpc; + + /** + * @output_format: Pixel format to output in. + */ + enum hdmi_colorspace output_format; } hdmi; }; @@ -1906,6 +1914,17 @@ struct drm_connector { /** @hdr_sink_metadata: HDR Metadata Information read from sink */ struct hdr_sink_metadata hdr_sink_metadata; + + /** + * @hdmi: HDMI-related variable and properties. + */ + struct { + /** + * @supported_formats: Bitmask of @hdmi_colorspace + * supported by the controller. + */ + unsigned long supported_formats; + } hdmi; }; #define obj_to_connector(x) container_of(x, struct drm_connector, base) @@ -1929,6 +1948,7 @@ int drmm_connector_hdmi_init(struct drm_device *dev, const struct drm_connector_funcs *funcs, int connector_type, struct i2c_adapter *ddc, + unsigned long supported_formats, unsigned int max_bpc); void drm_connector_attach_edid_property(struct drm_connector *connector); int drm_connector_register(struct drm_connector *connector); -- cgit v1.2.3 From 40167bcbd19c241fc30a912fa8a8276b9ed1a12e Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:57:58 +0200 Subject: drm/display: hdmi: Add HDMI compute clock helper A lot of HDMI drivers have some variation of the formula to calculate the TMDS character rate from a mode, but few of them actually take all parameters into account. Let's create a helper to provide that rate taking all parameters into account. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-9-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/drm_hdmi_helper.c | 61 +++++++++++++++++++++++++++++++ include/drm/display/drm_hdmi_helper.h | 4 ++ 2 files changed, 65 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_hdmi_helper.c b/drivers/gpu/drm/display/drm_hdmi_helper.c index faf5e9efa7d3..74dd4d01dd9b 100644 --- a/drivers/gpu/drm/display/drm_hdmi_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_helper.c @@ -195,3 +195,64 @@ void drm_hdmi_avi_infoframe_content_type(struct hdmi_avi_infoframe *frame, frame->itc = conn_state->content_type != DRM_MODE_CONTENT_TYPE_NO_DATA; } EXPORT_SYMBOL(drm_hdmi_avi_infoframe_content_type); + +/** + * drm_hdmi_compute_mode_clock() - Computes the TMDS Character Rate + * @mode: Display mode to compute the clock for + * @bpc: Bits per character + * @fmt: Output Pixel Format used + * + * Returns the TMDS Character Rate for a given mode, bpc count and output format. + * + * RETURNS: + * The TMDS Character Rate, in Hertz, or 0 on error. + */ +unsigned long long +drm_hdmi_compute_mode_clock(const struct drm_display_mode *mode, + unsigned int bpc, enum hdmi_colorspace fmt) +{ + unsigned long long clock = mode->clock * 1000ULL; + unsigned int vic = drm_match_cea_mode(mode); + + /* + * CTA-861-G Spec, section 5.4 - Color Coding and Quantization + * mandates that VIC 1 always uses 8 bpc. + */ + if (vic == 1 && bpc != 8) + return 0; + + if (fmt == HDMI_COLORSPACE_YUV422) { + /* + * HDMI 1.0 Spec, section 6.5 - Pixel Encoding states that + * YUV422 sends 24 bits over three channels, with Cb and Cr + * components being sent on odd and even pixels, respectively. + * + * If fewer than 12 bpc are sent, data are left justified. + */ + if (bpc > 12) + return 0; + + /* + * HDMI 1.0 Spec, section 6.5 - Pixel Encoding + * specifies that YUV422 sends two 12-bits components over + * three TMDS channels per pixel clock, which is equivalent to + * three 8-bits components over three channels used by RGB as + * far as the clock rate goes. + */ + bpc = 8; + } + + /* + * HDMI 2.0 Spec, Section 7.1 - YCbCr 4:2:0 Pixel Encoding + * specifies that YUV420 encoding is carried at a TMDS Character Rate + * equal to half the pixel clock rate. + */ + if (fmt == HDMI_COLORSPACE_YUV420) + clock = clock / 2; + + if (mode->flags & DRM_MODE_FLAG_DBLCLK) + clock = clock * 2; + + return DIV_ROUND_CLOSEST_ULL(clock * bpc, 8); +} +EXPORT_SYMBOL(drm_hdmi_compute_mode_clock); diff --git a/include/drm/display/drm_hdmi_helper.h b/include/drm/display/drm_hdmi_helper.h index 76d234826e22..57e3b18c15ec 100644 --- a/include/drm/display/drm_hdmi_helper.h +++ b/include/drm/display/drm_hdmi_helper.h @@ -24,4 +24,8 @@ drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame, void drm_hdmi_avi_infoframe_content_type(struct hdmi_avi_infoframe *frame, const struct drm_connector_state *conn_state); +unsigned long long +drm_hdmi_compute_mode_clock(const struct drm_display_mode *mode, + unsigned int bpc, enum hdmi_colorspace fmt); + #endif -- cgit v1.2.3 From f035f4097f1e0a35a457b72427bb0c06ca0c81c4 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:00 +0200 Subject: drm/connector: hdmi: Calculate TMDS character rate Most HDMI drivers have some code to calculate the TMDS character rate, usually to adjust an internal clock to match what the mode requires. Since the TMDS character rates mostly depends on the resolution, whether we need to repeat pixels or not, the bpc count and the format, we can now derive it from the HDMI connector state that stores all those infos and remove the duplication from drivers. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-11-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/drm_hdmi_state_helper.c | 67 ++++++++++++++++++++++ drivers/gpu/drm/drm_atomic.c | 1 + drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c | 3 + include/drm/drm_connector.h | 5 ++ 4 files changed, 76 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index f6cd0612ea2c..08630561d864 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -3,6 +3,7 @@ #include #include +#include #include /** @@ -25,6 +26,63 @@ void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, } EXPORT_SYMBOL(__drm_atomic_helper_connector_hdmi_reset); +static const struct drm_display_mode * +connector_state_get_mode(const struct drm_connector_state *conn_state) +{ + struct drm_atomic_state *state; + struct drm_crtc_state *crtc_state; + struct drm_crtc *crtc; + + state = conn_state->state; + if (!state) + return NULL; + + crtc = conn_state->crtc; + if (!crtc) + return NULL; + + crtc_state = drm_atomic_get_new_crtc_state(state, crtc); + if (!crtc_state) + return NULL; + + return &crtc_state->mode; +} + +static enum drm_mode_status +hdmi_clock_valid(const struct drm_connector *connector, + const struct drm_display_mode *mode, + unsigned long long clock) +{ + const struct drm_display_info *info = &connector->display_info; + + if (info->max_tmds_clock && clock > info->max_tmds_clock * 1000) + return MODE_CLOCK_HIGH; + + return MODE_OK; +} + +static int +hdmi_compute_clock(const struct drm_connector *connector, + struct drm_connector_state *conn_state, + const struct drm_display_mode *mode, + unsigned int bpc, enum hdmi_colorspace fmt) +{ + enum drm_mode_status status; + unsigned long long clock; + + clock = drm_hdmi_compute_mode_clock(mode, bpc, fmt); + if (!clock) + return -EINVAL; + + status = hdmi_clock_valid(connector, mode, clock); + if (status != MODE_OK) + return -EINVAL; + + conn_state->hdmi.tmds_char_rate = clock; + + return 0; +} + /** * drm_atomic_helper_connector_hdmi_check() - Helper to check HDMI connector atomic state * @connector: DRM Connector @@ -44,6 +102,15 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, drm_atomic_get_old_connector_state(state, connector); struct drm_connector_state *new_conn_state = drm_atomic_get_new_connector_state(state, connector); + const struct drm_display_mode *mode = + connector_state_get_mode(new_conn_state); + int ret; + + ret = hdmi_compute_clock(connector, new_conn_state, mode, + new_conn_state->hdmi.output_bpc, + new_conn_state->hdmi.output_format); + if (ret) + return ret; if (old_conn_state->hdmi.output_bpc != new_conn_state->hdmi.output_bpc || old_conn_state->hdmi.output_format != new_conn_state->hdmi.output_format) { diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 8730137baa86..26f9e525c0a0 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1148,6 +1148,7 @@ static void drm_atomic_connector_print_state(struct drm_printer *p, drm_printf(p, "\toutput_bpc=%u\n", state->hdmi.output_bpc); drm_printf(p, "\toutput_format=%s\n", drm_hdmi_connector_get_output_format_name(state->hdmi.output_format)); + drm_printf(p, "\ttmds_char_rate=%llu\n", state->hdmi.tmds_char_rate); } if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK) diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index 99fed169ea4d..71cfc7450134 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -72,6 +72,9 @@ static int light_up_connector(struct kunit *test, conn_state = drm_atomic_get_connector_state(state, connector); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, conn_state); + conn_state->hdmi.output_bpc = connector->max_bpc; + conn_state->hdmi.output_format = HDMI_COLORSPACE_RGB; + ret = drm_atomic_set_crtc_for_connector(conn_state, crtc); KUNIT_EXPECT_EQ(test, ret, 0); diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 4b6b1117c7d0..3bdcf904a11b 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1049,6 +1049,11 @@ struct drm_connector_state { * @output_format: Pixel format to output in. */ enum hdmi_colorspace output_format; + + /** + * @tmds_char_rate: TMDS Character Rate, in Hz. + */ + unsigned long long tmds_char_rate; } hdmi; }; -- cgit v1.2.3 From e5030a74f976b4e808e28e78805c87203ac1a48d Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:02 +0200 Subject: drm/connector: hdmi: Add custom hook to filter TMDS character rate Most of the HDMI controllers have an upper TMDS character rate limit they can't exceed. On "embedded"-grade display controllers, it will typically be lower than what high-grade monitors can provide these days, so drivers will filter the TMDS character rate based on the controller capabilities. To make that easier to handle for drivers, let's provide an optional hook to be implemented by drivers so they can tell the HDMI controller helpers if a given TMDS character rate is reachable for them or not. This will then be useful to figure out the best format and bpc count for a given mode. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-13-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/drm_hdmi_state_helper.c | 9 +++++++ drivers/gpu/drm/drm_connector.c | 4 +++ drivers/gpu/drm/tests/drm_connector_test.c | 14 ++++++++++ drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c | 4 +++ include/drm/drm_connector.h | 31 ++++++++++++++++++++++ 5 files changed, 62 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 08630561d864..063421835dba 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -53,11 +53,20 @@ hdmi_clock_valid(const struct drm_connector *connector, const struct drm_display_mode *mode, unsigned long long clock) { + const struct drm_connector_hdmi_funcs *funcs = connector->hdmi.funcs; const struct drm_display_info *info = &connector->display_info; if (info->max_tmds_clock && clock > info->max_tmds_clock * 1000) return MODE_CLOCK_HIGH; + if (funcs && funcs->tmds_char_rate_valid) { + enum drm_mode_status status; + + status = funcs->tmds_char_rate_valid(connector, mode, clock); + if (status != MODE_OK) + return status; + } + return MODE_OK; } diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index f554bf703e13..39e9ff6c4f61 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -457,6 +457,7 @@ EXPORT_SYMBOL(drmm_connector_init); * @dev: DRM device * @connector: A pointer to the HDMI connector to init * @funcs: callbacks for this connector + * @hdmi_funcs: HDMI-related callbacks for this connector * @connector_type: user visible type of the connector * @ddc: optional pointer to the associated ddc adapter * @supported_formats: Bitmask of @hdmi_colorspace listing supported output formats @@ -476,6 +477,7 @@ EXPORT_SYMBOL(drmm_connector_init); int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, + const struct drm_connector_hdmi_funcs *hdmi_funcs, int connector_type, struct i2c_adapter *ddc, unsigned long supported_formats, @@ -512,6 +514,8 @@ int drmm_connector_hdmi_init(struct drm_device *dev, if (max_bpc > 8) drm_connector_attach_hdr_output_metadata_property(connector); + connector->hdmi.funcs = hdmi_funcs; + return 0; } EXPORT_SYMBOL(drmm_connector_hdmi_init); diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c index 8557f722080c..e44dfe2e9455 100644 --- a/drivers/gpu/drm/tests/drm_connector_test.c +++ b/drivers/gpu/drm/tests/drm_connector_test.c @@ -24,6 +24,9 @@ struct drm_connector_init_priv { struct i2c_adapter ddc; }; +static const struct drm_connector_hdmi_funcs dummy_hdmi_funcs = { +}; + static const struct drm_connector_funcs dummy_funcs = { .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, @@ -189,6 +192,7 @@ static void drm_test_connector_hdmi_init_valid(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, BIT(HDMI_COLORSPACE_RGB), @@ -207,6 +211,7 @@ static void drm_test_connector_hdmi_init_null_ddc(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, NULL, BIT(HDMI_COLORSPACE_RGB), @@ -225,6 +230,7 @@ static void drm_test_connector_hdmi_init_bpc_invalid(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, BIT(HDMI_COLORSPACE_RGB), @@ -243,6 +249,7 @@ static void drm_test_connector_hdmi_init_bpc_null(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, BIT(HDMI_COLORSPACE_RGB), @@ -266,6 +273,7 @@ static void drm_test_connector_hdmi_init_bpc_8(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, BIT(HDMI_COLORSPACE_RGB), @@ -305,6 +313,7 @@ static void drm_test_connector_hdmi_init_bpc_10(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, BIT(HDMI_COLORSPACE_RGB), @@ -344,6 +353,7 @@ static void drm_test_connector_hdmi_init_bpc_12(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, BIT(HDMI_COLORSPACE_RGB), @@ -378,6 +388,7 @@ static void drm_test_connector_hdmi_init_formats_empty(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, 0, @@ -396,6 +407,7 @@ static void drm_test_connector_hdmi_init_formats_no_rgb(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, + &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, &priv->ddc, BIT(HDMI_COLORSPACE_YUV422), @@ -415,6 +427,7 @@ static void drm_test_connector_hdmi_init_type_valid(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, + &dummy_hdmi_funcs, connector_type, &priv->ddc, BIT(HDMI_COLORSPACE_RGB), @@ -448,6 +461,7 @@ static void drm_test_connector_hdmi_init_type_invalid(struct kunit *test) ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, &dummy_funcs, + &dummy_hdmi_funcs, connector_type, &priv->ddc, BIT(HDMI_COLORSPACE_RGB), diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index 0731f244f977..23449a4786ce 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -112,6 +112,9 @@ static int set_connector_edid(struct kunit *test, struct drm_connector *connecto return 0; } +static const struct drm_connector_hdmi_funcs dummy_connector_hdmi_funcs = { +}; + static int dummy_connector_get_modes(struct drm_connector *connector) { struct drm_atomic_helper_connector_hdmi_priv *priv = @@ -194,6 +197,7 @@ drm_atomic_helper_connector_hdmi_init(struct kunit *test, conn = &priv->connector; ret = drmm_connector_hdmi_init(drm, conn, &dummy_connector_funcs, + &dummy_connector_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, NULL, formats, diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 3bdcf904a11b..ffef967869d9 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -38,6 +38,7 @@ struct drm_connector_helper_funcs; struct drm_modeset_acquire_ctx; struct drm_device; struct drm_crtc; +struct drm_display_mode; struct drm_encoder; struct drm_panel; struct drm_property; @@ -1057,6 +1058,30 @@ struct drm_connector_state { } hdmi; }; +/** + * struct drm_connector_hdmi_funcs - drm_hdmi_connector control functions + */ +struct drm_connector_hdmi_funcs { + /** + * @tmds_char_rate_valid: + * + * This callback is invoked at atomic_check time to figure out + * whether a particular TMDS character rate is supported by the + * driver. + * + * The @tmds_char_rate_valid callback is optional. + * + * Returns: + * + * Either &drm_mode_status.MODE_OK or one of the failure reasons + * in &enum drm_mode_status. + */ + enum drm_mode_status + (*tmds_char_rate_valid)(const struct drm_connector *connector, + const struct drm_display_mode *mode, + unsigned long long tmds_rate); +}; + /** * struct drm_connector_funcs - control connectors on a given device * @@ -1929,6 +1954,11 @@ struct drm_connector { * supported by the controller. */ unsigned long supported_formats; + + /** + * @funcs: HDMI connector Control Functions + */ + const struct drm_connector_hdmi_funcs *funcs; } hdmi; }; @@ -1951,6 +1981,7 @@ int drmm_connector_init(struct drm_device *dev, int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, const struct drm_connector_funcs *funcs, + const struct drm_connector_hdmi_funcs *hdmi_funcs, int connector_type, struct i2c_adapter *ddc, unsigned long supported_formats, -- cgit v1.2.3 From ab52af4ba7c7dc2e226ede5935a0587743b747d3 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:07 +0200 Subject: drm/connector: hdmi: Add Broadcast RGB property The i915 driver has a property to force the RGB range of an HDMI output. The vc4 driver then implemented the same property with the same semantics. KWin has support for it, and a PR for mutter is also there to support it. Both drivers implementing the same property with the same semantics, plus the userspace having support for it, is proof enough that it's pretty much a de-facto standard now and we can provide helpers for it. Let's plumb it into the newly created HDMI connector. Reviewed-by: Dave Stevenson Acked-by: Pekka Paalanen Reviewed-by: Sebastian Wick Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-18-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- Documentation/gpu/kms-properties.csv | 1 - drivers/gpu/drm/display/drm_hdmi_state_helper.c | 4 +- drivers/gpu/drm/drm_atomic.c | 2 + drivers/gpu/drm/drm_atomic_uapi.c | 4 ++ drivers/gpu/drm/drm_connector.c | 88 +++++++++++++++++++++++++ include/drm/drm_connector.h | 36 ++++++++++ 6 files changed, 133 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/gpu/kms-properties.csv b/Documentation/gpu/kms-properties.csv index c3c50a277cc9..bfbfbf4f102d 100644 --- a/Documentation/gpu/kms-properties.csv +++ b/Documentation/gpu/kms-properties.csv @@ -17,7 +17,6 @@ Owner Module/Drivers,Group,Property Name,Type,Property Values,Object attached,De ,Virtual GPU,“suggested X”,RANGE,"Min=0, Max=0xffffffff",Connector,property to suggest an X offset for a connector ,,“suggested Y”,RANGE,"Min=0, Max=0xffffffff",Connector,property to suggest an Y offset for a connector ,Optional,"""aspect ratio""",ENUM,"{ ""None"", ""4:3"", ""16:9"" }",Connector,TDB -i915,Generic,"""Broadcast RGB""",ENUM,"{ ""Automatic"", ""Full"", ""Limited 16:235"" }",Connector,"When this property is set to Limited 16:235 and CTM is set, the hardware will be programmed with the result of the multiplication of CTM by the limited range matrix to ensure the pixels normally in the range 0..1.0 are remapped to the range 16/255..235/255." ,,“audio”,ENUM,"{ ""force-dvi"", ""off"", ""auto"", ""on"" }",Connector,TBD ,SDVO-TV,“mode”,ENUM,"{ ""NTSC_M"", ""NTSC_J"", ""NTSC_443"", ""PAL_B"" } etc.",Connector,TBD ,,"""left_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 1623b96cd97c..57c66beec5bc 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -25,6 +25,7 @@ void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, new_conn_state->max_bpc = max_bpc; new_conn_state->max_requested_bpc = max_bpc; + new_conn_state->hdmi.broadcast_rgb = DRM_HDMI_BROADCAST_RGB_AUTO; } EXPORT_SYMBOL(__drm_atomic_helper_connector_hdmi_reset); @@ -332,7 +333,8 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, if (ret) return ret; - if (old_conn_state->hdmi.output_bpc != new_conn_state->hdmi.output_bpc || + if (old_conn_state->hdmi.broadcast_rgb != new_conn_state->hdmi.broadcast_rgb || + old_conn_state->hdmi.output_bpc != new_conn_state->hdmi.output_bpc || old_conn_state->hdmi.output_format != new_conn_state->hdmi.output_format) { struct drm_crtc *crtc = new_conn_state->crtc; struct drm_crtc_state *crtc_state; diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 26f9e525c0a0..3e57d98d8418 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1145,6 +1145,8 @@ static void drm_atomic_connector_print_state(struct drm_printer *p, if (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA || connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) { + drm_printf(p, "\tbroadcast_rgb=%s\n", + drm_hdmi_connector_get_broadcast_rgb_name(state->hdmi.broadcast_rgb)); drm_printf(p, "\toutput_bpc=%u\n", state->hdmi.output_bpc); drm_printf(p, "\toutput_format=%s\n", drm_hdmi_connector_get_output_format_name(state->hdmi.output_format)); diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index fc16fddee5c5..22bbb2d83e30 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -776,6 +776,8 @@ static int drm_atomic_connector_set_property(struct drm_connector *connector, state->max_requested_bpc = val; } else if (property == connector->privacy_screen_sw_state_property) { state->privacy_screen_sw_state = val; + } else if (property == connector->broadcast_rgb_property) { + state->hdmi.broadcast_rgb = val; } else if (connector->funcs->atomic_set_property) { return connector->funcs->atomic_set_property(connector, state, property, val); @@ -859,6 +861,8 @@ drm_atomic_connector_get_property(struct drm_connector *connector, *val = state->max_requested_bpc; } else if (property == connector->privacy_screen_sw_state_property) { *val = state->privacy_screen_sw_state; + } else if (property == connector->broadcast_rgb_property) { + *val = state->hdmi.broadcast_rgb; } else if (connector->funcs->atomic_get_property) { return connector->funcs->atomic_get_property(connector, state, property, val); diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 39e9ff6c4f61..debb5a77ad89 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1212,6 +1212,29 @@ static const u32 dp_colorspaces = BIT(DRM_MODE_COLORIMETRY_BT2020_CYCC) | BIT(DRM_MODE_COLORIMETRY_BT2020_YCC); +static const struct drm_prop_enum_list broadcast_rgb_names[] = { + { DRM_HDMI_BROADCAST_RGB_AUTO, "Automatic" }, + { DRM_HDMI_BROADCAST_RGB_FULL, "Full" }, + { DRM_HDMI_BROADCAST_RGB_LIMITED, "Limited 16:235" }, +}; + +/* + * drm_hdmi_connector_get_broadcast_rgb_name - Return a string for HDMI connector RGB broadcast selection + * @broadcast_rgb: Broadcast RGB selection to compute name of + * + * Returns: the name of the Broadcast RGB selection, or NULL if the type + * is not valid. + */ +const char * +drm_hdmi_connector_get_broadcast_rgb_name(enum drm_hdmi_broadcast_rgb broadcast_rgb) +{ + if (broadcast_rgb >= ARRAY_SIZE(broadcast_rgb_names)) + return NULL; + + return broadcast_rgb_names[broadcast_rgb].name; +} +EXPORT_SYMBOL(drm_hdmi_connector_get_broadcast_rgb_name); + static const char * const output_format_str[] = { [HDMI_COLORSPACE_RGB] = "RGB", [HDMI_COLORSPACE_YUV420] = "YUV 4:2:0", @@ -1708,6 +1731,38 @@ EXPORT_SYMBOL(drm_connector_attach_dp_subconnector_property); /** * DOC: HDMI connector properties * + * Broadcast RGB (HDMI specific) + * Indicates the Quantization Range (Full vs Limited) used. The color + * processing pipeline will be adjusted to match the value of the + * property, and the Infoframes will be generated and sent accordingly. + * + * This property is only relevant if the HDMI output format is RGB. If + * it's one of the YCbCr variant, it will be ignored. + * + * The CRTC attached to the connector must be configured by user-space to + * always produce full-range pixels. + * + * The value of this property can be one of the following: + * + * Automatic: + * The quantization range is selected automatically based on the + * mode according to the HDMI specifications (HDMI 1.4b - Section + * 6.6 - Video Quantization Ranges). + * + * Full: + * Full quantization range is forced. + * + * Limited 16:235: + * Limited quantization range is forced. Unlike the name suggests, + * this works for any number of bits-per-component. + * + * Property values other than Automatic can result in colors being off (if + * limited is selected but the display expects full), or a black screen + * (if full is selected but the display expects limited). + * + * Drivers can set up this property by calling + * drm_connector_attach_broadcast_rgb_property(). + * * content type (HDMI specific): * Indicates content type setting to be used in HDMI infoframes to indicate * content type for the external device, so that it adjusts its display @@ -2570,6 +2625,39 @@ int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *conn } EXPORT_SYMBOL(drm_connector_attach_hdr_output_metadata_property); +/** + * drm_connector_attach_broadcast_rgb_property - attach "Broadcast RGB" property + * @connector: connector to attach the property on. + * + * This is used to add support for forcing the RGB range on a connector + * + * Returns: + * Zero on success, negative errno on failure. + */ +int drm_connector_attach_broadcast_rgb_property(struct drm_connector *connector) +{ + struct drm_device *dev = connector->dev; + struct drm_property *prop; + + prop = connector->broadcast_rgb_property; + if (!prop) { + prop = drm_property_create_enum(dev, DRM_MODE_PROP_ENUM, + "Broadcast RGB", + broadcast_rgb_names, + ARRAY_SIZE(broadcast_rgb_names)); + if (!prop) + return -EINVAL; + + connector->broadcast_rgb_property = prop; + } + + drm_object_attach_property(&connector->base, prop, + DRM_HDMI_BROADCAST_RGB_AUTO); + + return 0; +} +EXPORT_SYMBOL(drm_connector_attach_broadcast_rgb_property); + /** * drm_connector_attach_colorspace_property - attach "Colorspace" property * @connector: connector to attach the property on. diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index ffef967869d9..207635715466 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -369,6 +369,29 @@ enum drm_panel_orientation { DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +/** + * enum drm_hdmi_broadcast_rgb - Broadcast RGB Selection for an HDMI @drm_connector + */ +enum drm_hdmi_broadcast_rgb { + /** + * @DRM_HDMI_BROADCAST_RGB_AUTO: The RGB range is selected + * automatically based on the mode. + */ + DRM_HDMI_BROADCAST_RGB_AUTO, + + /** + * @DRM_HDMI_BROADCAST_RGB_FULL: Full range RGB is forced. + */ + DRM_HDMI_BROADCAST_RGB_FULL, + + /** + * @DRM_HDMI_BROADCAST_RGB_LIMITED: Limited range RGB is forced. + */ + DRM_HDMI_BROADCAST_RGB_LIMITED, +}; + +const char * +drm_hdmi_connector_get_broadcast_rgb_name(enum drm_hdmi_broadcast_rgb broadcast_rgb); const char * drm_hdmi_connector_get_output_format_name(enum hdmi_colorspace fmt); @@ -1041,6 +1064,12 @@ struct drm_connector_state { * @drm_atomic_helper_connector_hdmi_check(). */ struct { + /** + * @broadcast_rgb: Connector property to pass the + * Broadcast RGB selection value. + */ + enum drm_hdmi_broadcast_rgb broadcast_rgb; + /** * @output_bpc: Bits per color channel to output. */ @@ -1757,6 +1786,12 @@ struct drm_connector { */ struct drm_property *privacy_screen_hw_state_property; + /** + * @broadcast_rgb_property: Connector property to set the + * Broadcast RGB selection to output with. + */ + struct drm_property *broadcast_rgb_property; + #define DRM_CONNECTOR_POLL_HPD (1 << 0) #define DRM_CONNECTOR_POLL_CONNECT (1 << 1) #define DRM_CONNECTOR_POLL_DISCONNECT (1 << 2) @@ -2096,6 +2131,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, u32 scaling_mode_mask); int drm_connector_attach_vrr_capable_property( struct drm_connector *connector); +int drm_connector_attach_broadcast_rgb_property(struct drm_connector *connector); int drm_connector_attach_colorspace_property(struct drm_connector *connector); int drm_connector_attach_hdr_output_metadata_property(struct drm_connector *connector); bool drm_connector_atomic_hdr_metadata_equal(struct drm_connector_state *old_state, -- cgit v1.2.3 From 027d435906490812d4568ff371a8b63c24a36bcd Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:09 +0200 Subject: drm/connector: hdmi: Add RGB Quantization Range to the connector state HDMI controller drivers will need to figure out the RGB range they need to configure based on a mode and property values. Let's expose that in the HDMI connector state so drivers can just use that value. Reviewed-by: Dave Stevenson Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-20-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/drm_hdmi_state_helper.c | 29 +++++++++++++++++++++++++ drivers/gpu/drm/drm_atomic.c | 1 + include/drm/drm_connector.h | 6 +++++ 3 files changed, 36 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 57c66beec5bc..f18020cfe4ea 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -51,6 +51,33 @@ connector_state_get_mode(const struct drm_connector_state *conn_state) return &crtc_state->mode; } +static bool hdmi_is_limited_range(const struct drm_connector *connector, + const struct drm_connector_state *conn_state) +{ + const struct drm_display_info *info = &connector->display_info; + const struct drm_display_mode *mode = + connector_state_get_mode(conn_state); + + /* + * The Broadcast RGB property only applies to RGB format, and + * i915 just assumes limited range for YCbCr output, so let's + * just do the same. + */ + if (conn_state->hdmi.output_format != HDMI_COLORSPACE_RGB) + return true; + + if (conn_state->hdmi.broadcast_rgb == DRM_HDMI_BROADCAST_RGB_FULL) + return false; + + if (conn_state->hdmi.broadcast_rgb == DRM_HDMI_BROADCAST_RGB_LIMITED) + return true; + + if (!info->is_hdmi) + return false; + + return drm_default_rgb_quant_range(mode) == HDMI_QUANTIZATION_RANGE_LIMITED; +} + static bool sink_supports_format_bpc(const struct drm_connector *connector, const struct drm_display_info *info, @@ -329,6 +356,8 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, connector_state_get_mode(new_conn_state); int ret; + new_conn_state->hdmi.is_limited_range = hdmi_is_limited_range(connector, new_conn_state); + ret = hdmi_compute_config(connector, new_conn_state, mode); if (ret) return ret; diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 3e57d98d8418..07b4b394e3bf 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1147,6 +1147,7 @@ static void drm_atomic_connector_print_state(struct drm_printer *p, connector->connector_type == DRM_MODE_CONNECTOR_HDMIB) { drm_printf(p, "\tbroadcast_rgb=%s\n", drm_hdmi_connector_get_broadcast_rgb_name(state->hdmi.broadcast_rgb)); + drm_printf(p, "\tis_limited_range=%c\n", state->hdmi.is_limited_range ? 'y' : 'n'); drm_printf(p, "\toutput_bpc=%u\n", state->hdmi.output_bpc); drm_printf(p, "\toutput_format=%s\n", drm_hdmi_connector_get_output_format_name(state->hdmi.output_format)); diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 207635715466..dcad2d46fabe 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -1070,6 +1070,12 @@ struct drm_connector_state { */ enum drm_hdmi_broadcast_rgb broadcast_rgb; + /** + * @is_full_range: Is the output supposed to use a full + * RGB Quantization Range or not? + */ + bool is_limited_range; + /** * @output_bpc: Bits per color channel to output. */ -- cgit v1.2.3 From f378b77227bc4732922c57f92be89438bb1018a1 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 27 May 2024 15:58:11 +0200 Subject: drm/connector: hdmi: Add Infoframes generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Infoframes in KMS is usually handled by a bunch of low-level helpers that require quite some boilerplate for drivers. This leads to discrepancies with how drivers generate them, and which are actually sent. Now that we have everything needed to generate them in the HDMI connector state, we can generate them in our common logic so that drivers can simply reuse what we precomputed. Cc: Ville Syrjälä Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240527-kms-hdmi-connector-state-v15-22-c5af16c3aae2@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/display/drm_hdmi_state_helper.c | 336 +++++++++++++++++++++ drivers/gpu/drm/drm_connector.c | 14 + drivers/gpu/drm/tests/drm_connector_test.c | 12 + drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c | 1 + include/drm/display/drm_hdmi_state_helper.h | 6 + include/drm/drm_connector.h | 111 ++++++- 6 files changed, 479 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index f18020cfe4ea..437270c29210 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -333,6 +333,144 @@ hdmi_compute_config(const struct drm_connector *connector, return -EINVAL; } +static int hdmi_generate_avi_infoframe(const struct drm_connector *connector, + struct drm_connector_state *conn_state) +{ + const struct drm_display_mode *mode = + connector_state_get_mode(conn_state); + struct drm_connector_hdmi_infoframe *infoframe = + &conn_state->hdmi.infoframes.avi; + struct hdmi_avi_infoframe *frame = + &infoframe->data.avi; + bool is_limited_range = conn_state->hdmi.is_limited_range; + enum hdmi_quantization_range rgb_quant_range = + is_limited_range ? HDMI_QUANTIZATION_RANGE_LIMITED : HDMI_QUANTIZATION_RANGE_FULL; + int ret; + + ret = drm_hdmi_avi_infoframe_from_display_mode(frame, connector, mode); + if (ret) + return ret; + + frame->colorspace = conn_state->hdmi.output_format; + + /* + * FIXME: drm_hdmi_avi_infoframe_quant_range() doesn't handle + * YUV formats at all at the moment, so if we ever support YUV + * formats this needs to be revised. + */ + drm_hdmi_avi_infoframe_quant_range(frame, connector, mode, rgb_quant_range); + drm_hdmi_avi_infoframe_colorimetry(frame, conn_state); + drm_hdmi_avi_infoframe_bars(frame, conn_state); + + infoframe->set = true; + + return 0; +} + +static int hdmi_generate_spd_infoframe(const struct drm_connector *connector, + struct drm_connector_state *conn_state) +{ + struct drm_connector_hdmi_infoframe *infoframe = + &conn_state->hdmi.infoframes.spd; + struct hdmi_spd_infoframe *frame = + &infoframe->data.spd; + int ret; + + ret = hdmi_spd_infoframe_init(frame, + connector->hdmi.vendor, + connector->hdmi.product); + if (ret) + return ret; + + frame->sdi = HDMI_SPD_SDI_PC; + + infoframe->set = true; + + return 0; +} + +static int hdmi_generate_hdr_infoframe(const struct drm_connector *connector, + struct drm_connector_state *conn_state) +{ + struct drm_connector_hdmi_infoframe *infoframe = + &conn_state->hdmi.infoframes.hdr_drm; + struct hdmi_drm_infoframe *frame = + &infoframe->data.drm; + int ret; + + if (connector->max_bpc < 10) + return 0; + + if (!conn_state->hdr_output_metadata) + return 0; + + ret = drm_hdmi_infoframe_set_hdr_metadata(frame, conn_state); + if (ret) + return ret; + + infoframe->set = true; + + return 0; +} + +static int hdmi_generate_hdmi_vendor_infoframe(const struct drm_connector *connector, + struct drm_connector_state *conn_state) +{ + const struct drm_display_info *info = &connector->display_info; + const struct drm_display_mode *mode = + connector_state_get_mode(conn_state); + struct drm_connector_hdmi_infoframe *infoframe = + &conn_state->hdmi.infoframes.hdmi; + struct hdmi_vendor_infoframe *frame = + &infoframe->data.vendor.hdmi; + int ret; + + if (!info->has_hdmi_infoframe) + return 0; + + ret = drm_hdmi_vendor_infoframe_from_display_mode(frame, connector, mode); + if (ret) + return ret; + + infoframe->set = true; + + return 0; +} + +static int +hdmi_generate_infoframes(const struct drm_connector *connector, + struct drm_connector_state *conn_state) +{ + const struct drm_display_info *info = &connector->display_info; + int ret; + + if (!info->is_hdmi) + return 0; + + ret = hdmi_generate_avi_infoframe(connector, conn_state); + if (ret) + return ret; + + ret = hdmi_generate_spd_infoframe(connector, conn_state); + if (ret) + return ret; + + /* + * Audio Infoframes will be generated by ALSA, and updated by + * drm_atomic_helper_connector_hdmi_update_audio_infoframe(). + */ + + ret = hdmi_generate_hdr_infoframe(connector, conn_state); + if (ret) + return ret; + + ret = hdmi_generate_hdmi_vendor_infoframe(connector, conn_state); + if (ret) + return ret; + + return 0; +} + /** * drm_atomic_helper_connector_hdmi_check() - Helper to check HDMI connector atomic state * @connector: DRM Connector @@ -362,6 +500,10 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, if (ret) return ret; + ret = hdmi_generate_infoframes(connector, new_conn_state); + if (ret) + return ret; + if (old_conn_state->hdmi.broadcast_rgb != new_conn_state->hdmi.broadcast_rgb || old_conn_state->hdmi.output_bpc != new_conn_state->hdmi.output_bpc || old_conn_state->hdmi.output_format != new_conn_state->hdmi.output_format) { @@ -378,3 +520,197 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, return 0; } EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_check); + +#define HDMI_MAX_INFOFRAME_SIZE 29 + +static int clear_device_infoframe(struct drm_connector *connector, + enum hdmi_infoframe_type type) +{ + const struct drm_connector_hdmi_funcs *funcs = connector->hdmi.funcs; + struct drm_device *dev = connector->dev; + int ret; + + drm_dbg_kms(dev, "Clearing infoframe type 0x%x\n", type); + + if (!funcs || !funcs->clear_infoframe) { + drm_dbg_kms(dev, "Function not implemented, bailing.\n"); + return 0; + } + + ret = funcs->clear_infoframe(connector, type); + if (ret) { + drm_dbg_kms(dev, "Call failed: %d\n", ret); + return ret; + } + + return 0; +} + +static int clear_infoframe(struct drm_connector *connector, + struct drm_connector_hdmi_infoframe *old_frame) +{ + int ret; + + ret = clear_device_infoframe(connector, old_frame->data.any.type); + if (ret) + return ret; + + return 0; +} + +static int write_device_infoframe(struct drm_connector *connector, + union hdmi_infoframe *frame) +{ + const struct drm_connector_hdmi_funcs *funcs = connector->hdmi.funcs; + struct drm_device *dev = connector->dev; + u8 buffer[HDMI_MAX_INFOFRAME_SIZE]; + int ret; + int len; + + drm_dbg_kms(dev, "Writing infoframe type %x\n", frame->any.type); + + if (!funcs || !funcs->write_infoframe) { + drm_dbg_kms(dev, "Function not implemented, bailing.\n"); + return -EINVAL; + } + + len = hdmi_infoframe_pack(frame, buffer, sizeof(buffer)); + if (len < 0) + return len; + + ret = funcs->write_infoframe(connector, frame->any.type, buffer, len); + if (ret) { + drm_dbg_kms(dev, "Call failed: %d\n", ret); + return ret; + } + + return 0; +} + +static int write_infoframe(struct drm_connector *connector, + struct drm_connector_hdmi_infoframe *new_frame) +{ + int ret; + + ret = write_device_infoframe(connector, &new_frame->data); + if (ret) + return ret; + + return 0; +} + +static int write_or_clear_infoframe(struct drm_connector *connector, + struct drm_connector_hdmi_infoframe *old_frame, + struct drm_connector_hdmi_infoframe *new_frame) +{ + if (new_frame->set) + return write_infoframe(connector, new_frame); + + if (old_frame->set && !new_frame->set) + return clear_infoframe(connector, old_frame); + + return 0; +} + +/** + * drm_atomic_helper_connector_hdmi_update_infoframes - Update the Infoframes + * @connector: A pointer to the HDMI connector + * @state: The HDMI connector state to generate the infoframe from + * + * This function is meant for HDMI connector drivers to write their + * infoframes. It will typically be used in a + * @drm_connector_helper_funcs.atomic_enable implementation. + * + * Returns: + * Zero on success, error code on failure. + */ +int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector, + struct drm_atomic_state *state) +{ + struct drm_connector_state *old_conn_state = + drm_atomic_get_old_connector_state(state, connector); + struct drm_connector_state *new_conn_state = + drm_atomic_get_new_connector_state(state, connector); + struct drm_display_info *info = &connector->display_info; + int ret; + + if (!info->is_hdmi) + return 0; + + mutex_lock(&connector->hdmi.infoframes.lock); + + ret = write_or_clear_infoframe(connector, + &old_conn_state->hdmi.infoframes.avi, + &new_conn_state->hdmi.infoframes.avi); + if (ret) + goto out; + + if (connector->hdmi.infoframes.audio.set) { + ret = write_infoframe(connector, + &connector->hdmi.infoframes.audio); + if (ret) + goto out; + } + + ret = write_or_clear_infoframe(connector, + &old_conn_state->hdmi.infoframes.hdr_drm, + &new_conn_state->hdmi.infoframes.hdr_drm); + if (ret) + goto out; + + ret = write_or_clear_infoframe(connector, + &old_conn_state->hdmi.infoframes.spd, + &new_conn_state->hdmi.infoframes.spd); + if (ret) + goto out; + + if (info->has_hdmi_infoframe) { + ret = write_or_clear_infoframe(connector, + &old_conn_state->hdmi.infoframes.hdmi, + &new_conn_state->hdmi.infoframes.hdmi); + if (ret) + goto out; + } + +out: + mutex_unlock(&connector->hdmi.infoframes.lock); + return ret; +} +EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_update_infoframes); + +/** + * drm_atomic_helper_connector_hdmi_update_audio_infoframe - Update the Audio Infoframe + * @connector: A pointer to the HDMI connector + * @frame: A pointer to the audio infoframe to write + * + * This function is meant for HDMI connector drivers to update their + * audio infoframe. It will typically be used in one of the ALSA hooks + * (most likely prepare). + * + * Returns: + * Zero on success, error code on failure. + */ +int +drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector, + struct hdmi_audio_infoframe *frame) +{ + struct drm_connector_hdmi_infoframe *infoframe = + &connector->hdmi.infoframes.audio; + struct drm_display_info *info = &connector->display_info; + int ret; + + if (!info->is_hdmi) + return 0; + + mutex_lock(&connector->hdmi.infoframes.lock); + + memcpy(&infoframe->data, frame, sizeof(infoframe->data)); + infoframe->set = true; + + ret = write_infoframe(connector, infoframe); + + mutex_unlock(&connector->hdmi.infoframes.lock); + + return ret; +} +EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_update_audio_infoframe); diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index debb5a77ad89..3d73a981004c 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -278,6 +278,7 @@ static int __drm_connector_init(struct drm_device *dev, INIT_LIST_HEAD(&connector->modes); mutex_init(&connector->mutex); mutex_init(&connector->edid_override_mutex); + mutex_init(&connector->hdmi.infoframes.lock); connector->edid_blob_ptr = NULL; connector->epoch_counter = 0; connector->tile_blob_ptr = NULL; @@ -456,6 +457,8 @@ EXPORT_SYMBOL(drmm_connector_init); * drmm_connector_hdmi_init - Init a preallocated HDMI connector * @dev: DRM device * @connector: A pointer to the HDMI connector to init + * @vendor: HDMI Controller Vendor name + * @product: HDMI Controller Product name * @funcs: callbacks for this connector * @hdmi_funcs: HDMI-related callbacks for this connector * @connector_type: user visible type of the connector @@ -476,6 +479,7 @@ EXPORT_SYMBOL(drmm_connector_init); */ int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, + const char *vendor, const char *product, const struct drm_connector_funcs *funcs, const struct drm_connector_hdmi_funcs *hdmi_funcs, int connector_type, @@ -485,6 +489,13 @@ int drmm_connector_hdmi_init(struct drm_device *dev, { int ret; + if (!vendor || !product) + return -EINVAL; + + if ((strlen(vendor) > DRM_CONNECTOR_HDMI_VENDOR_LEN) || + (strlen(product) > DRM_CONNECTOR_HDMI_PRODUCT_LEN)) + return -EINVAL; + if (!(connector_type == DRM_MODE_CONNECTOR_HDMIA || connector_type == DRM_MODE_CONNECTOR_HDMIB)) return -EINVAL; @@ -500,6 +511,8 @@ int drmm_connector_hdmi_init(struct drm_device *dev, return ret; connector->hdmi.supported_formats = supported_formats; + strtomem_pad(connector->hdmi.vendor, vendor, 0); + strtomem_pad(connector->hdmi.product, product, 0); /* * drm_connector_attach_max_bpc_property() requires the @@ -652,6 +665,7 @@ void drm_connector_cleanup(struct drm_connector *connector) connector->funcs->atomic_destroy_state(connector, connector->state); + mutex_destroy(&connector->hdmi.infoframes.lock); mutex_destroy(&connector->mutex); memset(connector, 0, sizeof(*connector)); diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c index a4ea170a2575..2e631617c391 100644 --- a/drivers/gpu/drm/tests/drm_connector_test.c +++ b/drivers/gpu/drm/tests/drm_connector_test.c @@ -191,6 +191,7 @@ static void drm_test_connector_hdmi_init_valid(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -210,6 +211,7 @@ static void drm_test_connector_hdmi_init_null_ddc(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -229,6 +231,7 @@ static void drm_test_connector_hdmi_init_bpc_invalid(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -248,6 +251,7 @@ static void drm_test_connector_hdmi_init_bpc_null(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -272,6 +276,7 @@ static void drm_test_connector_hdmi_init_bpc_8(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -312,6 +317,7 @@ static void drm_test_connector_hdmi_init_bpc_10(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -352,6 +358,7 @@ static void drm_test_connector_hdmi_init_bpc_12(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -387,6 +394,7 @@ static void drm_test_connector_hdmi_init_formats_empty(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -406,6 +414,7 @@ static void drm_test_connector_hdmi_init_formats_no_rgb(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, @@ -426,6 +435,7 @@ static void drm_test_connector_hdmi_init_type_valid(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, connector_type, @@ -460,6 +470,7 @@ static void drm_test_connector_hdmi_init_type_invalid(struct kunit *test) int ret; ret = drmm_connector_hdmi_init(&priv->drm, &priv->connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, connector_type, @@ -725,6 +736,7 @@ static void drm_test_drm_connector_attach_broadcast_rgb_property_hdmi_connector( int ret; ret = drmm_connector_hdmi_init(&priv->drm, connector, + "Vendor", "Product", &dummy_funcs, &dummy_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c index 2eda21228d75..2d3abc71dc16 100644 --- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c +++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c @@ -206,6 +206,7 @@ drm_atomic_helper_connector_hdmi_init(struct kunit *test, conn = &priv->connector; ret = drmm_connector_hdmi_init(drm, conn, + "Vendor", "Product", &dummy_connector_funcs, &dummy_connector_hdmi_funcs, DRM_MODE_CONNECTOR_HDMIA, diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h index 837899db315a..eb162ff24de0 100644 --- a/include/drm/display/drm_hdmi_state_helper.h +++ b/include/drm/display/drm_hdmi_state_helper.h @@ -6,6 +6,7 @@ struct drm_atomic_state; struct drm_connector; struct drm_connector_state; +struct hdmi_audio_infoframe; void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, struct drm_connector_state *new_conn_state); @@ -13,4 +14,9 @@ void __drm_atomic_helper_connector_hdmi_reset(struct drm_connector *connector, int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, struct drm_atomic_state *state); +int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector, + struct hdmi_audio_infoframe *frame); +int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector, + struct drm_atomic_state *state); + #endif // DRM_HDMI_STATE_HELPER_H_ diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index dcad2d46fabe..e04a8a0d1bbd 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -914,6 +914,21 @@ struct drm_tv_connector_state { unsigned int hue; }; +/** + * struct drm_connector_hdmi_infoframe - HDMI Infoframe container + */ +struct drm_connector_hdmi_infoframe { + /** + * @data: HDMI Infoframe structure + */ + union hdmi_infoframe data; + + /** + * @set: Is the content of @data valid? + */ + bool set; +}; + /** * struct drm_connector_state - mutable connector state */ @@ -1071,7 +1086,36 @@ struct drm_connector_state { enum drm_hdmi_broadcast_rgb broadcast_rgb; /** - * @is_full_range: Is the output supposed to use a full + * @infoframes: HDMI Infoframes matching that state + */ + struct { + /** + * @avi: AVI Infoframes structure matching our + * state. + */ + struct drm_connector_hdmi_infoframe avi; + + /** + * @hdr_drm: DRM (Dynamic Range and Mastering) + * Infoframes structure matching our state. + */ + struct drm_connector_hdmi_infoframe hdr_drm; + + /** + * @spd: SPD Infoframes structure matching our + * state. + */ + struct drm_connector_hdmi_infoframe spd; + + /** + * @vendor: HDMI Vendor Infoframes structure + * matching our state. + */ + struct drm_connector_hdmi_infoframe hdmi; + } infoframes; + + /** + * @is_limited_range: Is the output supposed to use a limited * RGB Quantization Range or not? */ bool is_limited_range; @@ -1115,6 +1159,41 @@ struct drm_connector_hdmi_funcs { (*tmds_char_rate_valid)(const struct drm_connector *connector, const struct drm_display_mode *mode, unsigned long long tmds_rate); + + /** + * @clear_infoframe: + * + * This callback is invoked through + * @drm_atomic_helper_connector_hdmi_update_infoframes during a + * commit to clear the infoframes into the hardware. It will be + * called multiple times, once for every disabled infoframe + * type. + * + * The @clear_infoframe callback is optional. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*clear_infoframe)(struct drm_connector *connector, + enum hdmi_infoframe_type type); + + /** + * @write_infoframe: + * + * This callback is invoked through + * @drm_atomic_helper_connector_hdmi_update_infoframes during a + * commit to program the infoframes into the hardware. It will + * be called multiple times, once for every updated infoframe + * type. + * + * The @write_infoframe callback is mandatory. + * + * Returns: + * 0 on success, a negative error code otherwise + */ + int (*write_infoframe)(struct drm_connector *connector, + enum hdmi_infoframe_type type, + const u8 *buffer, size_t len); }; /** @@ -1990,6 +2069,18 @@ struct drm_connector { * @hdmi: HDMI-related variable and properties. */ struct { +#define DRM_CONNECTOR_HDMI_VENDOR_LEN 8 + /** + * @vendor: HDMI Controller Vendor Name + */ + unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring; + +#define DRM_CONNECTOR_HDMI_PRODUCT_LEN 16 + /** + * @product: HDMI Controller Product Name + */ + unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring; + /** * @supported_formats: Bitmask of @hdmi_colorspace * supported by the controller. @@ -2000,6 +2091,23 @@ struct drm_connector { * @funcs: HDMI connector Control Functions */ const struct drm_connector_hdmi_funcs *funcs; + + /** + * @infoframes: Current Infoframes output by the connector + */ + struct { + /** + * @lock: Mutex protecting against concurrent access to + * the infoframes, most notably between KMS and ALSA. + */ + struct mutex lock; + + /** + * @audio: Current Audio Infoframes structure. Protected + * by @lock. + */ + struct drm_connector_hdmi_infoframe audio; + } infoframes; } hdmi; }; @@ -2021,6 +2129,7 @@ int drmm_connector_init(struct drm_device *dev, struct i2c_adapter *ddc); int drmm_connector_hdmi_init(struct drm_device *dev, struct drm_connector *connector, + const char *vendor, const char *product, const struct drm_connector_funcs *funcs, const struct drm_connector_hdmi_funcs *hdmi_funcs, int connector_type, -- cgit v1.2.3 From db003a28e03f95f2bcb63f037a2078b8870b1ecd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 28 May 2024 14:33:20 +0200 Subject: netfs: fix kernel doc for nets_wait_for_outstanding_io() The @inode parameter wasn't documented leading to new doc build warnings. Fixes: f89ea63f1c65 ("netfs, 9p: Fix race between umount and async request completion") Link: https://lore.kernel.org/r/20240528133050.7e09d78e@canb.auug.org.au Signed-off-by: Christian Brauner --- include/linux/netfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 3ca3906bb8da..5d0288938cc2 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -521,7 +521,7 @@ static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx) /** * netfs_wait_for_outstanding_io - Wait for outstanding I/O to complete - * @ctx: The netfs inode to wait on + * @inode: The netfs inode to wait on * * Wait for outstanding I/O requests of any type to complete. This is intended * to be called from inode eviction routines. This makes sure that any -- cgit v1.2.3 From 620c266f394932e5decc4b34683a75dfc59dc2f4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 24 May 2024 12:19:39 +0200 Subject: fhandle: relax open_by_handle_at() permission checks A current limitation of open_by_handle_at() is that it's currently not possible to use it from within containers at all because we require CAP_DAC_READ_SEARCH in the initial namespace. That's unfortunate because there are scenarios where using open_by_handle_at() from within containers. Two examples: (1) cgroupfs allows to encode cgroups to file handles and reopen them with open_by_handle_at(). (2) Fanotify allows placing filesystem watches they currently aren't usable in containers because the returned file handles cannot be used. Here's a proposal for relaxing the permission check for open_by_handle_at(). (1) Opening file handles when the caller has privileges over the filesystem (1.1) The caller has an unobstructed view of the filesystem. (1.2) The caller has permissions to follow a path to the file handle. This doesn't address the problem of opening a file handle when only a portion of a filesystem is exposed as is common in containers by e.g., bind-mounting a subtree. The proposal to solve this use-case is: (2) Opening file handles when the caller has privileges over a subtree (2.1) The caller is able to reach the file from the provided mount fd. (2.2) The caller has permissions to construct an unobstructed path to the file handle. (2.3) The caller has permissions to follow a path to the file handle. The relaxed permission checks are currently restricted to directory file handles which are what both cgroupfs and fanotify need. Handling disconnected non-directory file handles would lead to a potentially non-deterministic api. If a disconnected non-directory file handle is provided we may fail to decode a valid path that we could use for permission checking. That in itself isn't a problem as we would just return EACCES in that case. However, confusion may arise if a non-disconnected dentry ends up in the cache later and those opening the file handle would suddenly succeed. * It's potentially possible to use timing information (side-channel) to infer whether a given inode exists. I don't think that's particularly problematic. Thanks to Jann for bringing this to my attention. * An unrelated note (IOW, these are thoughts that apply to open_by_handle_at() generically and are unrelated to the changes here): Jann pointed out that we should verify whether deleted files could potentially be reopened through open_by_handle_at(). I don't think that's possible though. Another potential thing to check is whether open_by_handle_at() could be abused to open internal stuff like memfds or gpu stuff. I don't think so but I haven't had the time to completely verify this. This dates back to discussions Amir and I had quite some time ago and thanks to him for providing a lot of details around the export code and related patches! Link: https://lore.kernel.org/r/20240524-vfs-open_by_handle_at-v1-1-3d4b7d22736b@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Jeff Layton Signed-off-by: Christian Brauner --- fs/exportfs/expfs.c | 9 ++- fs/fhandle.c | 178 +++++++++++++++++++++++++++++++++++++---------- fs/mount.h | 1 + fs/namespace.c | 2 +- fs/nfsd/nfsfh.c | 2 +- include/linux/exportfs.h | 2 + 6 files changed, 152 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 07ea3d62b298..4f2dd4ab4486 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -427,7 +427,7 @@ EXPORT_SYMBOL_GPL(exportfs_encode_fh); struct dentry * exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, - int fileid_type, + int fileid_type, unsigned int flags, int (*acceptable)(void *, struct dentry *), void *context) { @@ -445,6 +445,11 @@ exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, if (IS_ERR_OR_NULL(result)) return result; + if ((flags & EXPORT_FH_DIR_ONLY) && !d_is_dir(result)) { + err = -ENOTDIR; + goto err_result; + } + /* * If no acceptance criteria was specified by caller, a disconnected * dentry is also accepatable. Callers may use this mode to query if @@ -581,7 +586,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, { struct dentry *ret; - ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, + ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 0, acceptable, context); if (IS_ERR_OR_NULL(ret)) { if (ret == ERR_PTR(-ENOMEM)) diff --git a/fs/fhandle.c b/fs/fhandle.c index 8a7f86c2139a..6e8cea16790e 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -115,88 +115,188 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, return err; } -static struct vfsmount *get_vfsmount_from_fd(int fd) +static int get_path_from_fd(int fd, struct path *root) { - struct vfsmount *mnt; - if (fd == AT_FDCWD) { struct fs_struct *fs = current->fs; spin_lock(&fs->lock); - mnt = mntget(fs->pwd.mnt); + *root = fs->pwd; + path_get(root); spin_unlock(&fs->lock); } else { struct fd f = fdget(fd); if (!f.file) - return ERR_PTR(-EBADF); - mnt = mntget(f.file->f_path.mnt); + return -EBADF; + *root = f.file->f_path; + path_get(root); fdput(f); } - return mnt; + + return 0; } +enum handle_to_path_flags { + HANDLE_CHECK_PERMS = (1 << 0), + HANDLE_CHECK_SUBTREE = (1 << 1), +}; + +struct handle_to_path_ctx { + struct path root; + enum handle_to_path_flags flags; + unsigned int fh_flags; +}; + static int vfs_dentry_acceptable(void *context, struct dentry *dentry) { - return 1; + struct handle_to_path_ctx *ctx = context; + struct user_namespace *user_ns = current_user_ns(); + struct dentry *d, *root = ctx->root.dentry; + struct mnt_idmap *idmap = mnt_idmap(ctx->root.mnt); + int retval = 0; + + if (!root) + return 1; + + /* Old permission model with global CAP_DAC_READ_SEARCH. */ + if (!ctx->flags) + return 1; + + /* + * It's racy as we're not taking rename_lock but we're able to ignore + * permissions and we just need an approximation whether we were able + * to follow a path to the file. + * + * It's also potentially expensive on some filesystems especially if + * there is a deep path. + */ + d = dget(dentry); + while (d != root && !IS_ROOT(d)) { + struct dentry *parent = dget_parent(d); + + /* + * We know that we have the ability to override DAC permissions + * as we've verified this earlier via CAP_DAC_READ_SEARCH. But + * we also need to make sure that there aren't any unmapped + * inodes in the path that would prevent us from reaching the + * file. + */ + if (!privileged_wrt_inode_uidgid(user_ns, idmap, + d_inode(parent))) { + dput(d); + dput(parent); + return retval; + } + + dput(d); + d = parent; + } + + if (!(ctx->flags & HANDLE_CHECK_SUBTREE) || d == root) + retval = 1; + WARN_ON_ONCE(d != root && d != root->d_sb->s_root); + dput(d); + return retval; } -static int do_handle_to_path(int mountdirfd, struct file_handle *handle, - struct path *path) +static int do_handle_to_path(struct file_handle *handle, struct path *path, + struct handle_to_path_ctx *ctx) { - int retval = 0; int handle_dwords; + struct vfsmount *mnt = ctx->root.mnt; - path->mnt = get_vfsmount_from_fd(mountdirfd); - if (IS_ERR(path->mnt)) { - retval = PTR_ERR(path->mnt); - goto out_err; - } /* change the handle size to multiple of sizeof(u32) */ handle_dwords = handle->handle_bytes >> 2; - path->dentry = exportfs_decode_fh(path->mnt, + path->dentry = exportfs_decode_fh_raw(mnt, (struct fid *)handle->f_handle, handle_dwords, handle->handle_type, - vfs_dentry_acceptable, NULL); - if (IS_ERR(path->dentry)) { - retval = PTR_ERR(path->dentry); - goto out_mnt; + ctx->fh_flags, + vfs_dentry_acceptable, ctx); + if (IS_ERR_OR_NULL(path->dentry)) { + if (path->dentry == ERR_PTR(-ENOMEM)) + return -ENOMEM; + return -ESTALE; } + path->mnt = mntget(mnt); return 0; -out_mnt: - mntput(path->mnt); -out_err: - return retval; +} + +/* + * Allow relaxed permissions of file handles if the caller has the + * ability to mount the filesystem or create a bind-mount of the + * provided @mountdirfd. + * + * In both cases the caller may be able to get an unobstructed way to + * the encoded file handle. If the caller is only able to create a + * bind-mount we need to verify that there are no locked mounts on top + * of it that could prevent us from getting to the encoded file. + * + * In principle, locked mounts can prevent the caller from mounting the + * filesystem but that only applies to procfs and sysfs neither of which + * support decoding file handles. + */ +static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, + unsigned int o_flags) +{ + struct path *root = &ctx->root; + + /* + * Restrict to O_DIRECTORY to provide a deterministic API that avoids a + * confusing api in the face of disconnected non-dir dentries. + * + * There's only one dentry for each directory inode (VFS rule)... + */ + if (!(o_flags & O_DIRECTORY)) + return false; + + if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) + ctx->flags = HANDLE_CHECK_PERMS; + else if (is_mounted(root->mnt) && + ns_capable(real_mount(root->mnt)->mnt_ns->user_ns, + CAP_SYS_ADMIN) && + !has_locked_children(real_mount(root->mnt), root->dentry)) + ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; + else + return false; + + /* Are we able to override DAC permissions? */ + if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH)) + return false; + + ctx->fh_flags = EXPORT_FH_DIR_ONLY; + return true; } static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, - struct path *path) + struct path *path, unsigned int o_flags) { int retval = 0; struct file_handle f_handle; struct file_handle *handle = NULL; + struct handle_to_path_ctx ctx = {}; - /* - * With handle we don't look at the execute bit on the - * directory. Ideally we would like CAP_DAC_SEARCH. - * But we don't have that - */ - if (!capable(CAP_DAC_READ_SEARCH)) { - retval = -EPERM; + retval = get_path_from_fd(mountdirfd, &ctx.root); + if (retval) goto out_err; + + if (!capable(CAP_DAC_READ_SEARCH) && !may_decode_fh(&ctx, o_flags)) { + retval = -EPERM; + goto out_path; } + if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { retval = -EFAULT; - goto out_err; + goto out_path; } if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || (f_handle.handle_bytes == 0)) { retval = -EINVAL; - goto out_err; + goto out_path; } handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) { retval = -ENOMEM; - goto out_err; + goto out_path; } /* copy the full handle */ *handle = f_handle; @@ -207,10 +307,12 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, goto out_handle; } - retval = do_handle_to_path(mountdirfd, handle, path); + retval = do_handle_to_path(handle, path, &ctx); out_handle: kfree(handle); +out_path: + path_put(&ctx.root); out_err: return retval; } @@ -223,7 +325,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, struct file *file; int fd; - retval = handle_to_path(mountdirfd, ufh, &path); + retval = handle_to_path(mountdirfd, ufh, &path, open_flag); if (retval) return retval; diff --git a/fs/mount.h b/fs/mount.h index 4a42fc68f4cc..4adce73211ae 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -152,3 +152,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list) } extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor); +bool has_locked_children(struct mount *mnt, struct dentry *dentry); diff --git a/fs/namespace.c b/fs/namespace.c index 5a51315c6678..4386787210c7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2078,7 +2078,7 @@ void drop_collected_mounts(struct vfsmount *mnt) namespace_unlock(); } -static bool has_locked_children(struct mount *mnt, struct dentry *dentry) +bool has_locked_children(struct mount *mnt, struct dentry *dentry) { struct mount *child; diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 0b75305fb5f5..dd4e11a703aa 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -247,7 +247,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) dentry = dget(exp->ex_path.dentry); else { dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid, - data_left, fileid_type, + data_left, fileid_type, 0, nfsd_acceptable, exp); if (IS_ERR_OR_NULL(dentry)) { trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp, diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index bb37ad5cc954..893a1d21dc1c 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -158,6 +158,7 @@ struct fid { #define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */ #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ +#define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ /** * struct export_operations - for nfsd to communicate with file systems @@ -305,6 +306,7 @@ static inline int exportfs_encode_fid(struct inode *inode, struct fid *fid, extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, + unsigned int flags, int (*acceptable)(void *, struct dentry *), void *context); extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, -- cgit v1.2.3 From 1866407831deb945a16c60c38246f28c2475b28a Mon Sep 17 00:00:00 2001 From: Richard Acayan Date: Thu, 23 May 2024 21:20:25 -0400 Subject: dt-bindings: arm: qcom,ids: Add SoC ID for SDM670 The socinfo driver uses SoC IDs from the device tree bindings. Add the SoC ID for SDM670. Signed-off-by: Richard Acayan Reviewed-by: Konrad Dybcio Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240524012023.318965-6-mailingradian@gmail.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/arm/qcom,ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/arm/qcom,ids.h b/include/dt-bindings/arm/qcom,ids.h index a2958952a9ec..d6c9e9472121 100644 --- a/include/dt-bindings/arm/qcom,ids.h +++ b/include/dt-bindings/arm/qcom,ids.h @@ -175,6 +175,7 @@ #define QCOM_ID_SDA630 327 #define QCOM_ID_MSM8905 331 #define QCOM_ID_SDX202 333 +#define QCOM_ID_SDM670 336 #define QCOM_ID_SDM450 338 #define QCOM_ID_SM8150 339 #define QCOM_ID_SDA845 341 -- cgit v1.2.3 From 14a1d1dc35d346a1523f38f6517c349dfa447a58 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Thu, 9 May 2024 16:06:48 +0200 Subject: dt-bindings: clock: rk3128: Add PCLK_MIPIPHY The DPHY's APB clock is required to be exposed in order to be able to enable it and access the phy's registers. Signed-off-by: Alex Bee Acked-by: Conor Dooley Link: https://lore.kernel.org/r/20240509140653.168591-3-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- include/dt-bindings/clock/rk3128-cru.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3128-cru.h b/include/dt-bindings/clock/rk3128-cru.h index 6a47825dac5d..1be455ba4985 100644 --- a/include/dt-bindings/clock/rk3128-cru.h +++ b/include/dt-bindings/clock/rk3128-cru.h @@ -116,6 +116,7 @@ #define PCLK_GMAC 367 #define PCLK_PMU_PRE 368 #define PCLK_SIM_CARD 369 +#define PCLK_MIPIPHY 370 /* hclk gates */ #define HCLK_SPDIF 440 -- cgit v1.2.3 From 4edd7dc82bd6be6989c034ccbbbccdc61034e33b Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 30 Apr 2024 11:43:43 +0530 Subject: PCI: endpoint: Rename core_init() callback in 'struct pci_epc_event_ops' to epc_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit core_init() callback is used to notify the EPC initialization event to the EPF drivers. The 'core' prefix was used indicate that the controller IP core has completed initialization. But it serves no purpose as the EPF driver will only care about the EPC initialization as a whole and there is no real benefit to distinguish the IP core part. Rename the core_init() callback in 'struct pci_epc_event_ops' to epc_init() to make it more clear. Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-2-22832d0d456f@linaro.org Tested-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel --- drivers/pci/endpoint/functions/pci-epf-mhi.c | 4 ++-- drivers/pci/endpoint/functions/pci-epf-test.c | 4 ++-- drivers/pci/endpoint/pci-epc-core.c | 16 ++++++++-------- include/linux/pci-epf.h | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/pci/endpoint/functions/pci-epf-mhi.c b/drivers/pci/endpoint/functions/pci-epf-mhi.c index 2c54d80107cf..95c3206f609f 100644 --- a/drivers/pci/endpoint/functions/pci-epf-mhi.c +++ b/drivers/pci/endpoint/functions/pci-epf-mhi.c @@ -716,7 +716,7 @@ static void pci_epf_mhi_dma_deinit(struct pci_epf_mhi *epf_mhi) epf_mhi->dma_chan_rx = NULL; } -static int pci_epf_mhi_core_init(struct pci_epf *epf) +static int pci_epf_mhi_epc_init(struct pci_epf *epf) { struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf); const struct pci_epf_mhi_ep_info *info = epf_mhi->info; @@ -897,7 +897,7 @@ static void pci_epf_mhi_unbind(struct pci_epf *epf) } static const struct pci_epc_event_ops pci_epf_mhi_event_ops = { - .core_init = pci_epf_mhi_core_init, + .epc_init = pci_epf_mhi_epc_init, .link_up = pci_epf_mhi_link_up, .link_down = pci_epf_mhi_link_down, .bme = pci_epf_mhi_bme, diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 3de18a601e7b..191aee61703f 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -731,7 +731,7 @@ static int pci_epf_test_set_bar(struct pci_epf *epf) return 0; } -static int pci_epf_test_core_init(struct pci_epf *epf) +static int pci_epf_test_epc_init(struct pci_epf *epf) { struct pci_epf_test *epf_test = epf_get_drvdata(epf); struct pci_epf_header *header = epf->header; @@ -791,7 +791,7 @@ static int pci_epf_test_link_up(struct pci_epf *epf) } static const struct pci_epc_event_ops pci_epf_test_event_ops = { - .core_init = pci_epf_test_core_init, + .epc_init = pci_epf_test_epc_init, .link_up = pci_epf_test_link_up, }; diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index 47d27ec7439d..e6bffa37a948 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -727,9 +727,9 @@ void pci_epc_linkdown(struct pci_epc *epc) EXPORT_SYMBOL_GPL(pci_epc_linkdown); /** - * pci_epc_init_notify() - Notify the EPF device that EPC device's core - * initialization is completed. - * @epc: the EPC device whose core initialization is completed + * pci_epc_init_notify() - Notify the EPF device that EPC device initialization + * is completed. + * @epc: the EPC device whose initialization is completed * * Invoke to Notify the EPF device that the EPC device's initialization * is completed. @@ -744,8 +744,8 @@ void pci_epc_init_notify(struct pci_epc *epc) mutex_lock(&epc->list_lock); list_for_each_entry(epf, &epc->pci_epf, list) { mutex_lock(&epf->lock); - if (epf->event_ops && epf->event_ops->core_init) - epf->event_ops->core_init(epf); + if (epf->event_ops && epf->event_ops->epc_init) + epf->event_ops->epc_init(epf); mutex_unlock(&epf->lock); } epc->init_complete = true; @@ -756,7 +756,7 @@ EXPORT_SYMBOL_GPL(pci_epc_init_notify); /** * pci_epc_notify_pending_init() - Notify the pending EPC device initialization * complete to the EPF device - * @epc: the EPC device whose core initialization is pending to be notified + * @epc: the EPC device whose initialization is pending to be notified * @epf: the EPF device to be notified * * Invoke to notify the pending EPC device initialization complete to the EPF @@ -767,8 +767,8 @@ void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf) { if (epc->init_complete) { mutex_lock(&epf->lock); - if (epf->event_ops && epf->event_ops->core_init) - epf->event_ops->core_init(epf); + if (epf->event_ops && epf->event_ops->epc_init) + epf->event_ops->epc_init(epf); mutex_unlock(&epf->lock); } } diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index adee6a1b35db..afe3bfd88742 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -70,13 +70,13 @@ struct pci_epf_ops { /** * struct pci_epc_event_ops - Callbacks for capturing the EPC events - * @core_init: Callback for the EPC initialization complete event + * @epc_init: Callback for the EPC initialization complete event * @link_up: Callback for the EPC link up event * @link_down: Callback for the EPC link down event * @bme: Callback for the EPC BME (Bus Master Enable) event */ struct pci_epc_event_ops { - int (*core_init)(struct pci_epf *epf); + int (*epc_init)(struct pci_epf *epf); int (*link_up)(struct pci_epf *epf); int (*link_down)(struct pci_epf *epf); int (*bme)(struct pci_epf *epf); -- cgit v1.2.3 From f58838d7feb04809257f54a8b256786d2f9dfe01 Mon Sep 17 00:00:00 2001 From: Manivannan Sadhasivam Date: Tue, 30 Apr 2024 11:43:44 +0530 Subject: PCI: endpoint: Rename BME to Bus Master Enable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BME which stands for 'Bus Master Enable' is not defined in the PCIe base spec even though it is commonly referred in many places (vendor docs). To align with the spec, rename it to its expansion 'Bus Master Enable'. Suggested-by: Damien Le Moal Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-3-22832d0d456f@linaro.org Link: https://lore.kernel.org/linux-pci/20240430-pci-epf-rework-v4-4-22832d0d456f@linaro.org Tested-by: Niklas Cassel Signed-off-by: Manivannan Sadhasivam Signed-off-by: Krzysztof Wilczyński [bhelgaas: squash removal of irrelevant 'Link is enabled'] Signed-off-by: Bjorn Helgaas Reviewed-by: Niklas Cassel --- drivers/pci/controller/dwc/pcie-qcom-ep.c | 4 ++-- drivers/pci/endpoint/functions/pci-epf-mhi.c | 8 ++++---- drivers/pci/endpoint/pci-epc-core.c | 19 ++++++++++--------- include/linux/pci-epc.h | 2 +- include/linux/pci-epf.h | 4 ++-- 5 files changed, 19 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c index 2fb8c15e7a91..1ecf602c225a 100644 --- a/drivers/pci/controller/dwc/pcie-qcom-ep.c +++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c @@ -642,10 +642,10 @@ static irqreturn_t qcom_pcie_ep_global_irq_thread(int irq, void *data) pcie_ep->link_status = QCOM_PCIE_EP_LINK_DOWN; pci_epc_linkdown(pci->ep.epc); } else if (FIELD_GET(PARF_INT_ALL_BME, status)) { - dev_dbg(dev, "Received BME event. Link is enabled!\n"); + dev_dbg(dev, "Received Bus Master Enable event\n"); pcie_ep->link_status = QCOM_PCIE_EP_LINK_ENABLED; qcom_pcie_ep_icc_update(pcie_ep); - pci_epc_bme_notify(pci->ep.epc); + pci_epc_bus_master_enable_notify(pci->ep.epc); } else if (FIELD_GET(PARF_INT_ALL_PM_TURNOFF, status)) { dev_dbg(dev, "Received PM Turn-off event! Entering L23\n"); val = readl_relaxed(pcie_ep->parf + PARF_PM_CTRL); diff --git a/drivers/pci/endpoint/functions/pci-epf-mhi.c b/drivers/pci/endpoint/functions/pci-epf-mhi.c index 95c3206f609f..b662905e2532 100644 --- a/drivers/pci/endpoint/functions/pci-epf-mhi.c +++ b/drivers/pci/endpoint/functions/pci-epf-mhi.c @@ -819,7 +819,7 @@ static int pci_epf_mhi_link_down(struct pci_epf *epf) return 0; } -static int pci_epf_mhi_bme(struct pci_epf *epf) +static int pci_epf_mhi_bus_master_enable(struct pci_epf *epf) { struct pci_epf_mhi *epf_mhi = epf_get_drvdata(epf); const struct pci_epf_mhi_ep_info *info = epf_mhi->info; @@ -882,8 +882,8 @@ static void pci_epf_mhi_unbind(struct pci_epf *epf) /* * Forcefully power down the MHI EP stack. Only way to bring the MHI EP - * stack back to working state after successive bind is by getting BME - * from host. + * stack back to working state after successive bind is by getting Bus + * Master Enable event from host. */ if (mhi_cntrl->mhi_dev) { mhi_ep_power_down(mhi_cntrl); @@ -900,7 +900,7 @@ static const struct pci_epc_event_ops pci_epf_mhi_event_ops = { .epc_init = pci_epf_mhi_epc_init, .link_up = pci_epf_mhi_link_up, .link_down = pci_epf_mhi_link_down, - .bme = pci_epf_mhi_bme, + .bus_master_enable = pci_epf_mhi_bus_master_enable, }; static int pci_epf_mhi_probe(struct pci_epf *epf, diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index e6bffa37a948..56b60330355d 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -775,14 +775,15 @@ void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf) EXPORT_SYMBOL_GPL(pci_epc_notify_pending_init); /** - * pci_epc_bme_notify() - Notify the EPF device that the EPC device has received - * the BME event from the Root complex - * @epc: the EPC device that received the BME event + * pci_epc_bus_master_enable_notify() - Notify the EPF device that the EPC + * device has received the Bus Master + * Enable event from the Root complex + * @epc: the EPC device that received the Bus Master Enable event * - * Invoke to Notify the EPF device that the EPC device has received the Bus - * Master Enable (BME) event from the Root complex + * Notify the EPF device that the EPC device has generated the Bus Master Enable + * event due to host setting the Bus Master Enable bit in the Command register. */ -void pci_epc_bme_notify(struct pci_epc *epc) +void pci_epc_bus_master_enable_notify(struct pci_epc *epc) { struct pci_epf *epf; @@ -792,13 +793,13 @@ void pci_epc_bme_notify(struct pci_epc *epc) mutex_lock(&epc->list_lock); list_for_each_entry(epf, &epc->pci_epf, list) { mutex_lock(&epf->lock); - if (epf->event_ops && epf->event_ops->bme) - epf->event_ops->bme(epf); + if (epf->event_ops && epf->event_ops->bus_master_enable) + epf->event_ops->bus_master_enable(epf); mutex_unlock(&epf->lock); } mutex_unlock(&epc->list_lock); } -EXPORT_SYMBOL_GPL(pci_epc_bme_notify); +EXPORT_SYMBOL_GPL(pci_epc_bus_master_enable_notify); /** * pci_epc_destroy() - destroy the EPC device diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index acc5f96161fe..11115cd0fe5b 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -226,7 +226,7 @@ void pci_epc_linkup(struct pci_epc *epc); void pci_epc_linkdown(struct pci_epc *epc); void pci_epc_init_notify(struct pci_epc *epc); void pci_epc_notify_pending_init(struct pci_epc *epc, struct pci_epf *epf); -void pci_epc_bme_notify(struct pci_epc *epc); +void pci_epc_bus_master_enable_notify(struct pci_epc *epc); void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf, enum pci_epc_interface_type type); int pci_epc_write_header(struct pci_epc *epc, u8 func_no, u8 vfunc_no, diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index afe3bfd88742..dc759eb7157c 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -73,13 +73,13 @@ struct pci_epf_ops { * @epc_init: Callback for the EPC initialization complete event * @link_up: Callback for the EPC link up event * @link_down: Callback for the EPC link down event - * @bme: Callback for the EPC BME (Bus Master Enable) event + * @bus_master_enable: Callback for the EPC Bus Master Enable event */ struct pci_epc_event_ops { int (*epc_init)(struct pci_epf *epf); int (*link_up)(struct pci_epf *epf); int (*link_down)(struct pci_epf *epf); - int (*bme)(struct pci_epf *epf); + int (*bus_master_enable)(struct pci_epf *epf); }; /** -- cgit v1.2.3 From 4eed3dd7116814c82630b0f1b0f73fa96707134f Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 7 May 2024 13:25:19 +0300 Subject: resource: Use typedef for alignf callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make it simpler to declare resource constraint alignf callbacks, add typedef for it and document it. Suggested-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240507102523.57320-5-ilpo.jarvinen@linux.intel.com Tested-by: Lidong Wang Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg --- drivers/pci/bus.c | 10 ++-------- include/linux/ioport.h | 22 ++++++++++++++++++---- include/linux/pci.h | 5 +---- kernel/resource.c | 8 ++------ 4 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 826b5016a101..dfc99b3cb958 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -176,10 +176,7 @@ static void pci_clip_resource_to_region(struct pci_bus *bus, static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, resource_size_t size, resource_size_t align, resource_size_t min, unsigned long type_mask, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), + resource_alignf alignf, void *alignf_data, struct pci_bus_region *region) { @@ -250,10 +247,7 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, int pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, resource_size_t size, resource_size_t align, resource_size_t min, unsigned long type_mask, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), + resource_alignf alignf, void *alignf_data) { #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT diff --git a/include/linux/ioport.h b/include/linux/ioport.h index db7fe25f3370..28266426e5bf 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -188,6 +188,23 @@ enum { #define DEFINE_RES_DMA(_dma) \ DEFINE_RES_DMA_NAMED((_dma), NULL) +/** + * typedef resource_alignf - Resource alignment callback + * @data: Private data used by the callback + * @res: Resource candidate range (an empty resource space) + * @size: The minimum size of the empty space + * @align: Alignment from the constraints + * + * Callback allows calculating resource placement and alignment beyond min, + * max, and align fields in the struct resource_constraint. + * + * Return: Start address for the resource. + */ +typedef resource_size_t (*resource_alignf)(void *data, + const struct resource *res, + resource_size_t size, + resource_size_t align); + /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; @@ -207,10 +224,7 @@ extern void arch_remove_reservations(struct resource *avail); extern int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), + resource_alignf alignf, void *alignf_data); struct resource *lookup_resource(struct resource *root, resource_size_t start); int adjust_resource(struct resource *res, resource_size_t start, diff --git a/include/linux/pci.h b/include/linux/pci.h index fb004fd4e889..760fdc4b37cc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1551,10 +1551,7 @@ int __must_check pci_bus_alloc_resource(struct pci_bus *bus, struct resource *res, resource_size_t size, resource_size_t align, resource_size_t min, unsigned long type_mask, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), + resource_alignf alignf, void *alignf_data); diff --git a/kernel/resource.c b/kernel/resource.c index 3f15a32d9c42..26ad6f223652 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -63,8 +63,7 @@ EXPORT_SYMBOL(iomem_resource); */ struct resource_constraint { resource_size_t min, max, align; - resource_size_t (*alignf)(void *, const struct resource *, - resource_size_t, resource_size_t); + resource_alignf alignf; void *alignf_data; }; @@ -783,10 +782,7 @@ out: int allocate_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, resource_size_t max, resource_size_t align, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), + resource_alignf alignf, void *alignf_data) { int err; -- cgit v1.2.3 From 2700225304e8e4f0f29f2bbb8ad0c112c9666d90 Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Tue, 7 May 2024 13:25:21 +0300 Subject: resource: Export find_resource_space() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PCI bridge window logic needs to find out in advance to the actual allocation if there is an empty space big enough to fit the window. Export find_resource_space() for the purpose. Also move the struct resource_constraint into generic header to be able to use the new interface. Link: https://lore.kernel.org/r/20240507102523.57320-7-ilpo.jarvinen@linux.intel.com Tested-by: Lidong Wang Signed-off-by: Ilpo Järvinen Signed-off-by: Bjorn Helgaas Reviewed-by: Andy Shevchenko Reviewed-by: Mika Westerberg --- include/linux/ioport.h | 22 ++++++++++++++++++++++ kernel/resource.c | 26 ++++---------------------- 2 files changed, 26 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 28266426e5bf..6e9fb667a1c5 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -205,6 +205,25 @@ typedef resource_size_t (*resource_alignf)(void *data, resource_size_t size, resource_size_t align); +/** + * struct resource_constraint - constraints to be met while searching empty + * resource space + * @min: The minimum address for the memory range + * @max: The maximum address for the memory range + * @align: Alignment for the start address of the empty space + * @alignf: Additional alignment constraints callback + * @alignf_data: Data provided for @alignf callback + * + * Contains the range and alignment constraints that have to be met during + * find_resource_space(). @alignf can be NULL indicating no alignment beyond + * @align is necessary. + */ +struct resource_constraint { + resource_size_t min, max, align; + resource_alignf alignf; + void *alignf_data; +}; + /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; @@ -278,6 +297,9 @@ static inline bool resource_union(const struct resource *r1, const struct resour return true; } +int find_resource_space(struct resource *root, struct resource *new, + resource_size_t size, struct resource_constraint *constraint); + /* Convenience shorthand with allocation */ #define request_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), 0) #define request_muxed_region(start,n,name) __request_region(&ioport_resource, (start), (n), (name), IORESOURCE_MUXED) diff --git a/kernel/resource.c b/kernel/resource.c index 35c44c23b037..14777afb0a99 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -48,25 +48,6 @@ struct resource iomem_resource = { }; EXPORT_SYMBOL(iomem_resource); -/** - * struct resource_constraint - constraints to be met while searching empty - * resource space - * @min: The minimum address for the memory range - * @max: The maximum address for the memory range - * @align: Alignment for the start address of the empty space - * @alignf: Additional alignment constraints callback - * @alignf_data: Data provided for @alignf callback - * - * Contains the range and alignment constraints that have to be met during - * find_resource_space(). @alignf can be NULL indicating no alignment beyond - * @align is necessary. - */ -struct resource_constraint { - resource_size_t min, max, align; - resource_alignf alignf; - void *alignf_data; -}; - static DEFINE_RWLOCK(resource_lock); static struct resource *next_resource(struct resource *p, bool skip_children) @@ -708,12 +689,13 @@ next: if (!this || this->end == root->end) * * %0 - if successful, @new members start, end, and flags are altered. * * %-EBUSY - if no empty space was found. */ -static int find_resource_space(struct resource *root, struct resource *new, - resource_size_t size, - struct resource_constraint *constraint) +int find_resource_space(struct resource *root, struct resource *new, + resource_size_t size, + struct resource_constraint *constraint) { return __find_resource_space(root, NULL, new, size, constraint); } +EXPORT_SYMBOL_GPL(find_resource_space); /** * reallocate_resource - allocate a slot in the resource tree given range & alignment. -- cgit v1.2.3 From c7ce956bb6d0f32ab921b6ffba1a6a834df96f21 Mon Sep 17 00:00:00 2001 From: MarileneGarcia Date: Sun, 19 May 2024 00:10:27 -0300 Subject: drm/dp: Fix documentation warning It fixes the following warnings when the kernel documentation is generated: ./include/drm/display/drm_dp_helper.h:126: warning: Function parameter or struct member 'mode' not described in 'drm_dp_as_sdp' ./include/drm/display/drm_dp_helper.h:126: warning: Excess struct member 'operation_mode' description in 'drm_dp_as_sdp' Signed-off-by: MarileneGarcia Fixes: 0bbb8f594e33 ("drm/dp: Add Adaptive Sync SDP logging") Reviewed-by: Dmitry Baryshkov Reported-by: Stephen Rothwell Closes: https://lore.kernel.org/r/20240405141640.09b0bdbf@canb.auug.org.au Reviewed-by: Douglas Anderson Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240519031027.433751-1-marilene.agarcia@gmail.com --- include/drm/display/drm_dp_helper.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 8bed890eec2c..8defcc399f42 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -112,7 +112,7 @@ struct drm_dp_vsc_sdp { * @target_rr: Target Refresh * @duration_incr_ms: Successive frame duration increase * @duration_decr_ms: Successive frame duration decrease - * @operation_mode: Adaptive Sync Operation Mode + * @mode: Adaptive Sync Operation Mode */ struct drm_dp_as_sdp { unsigned char sdp_type; -- cgit v1.2.3 From 475beea0b9f631656b5cc39429a39696876af613 Mon Sep 17 00:00:00 2001 From: Alexandru Gagniuc Date: Tue, 30 Apr 2024 23:07:43 -0500 Subject: dt-bindings: clock: Add PCIe pipe related clocks for IPQ9574 Add defines for the missing PCIe PIPE clocks. Signed-off-by: Alexandru Gagniuc Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240501040800.1542805-2-mr.nuke.me@gmail.com Signed-off-by: Bjorn Andersson --- include/dt-bindings/clock/qcom,ipq9574-gcc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/qcom,ipq9574-gcc.h b/include/dt-bindings/clock/qcom,ipq9574-gcc.h index 08fd3a37acaa..52123c5a09fa 100644 --- a/include/dt-bindings/clock/qcom,ipq9574-gcc.h +++ b/include/dt-bindings/clock/qcom,ipq9574-gcc.h @@ -216,4 +216,8 @@ #define GCC_CRYPTO_AHB_CLK 207 #define GCC_USB0_PIPE_CLK 208 #define GCC_USB0_SLEEP_CLK 209 +#define GCC_PCIE0_PIPE_CLK 210 +#define GCC_PCIE1_PIPE_CLK 211 +#define GCC_PCIE2_PIPE_CLK 212 +#define GCC_PCIE3_PIPE_CLK 213 #endif -- cgit v1.2.3 From 8526f8c877baf3f9e678b31fd7d1066b776775cc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:02:13 +0200 Subject: wifi: nl80211: clean up coalescing rule handling There's no need to allocate a tiny struct and then an array again, just allocate the two together and use __counted_by(). Also unify the freeing. Reviewed-by: Miriam Rachel Korenblit Link: https://msgid.link/20240523120213.48a40cfb96f9.Ia02bf8f8fefbf533c64c5fa26175848d4a3a7899@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 +- net/wireless/core.c | 3 ++- net/wireless/nl80211.c | 47 ++++++++++++++++------------------------------- net/wireless/nl80211.h | 4 ++-- 4 files changed, 21 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d79180bec7a1..5da9bb0ac6a4 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3566,8 +3566,8 @@ struct cfg80211_coalesce_rules { * @n_rules: number of rules */ struct cfg80211_coalesce { - struct cfg80211_coalesce_rules *rules; int n_rules; + struct cfg80211_coalesce_rules rules[] __counted_by(n_rules); }; /** diff --git a/net/wireless/core.c b/net/wireless/core.c index 61f7cd8a8e9c..7c35349b9596 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1145,7 +1145,8 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->background_cac_abort_wk); cfg80211_rdev_free_wowlan(rdev); - cfg80211_rdev_free_coalesce(rdev); + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = NULL; } EXPORT_SYMBOL(wiphy_unregister); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6ba988a6f5a2..8ff5f79d446a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13897,9 +13897,8 @@ nla_put_failure: return -ENOBUFS; } -void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev) +void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce) { - struct cfg80211_coalesce *coalesce = rdev->coalesce; int i, j; struct cfg80211_coalesce_rules *rule; @@ -13908,13 +13907,13 @@ void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev) for (i = 0; i < coalesce->n_rules; i++) { rule = &coalesce->rules[i]; + if (!rule) + continue; for (j = 0; j < rule->n_patterns; j++) kfree(rule->patterns[j].mask); kfree(rule->patterns); } - kfree(coalesce->rules); kfree(coalesce); - rdev->coalesce = NULL; } static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, @@ -14012,17 +14011,16 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce; - struct cfg80211_coalesce new_coalesce = {}; - struct cfg80211_coalesce *n_coalesce; - int err, rem_rule, n_rules = 0, i, j; + struct cfg80211_coalesce *new_coalesce; + int err, rem_rule, n_rules = 0, i; struct nlattr *rule; - struct cfg80211_coalesce_rules *tmp_rule; if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) { - cfg80211_rdev_free_coalesce(rdev); + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = NULL; rdev_set_coalesce(rdev, NULL); return 0; } @@ -14033,47 +14031,34 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) if (n_rules > coalesce->n_rules) return -EINVAL; - new_coalesce.rules = kcalloc(n_rules, sizeof(new_coalesce.rules[0]), - GFP_KERNEL); - if (!new_coalesce.rules) + new_coalesce = kzalloc(struct_size(new_coalesce, rules, n_rules), + GFP_KERNEL); + if (!new_coalesce) return -ENOMEM; - new_coalesce.n_rules = n_rules; + new_coalesce->n_rules = n_rules; i = 0; nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE], rem_rule) { err = nl80211_parse_coalesce_rule(rdev, rule, - &new_coalesce.rules[i]); + &new_coalesce->rules[i]); if (err) goto error; i++; } - err = rdev_set_coalesce(rdev, &new_coalesce); + err = rdev_set_coalesce(rdev, new_coalesce); if (err) goto error; - n_coalesce = kmemdup(&new_coalesce, sizeof(new_coalesce), GFP_KERNEL); - if (!n_coalesce) { - err = -ENOMEM; - goto error; - } - cfg80211_rdev_free_coalesce(rdev); - rdev->coalesce = n_coalesce; + cfg80211_free_coalesce(rdev->coalesce); + rdev->coalesce = new_coalesce; return 0; error: - for (i = 0; i < new_coalesce.n_rules; i++) { - tmp_rule = &new_coalesce.rules[i]; - if (!tmp_rule) - continue; - for (j = 0; j < tmp_rule->n_patterns; j++) - kfree(tmp_rule->patterns[j].mask); - kfree(tmp_rule->patterns); - } - kfree(new_coalesce.rules); + cfg80211_free_coalesce(new_coalesce); return err; } diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 6376f3a87f8a..ffaab9a92e5b 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file - * Copyright (C) 2018, 2020-2022 Intel Corporation + * Copyright (C) 2018, 2020-2024 Intel Corporation */ #ifndef __NET_WIRELESS_NL80211_H #define __NET_WIRELESS_NL80211_H @@ -119,7 +119,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, void nl80211_send_ap_stopped(struct wireless_dev *wdev, unsigned int link_id); -void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev); +void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce); /* peer measurement */ int nl80211_pmsr_start(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3 From ed7ee6a69f9289337af4835a908aa782263d4852 Mon Sep 17 00:00:00 2001 From: John Garry Date: Wed, 29 May 2024 10:39:01 +0200 Subject: statx: Update offset commentary for struct statx In commit 2a82bb02941f ("statx: stx_subvol"), a new member was added to struct statx, but the offset comment was not correct. Update it. Signed-off-by: John Garry Link: https://lore.kernel.org/r/20240529081725.3769290-1-john.g.garry@oracle.com Signed-off-by: Christian Brauner --- include/uapi/linux/stat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 67626d535316..95770941ee2c 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -126,8 +126,8 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ + __u64 stx_subvol; /* Subvolume identifier */ __u64 __spare3[11]; /* Spare space for future expansion */ /* 0x100 */ }; -- cgit v1.2.3 From 1deba6e24c221c61c6eab8656a53f8c17035932b Mon Sep 17 00:00:00 2001 From: Brent Lu Date: Mon, 27 May 2024 14:35:47 -0500 Subject: ASoC: SOF: sof-audio: add sof_dai_get_tdm_slots function An new interface, sof_dai_get_tdm_slots(), is added for machine driver to get tdm slot number from topology. The dai_get_param() callback needs to support new parameter type SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS by returning the tdm slot number of specific SSP port. Reviewed-by: Bard Liao Signed-off-by: Brent Lu Signed-off-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240527193552.165567-14-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- include/sound/sof.h | 1 + sound/soc/sof/sof-audio.c | 10 ++++++++++ sound/soc/sof/sof-audio.h | 5 +++-- 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/sound/sof.h b/include/sound/sof.h index ec6c30d54592..64fd5504cb2b 100644 --- a/include/sound/sof.h +++ b/include/sound/sof.h @@ -173,5 +173,6 @@ struct sof_dev_desc { int sof_dai_get_mclk(struct snd_soc_pcm_runtime *rtd); int sof_dai_get_bclk(struct snd_soc_pcm_runtime *rtd); +int sof_dai_get_tdm_slots(struct snd_soc_pcm_runtime *rtd); #endif diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c index 2421a115747e..881eec38c2e2 100644 --- a/sound/soc/sof/sof-audio.c +++ b/sound/soc/sof/sof-audio.c @@ -1016,3 +1016,13 @@ int sof_dai_get_bclk(struct snd_soc_pcm_runtime *rtd) return sof_dai_get_param(rtd, SOF_DAI_PARAM_INTEL_SSP_BCLK); } EXPORT_SYMBOL(sof_dai_get_bclk); + +/* + * Helper to get SSP TDM slot number from a pcm_runtime. + * Return 0 if not exist. + */ +int sof_dai_get_tdm_slots(struct snd_soc_pcm_runtime *rtd) +{ + return sof_dai_get_param(rtd, SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS); +} +EXPORT_SYMBOL(sof_dai_get_tdm_slots); diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h index abb2a00c520d..49be02234fc3 100644 --- a/sound/soc/sof/sof-audio.h +++ b/sound/soc/sof/sof-audio.h @@ -44,8 +44,9 @@ #define WIDGET_IS_AIF_OR_DAI(id) (WIDGET_IS_DAI(id) || WIDGET_IS_AIF(id)) #define WIDGET_IS_COPIER(id) (WIDGET_IS_AIF_OR_DAI(id) || (id) == snd_soc_dapm_buffer) -#define SOF_DAI_PARAM_INTEL_SSP_MCLK 0 -#define SOF_DAI_PARAM_INTEL_SSP_BCLK 1 +#define SOF_DAI_PARAM_INTEL_SSP_MCLK 0 +#define SOF_DAI_PARAM_INTEL_SSP_BCLK 1 +#define SOF_DAI_PARAM_INTEL_SSP_TDM_SLOTS 2 enum sof_widget_op { SOF_WIDGET_PREPARE, -- cgit v1.2.3 From cc5ac966f26193ab185cc43d64d9f1ae998ccb6e Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:42:57 +0800 Subject: cachefiles: add output string to cachefiles_obj_[get|put]_ondemand_fd This lets us see the correct trace output. Fixes: c8383054506c ("cachefiles: notify the user daemon when looking up cookie") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-2-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- include/trace/events/cachefiles.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index cf4b98b9a9ed..e3213af847cd 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -127,7 +127,9 @@ enum cachefiles_error_trace { EM(cachefiles_obj_see_lookup_cookie, "SEE lookup_cookie") \ EM(cachefiles_obj_see_lookup_failed, "SEE lookup_failed") \ EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ - E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") + EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ + EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ + E_(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") #define cachefiles_coherency_traces \ EM(cachefiles_coherency_check_aux, "BAD aux ") \ -- cgit v1.2.3 From da4a827416066191aafeeccee50a8836a826ba10 Mon Sep 17 00:00:00 2001 From: Baokun Li Date: Wed, 22 May 2024 19:43:00 +0800 Subject: cachefiles: fix slab-use-after-free in cachefiles_ondemand_daemon_read() We got the following issue in a fuzz test of randomly issuing the restore command: ================================================================== BUG: KASAN: slab-use-after-free in cachefiles_ondemand_daemon_read+0xb41/0xb60 Read of size 8 at addr ffff888122e84088 by task ondemand-04-dae/963 CPU: 13 PID: 963 Comm: ondemand-04-dae Not tainted 6.8.0-dirty #564 Call Trace: kasan_report+0x93/0xc0 cachefiles_ondemand_daemon_read+0xb41/0xb60 vfs_read+0x169/0xb50 ksys_read+0xf5/0x1e0 Allocated by task 116: kmem_cache_alloc+0x140/0x3a0 cachefiles_lookup_cookie+0x140/0xcd0 fscache_cookie_state_machine+0x43c/0x1230 [...] Freed by task 792: kmem_cache_free+0xfe/0x390 cachefiles_put_object+0x241/0x480 fscache_cookie_state_machine+0x5c8/0x1230 [...] ================================================================== Following is the process that triggers the issue: mount | daemon_thread1 | daemon_thread2 ------------------------------------------------------------ cachefiles_withdraw_cookie cachefiles_ondemand_clean_object(object) cachefiles_ondemand_send_req REQ_A = kzalloc(sizeof(*req) + data_len) wait_for_completion(&REQ_A->done) cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req msg->object_id = req->object->ondemand->ondemand_id ------ restore ------ cachefiles_ondemand_restore xas_for_each(&xas, req, ULONG_MAX) xas_set_mark(&xas, CACHEFILES_REQ_NEW) cachefiles_daemon_read cachefiles_ondemand_daemon_read REQ_A = cachefiles_ondemand_select_req copy_to_user(_buffer, msg, n) xa_erase(&cache->reqs, id) complete(&REQ_A->done) ------ close(fd) ------ cachefiles_ondemand_fd_release cachefiles_put_object cachefiles_put_object kmem_cache_free(cachefiles_object_jar, object) REQ_A->object->ondemand->ondemand_id // object UAF !!! When we see the request within xa_lock, req->object must not have been freed yet, so grab the reference count of object before xa_unlock to avoid the above issue. Fixes: 0a7e54c1959c ("cachefiles: resend an open request if the read request's object is closed") Signed-off-by: Baokun Li Link: https://lore.kernel.org/r/20240522114308.2402121-5-libaokun@huaweicloud.com Acked-by: Jeff Layton Reviewed-by: Jia Zhu Reviewed-by: Jingbo Xu Signed-off-by: Christian Brauner --- fs/cachefiles/ondemand.c | 3 +++ include/trace/events/cachefiles.h | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index c011fb24d238..3dd002108a87 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -337,6 +337,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, xas_clear_mark(&xas, CACHEFILES_REQ_NEW); cache->req_id_next = xas.xa_index + 1; refcount_inc(&req->ref); + cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); xa_unlock(&cache->reqs); id = xas.xa_index; @@ -357,6 +358,7 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, goto err_put_fd; } + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); /* CLOSE request has no reply */ if (msg->opcode == CACHEFILES_OP_CLOSE) { xa_erase(&cache->reqs, id); @@ -370,6 +372,7 @@ err_put_fd: if (msg->opcode == CACHEFILES_OP_OPEN) close_fd(((struct cachefiles_open *)msg->data)->fd); error: + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); xas_reset(&xas); xas_lock(&xas); if (xas_load(&xas) == req) { diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index e3213af847cd..7d931db02b93 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -33,6 +33,8 @@ enum cachefiles_obj_ref_trace { cachefiles_obj_see_withdrawal, cachefiles_obj_get_ondemand_fd, cachefiles_obj_put_ondemand_fd, + cachefiles_obj_get_read_req, + cachefiles_obj_put_read_req, }; enum fscache_why_object_killed { @@ -129,7 +131,9 @@ enum cachefiles_error_trace { EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ - E_(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") + EM(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") \ + EM(cachefiles_obj_get_read_req, "GET read_req") \ + E_(cachefiles_obj_put_read_req, "PUT read_req") #define cachefiles_coherency_traces \ EM(cachefiles_coherency_check_aux, "BAD aux ") \ -- cgit v1.2.3 From 7c327d56597d8de1680cf24e956b704270d3d84a Mon Sep 17 00:00:00 2001 From: Richard Maina Date: Wed, 29 May 2024 11:09:55 -0700 Subject: hwspinlock: Introduce hwspin_lock_bust() When a remoteproc crashes or goes down unexpectedly this can result in a state where locks held by the remoteproc will remain locked possibly resulting in deadlock. This new API hwspin_lock_bust() allows hwspinlock implementers to define a bust operation for freeing previously acquired hwspinlocks after verifying ownership of the acquired lock. Signed-off-by: Richard Maina Reviewed-by: Bjorn Andersson Signed-off-by: Chris Lew Link: https://lore.kernel.org/r/20240529-hwspinlock-bust-v3-1-c8b924ffa5a2@quicinc.com Signed-off-by: Bjorn Andersson --- Documentation/locking/hwspinlock.rst | 11 +++++++++++ drivers/hwspinlock/hwspinlock_core.c | 28 ++++++++++++++++++++++++++++ drivers/hwspinlock/hwspinlock_internal.h | 3 +++ include/linux/hwspinlock.h | 6 ++++++ 4 files changed, 48 insertions(+) (limited to 'include') diff --git a/Documentation/locking/hwspinlock.rst b/Documentation/locking/hwspinlock.rst index 6f03713b7003..2ffaa3cbd63f 100644 --- a/Documentation/locking/hwspinlock.rst +++ b/Documentation/locking/hwspinlock.rst @@ -85,6 +85,17 @@ is already free). Should be called from a process context (might sleep). +:: + + int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id); + +After verifying the owner of the hwspinlock, release a previously acquired +hwspinlock; returns 0 on success, or an appropriate error code on failure +(e.g. -EOPNOTSUPP if the bust operation is not defined for the specific +hwspinlock). + +Should be called from a process context (might sleep). + :: int hwspin_lock_timeout(struct hwspinlock *hwlock, unsigned int timeout); diff --git a/drivers/hwspinlock/hwspinlock_core.c b/drivers/hwspinlock/hwspinlock_core.c index 0c0a932c00f3..6505261e6068 100644 --- a/drivers/hwspinlock/hwspinlock_core.c +++ b/drivers/hwspinlock/hwspinlock_core.c @@ -305,6 +305,34 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) } EXPORT_SYMBOL_GPL(__hwspin_unlock); +/** + * hwspin_lock_bust() - bust a specific hwspinlock + * @hwlock: a previously-acquired hwspinlock which we want to bust + * @id: identifier of the remote lock holder, if applicable + * + * This function will bust a hwspinlock that was previously acquired as + * long as the current owner of the lock matches the id given by the caller. + * + * Context: Process context. + * + * Returns: 0 on success, or -EINVAL if the hwspinlock does not exist, or + * the bust operation fails, and -EOPNOTSUPP if the bust operation is not + * defined for the hwspinlock. + */ +int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id) +{ + if (WARN_ON(!hwlock)) + return -EINVAL; + + if (!hwlock->bank->ops->bust) { + pr_err("bust operation not defined\n"); + return -EOPNOTSUPP; + } + + return hwlock->bank->ops->bust(hwlock, id); +} +EXPORT_SYMBOL_GPL(hwspin_lock_bust); + /** * of_hwspin_lock_simple_xlate - translate hwlock_spec to return a lock id * @hwlock_spec: hwlock specifier as found in the device tree diff --git a/drivers/hwspinlock/hwspinlock_internal.h b/drivers/hwspinlock/hwspinlock_internal.h index 29892767bb7a..f298fc0ee5ad 100644 --- a/drivers/hwspinlock/hwspinlock_internal.h +++ b/drivers/hwspinlock/hwspinlock_internal.h @@ -21,6 +21,8 @@ struct hwspinlock_device; * @trylock: make a single attempt to take the lock. returns 0 on * failure and true on success. may _not_ sleep. * @unlock: release the lock. always succeed. may _not_ sleep. + * @bust: optional, platform-specific bust handler, called by hwspinlock + * core to bust a specific lock. * @relax: optional, platform-specific relax handler, called by hwspinlock * core while spinning on a lock, between two successive * invocations of @trylock. may _not_ sleep. @@ -28,6 +30,7 @@ struct hwspinlock_device; struct hwspinlock_ops { int (*trylock)(struct hwspinlock *lock); void (*unlock)(struct hwspinlock *lock); + int (*bust)(struct hwspinlock *lock, unsigned int id); void (*relax)(struct hwspinlock *lock); }; diff --git a/include/linux/hwspinlock.h b/include/linux/hwspinlock.h index bfe7c1f1ac6d..f0231dbc4777 100644 --- a/include/linux/hwspinlock.h +++ b/include/linux/hwspinlock.h @@ -68,6 +68,7 @@ int __hwspin_lock_timeout(struct hwspinlock *, unsigned int, int, int __hwspin_trylock(struct hwspinlock *, int, unsigned long *); void __hwspin_unlock(struct hwspinlock *, int, unsigned long *); int of_hwspin_lock_get_id_byname(struct device_node *np, const char *name); +int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id); int devm_hwspin_lock_free(struct device *dev, struct hwspinlock *hwlock); struct hwspinlock *devm_hwspin_lock_request(struct device *dev); struct hwspinlock *devm_hwspin_lock_request_specific(struct device *dev, @@ -127,6 +128,11 @@ void __hwspin_unlock(struct hwspinlock *hwlock, int mode, unsigned long *flags) { } +static inline int hwspin_lock_bust(struct hwspinlock *hwlock, unsigned int id) +{ + return 0; +} + static inline int of_hwspin_lock_get_id(struct device_node *np, int index) { return 0; -- cgit v1.2.3 From 2e3f0d693875db698891ffe89a18121bda5b95b8 Mon Sep 17 00:00:00 2001 From: Chris Lew Date: Wed, 29 May 2024 11:09:57 -0700 Subject: soc: qcom: smem: Add qcom_smem_bust_hwspin_lock_by_host() Add qcom_smem_bust_hwspin_lock_by_host to enable remoteproc to bust the hwspin_lock owned by smem. In the event the remoteproc crashes unexpectedly, the remoteproc driver can invoke this API to try and bust the hwspin_lock and release the lock if still held by the remoteproc device. Signed-off-by: Chris Lew Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/20240529-hwspinlock-bust-v3-3-c8b924ffa5a2@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/smem.c | 26 ++++++++++++++++++++++++++ include/linux/soc/qcom/smem.h | 2 ++ 2 files changed, 28 insertions(+) (limited to 'include') diff --git a/drivers/soc/qcom/smem.c b/drivers/soc/qcom/smem.c index 7191fa0c087f..50039e983eba 100644 --- a/drivers/soc/qcom/smem.c +++ b/drivers/soc/qcom/smem.c @@ -359,6 +359,32 @@ static struct qcom_smem *__smem; /* Timeout (ms) for the trylock of remote spinlocks */ #define HWSPINLOCK_TIMEOUT 1000 +/* The qcom hwspinlock id is always plus one from the smem host id */ +#define SMEM_HOST_ID_TO_HWSPINLOCK_ID(__x) ((__x) + 1) + +/** + * qcom_smem_bust_hwspin_lock_by_host() - bust the smem hwspinlock for a host + * @host: remote processor id + * + * Busts the hwspin_lock for the given smem host id. This helper is intended + * for remoteproc drivers that manage remoteprocs with an equivalent smem + * driver instance in the remote firmware. Drivers can force a release of the + * smem hwspin_lock if the rproc unexpectedly goes into a bad state. + * + * Context: Process context. + * + * Returns: 0 on success, otherwise negative errno. + */ +int qcom_smem_bust_hwspin_lock_by_host(unsigned int host) +{ + /* This function is for remote procs, so ignore SMEM_HOST_APPS */ + if (host == SMEM_HOST_APPS || host >= SMEM_HOST_COUNT) + return -EINVAL; + + return hwspin_lock_bust(__smem->hwlock, SMEM_HOST_ID_TO_HWSPINLOCK_ID(host)); +} +EXPORT_SYMBOL_GPL(qcom_smem_bust_hwspin_lock_by_host); + /** * qcom_smem_is_available() - Check if SMEM is available * diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h index a36a3b9d4929..03187bc95851 100644 --- a/include/linux/soc/qcom/smem.h +++ b/include/linux/soc/qcom/smem.h @@ -14,4 +14,6 @@ phys_addr_t qcom_smem_virt_to_phys(void *p); int qcom_smem_get_soc_id(u32 *id); +int qcom_smem_bust_hwspin_lock_by_host(unsigned int host); + #endif -- cgit v1.2.3 From 5e514f1cba090e1c8fff03e92a175eccfe46305f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 May 2024 12:52:50 +0000 Subject: tcp: add tcp_done_with_error() helper tcp_reset() ends with a sequence that is carefuly ordered. We need to fix [e]poll bugs in the following patches, it makes sense to use a common helper. Suggested-by: Neal Cardwell Signed-off-by: Eric Dumazet Acked-by: Neal Cardwell Link: https://lore.kernel.org/r/20240528125253.1966136-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/tcp.h | 1 + net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 32 +++++++++++++++++++++----------- 3 files changed, 23 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 060e95b331a2..32815a40dea1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -677,6 +677,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, /* tcp_input.c */ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); +void tcp_done_with_error(struct sock *sk, int err); void tcp_reset(struct sock *sk, struct sk_buff *skb); void tcp_fin(struct sock *sk); void tcp_check_space(struct sock *sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 681b54e1f3a6..2a8f8d8676ff 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -598,7 +598,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) */ mask |= EPOLLOUT | EPOLLWRNORM; } - /* This barrier is coupled with smp_wmb() in tcp_reset() */ + /* This barrier is coupled with smp_wmb() in tcp_done_with_error() */ smp_rmb(); if (READ_ONCE(sk->sk_err) || !skb_queue_empty_lockless(&sk->sk_error_queue)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c04a9c8be9d..5aadf64e554d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4436,9 +4436,26 @@ static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp, return SKB_NOT_DROPPED_YET; } + +void tcp_done_with_error(struct sock *sk, int err) +{ + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + WRITE_ONCE(sk->sk_err, err); + smp_wmb(); + + tcp_write_queue_purge(sk); + tcp_done(sk); + + if (!sock_flag(sk, SOCK_DEAD)) + sk_error_report(sk); +} +EXPORT_SYMBOL(tcp_done_with_error); + /* When we get a reset we do this. */ void tcp_reset(struct sock *sk, struct sk_buff *skb) { + int err; + trace_tcp_receive_reset(sk); /* mptcp can't tell us to ignore reset pkts, @@ -4450,24 +4467,17 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb) /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: - WRITE_ONCE(sk->sk_err, ECONNREFUSED); + err = ECONNREFUSED; break; case TCP_CLOSE_WAIT: - WRITE_ONCE(sk->sk_err, EPIPE); + err = EPIPE; break; case TCP_CLOSE: return; default: - WRITE_ONCE(sk->sk_err, ECONNRESET); + err = ECONNRESET; } - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - - tcp_write_queue_purge(sk); - tcp_done(sk); - - if (!sock_flag(sk, SOCK_DEAD)) - sk_error_report(sk); + tcp_done_with_error(sk, err); } /* -- cgit v1.2.3 From bbb31b7ae14594aa2a7e74923ee38f312404ad66 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 28 May 2024 16:05:09 +0100 Subject: net: dsa: remove mac_prepare()/mac_finish() shims No DSA driver makes use of the mac_prepare()/mac_finish() shimmed operations anymore, so we can remove these. Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/E1sByNx-00ELW1-Vp@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 6 ------ net/dsa/dsa.c | 2 -- net/dsa/port.c | 32 -------------------------------- 3 files changed, 40 deletions(-) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index b60e7e410aba..f9ae3ca66b6f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -882,15 +882,9 @@ struct dsa_switch_ops { struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, int port, phy_interface_t iface); - int (*phylink_mac_prepare)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); void (*phylink_mac_config)(struct dsa_switch *ds, int port, unsigned int mode, const struct phylink_link_state *state); - int (*phylink_mac_finish)(struct dsa_switch *ds, int port, - unsigned int mode, - phy_interface_t interface); void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, unsigned int mode, phy_interface_t interface); diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 12521a7d4048..668c729946ea 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1507,9 +1507,7 @@ static int dsa_switch_probe(struct dsa_switch *ds) if (ds->phylink_mac_ops) { if (ds->ops->phylink_mac_select_pcs || - ds->ops->phylink_mac_prepare || ds->ops->phylink_mac_config || - ds->ops->phylink_mac_finish || ds->ops->phylink_mac_link_down || ds->ops->phylink_mac_link_up) return -EINVAL; diff --git a/net/dsa/port.c b/net/dsa/port.c index 9a249d4ac3a5..e23db9507546 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1549,21 +1549,6 @@ dsa_port_phylink_mac_select_pcs(struct phylink_config *config, return pcs; } -static int dsa_port_phylink_mac_prepare(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - int err = 0; - - if (ds->ops->phylink_mac_prepare) - err = ds->ops->phylink_mac_prepare(ds, dp->index, mode, - interface); - - return err; -} - static void dsa_port_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) @@ -1577,21 +1562,6 @@ static void dsa_port_phylink_mac_config(struct phylink_config *config, ds->ops->phylink_mac_config(ds, dp->index, mode, state); } -static int dsa_port_phylink_mac_finish(struct phylink_config *config, - unsigned int mode, - phy_interface_t interface) -{ - struct dsa_port *dp = dsa_phylink_to_port(config); - struct dsa_switch *ds = dp->ds; - int err = 0; - - if (ds->ops->phylink_mac_finish) - err = ds->ops->phylink_mac_finish(ds, dp->index, mode, - interface); - - return err; -} - static void dsa_port_phylink_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) @@ -1624,9 +1594,7 @@ static void dsa_port_phylink_mac_link_up(struct phylink_config *config, static const struct phylink_mac_ops dsa_port_phylink_mac_ops = { .mac_select_pcs = dsa_port_phylink_mac_select_pcs, - .mac_prepare = dsa_port_phylink_mac_prepare, .mac_config = dsa_port_phylink_mac_config, - .mac_finish = dsa_port_phylink_mac_finish, .mac_link_down = dsa_port_phylink_mac_link_down, .mac_link_up = dsa_port_phylink_mac_link_up, }; -- cgit v1.2.3 From 6f6bfbc595fbae95f4c1ff80c87d089604e5e6a1 Mon Sep 17 00:00:00 2001 From: Selvin Xavier Date: Mon, 27 May 2024 23:11:37 -0700 Subject: RDMA/bnxt_re: Expose the MSN table capability for user library BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED was introduced to share the HW retransmit capability between driver and lib. The main difference in implementation for HW Retransmit support is the usage of MSN table or PSN table . When HW retrans is enabled, HW expects MSN table to be allocated by driver/lib, else PSN table (for older adapters). FW expose a new field which gives MSN capability. Drivers and libs can depend on the new field instead of HW Retrasns capability. For adapters which support HW_RETX feature, MSN table capability will be set. For older adapters, this value will be 0(to maintain backward compatibility with older FW). Rename UAPI just to capture the correct name of the HW capability that driver/library is interested in. No functional impact even if older rdma-core is used. Signed-off-by: Selvin Xavier Link: https://lore.kernel.org/r/1716876697-25970-3-git-send-email-selvin.xavier@broadcom.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 3 +++ include/uapi/rdma/bnxt_re-abi.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index ce9c5bae83bf..d261b09025ca 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -4201,6 +4201,9 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) if (rdev->pacing.dbr_pacing) resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED; + if (_is_host_msn_table(rdev->qplib_res.dattr->dev_cap_flags2)) + resp.comp_mask |= BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED; + if (udata->inlen >= sizeof(ureq)) { rc = ib_copy_from_udata(&ureq, udata, min(udata->inlen, sizeof(ureq))); if (rc) diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h index c0c34aca90ec..e61104f35d73 100644 --- a/include/uapi/rdma/bnxt_re-abi.h +++ b/include/uapi/rdma/bnxt_re-abi.h @@ -55,7 +55,7 @@ enum { BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL, BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL, BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL, - BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED = 0x40, + BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED = 0x40, }; enum bnxt_re_wqe_mode { -- cgit v1.2.3 From fdefb918496235a11d6c5477c34c81aab2c1343b Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Wed, 22 May 2024 01:24:01 -0700 Subject: RDMA/mana_ib: Implement uapi to create and destroy RC QP Implement user requests to create and destroy an RC QP. As the user does not have an FMR queue, it is skipped and NO_FMR flag is used. Signed-off-by: Konstantin Taranov Link: https://lore.kernel.org/r/1716366242-558-3-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Zhu Yanjun Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/mana_ib.h | 4 ++ drivers/infiniband/hw/mana/qp.c | 94 +++++++++++++++++++++++++++++++++++- include/uapi/rdma/mana-abi.h | 9 ++++ 3 files changed, 105 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index a3e229c83c5c..5cccbe397b6d 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -248,6 +248,10 @@ struct mana_rnic_destroy_cq_resp { struct gdma_resp_hdr hdr; }; /* HW Data */ +enum mana_rnic_create_rc_flags { + MANA_RC_FLAG_NO_FMR = 2, +}; + struct mana_rnic_create_qp_req { struct gdma_req_hdr hdr; mana_handle_t adapter; diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index ba13c5abf8ef..7dbeba41b978 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -398,6 +398,78 @@ err_free_vport: return err; } +static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd, + struct ib_qp_init_attr *attr, struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = container_of(ibpd->device, struct mana_ib_dev, ib_dev); + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + struct mana_ib_create_rc_qp_resp resp = {}; + struct mana_ib_ucontext *mana_ucontext; + struct mana_ib_create_rc_qp ucmd = {}; + int i, err, j; + u64 flags = 0; + u32 doorbell; + + if (!udata || udata->inlen < sizeof(ucmd)) + return -EINVAL; + + mana_ucontext = rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, ibucontext); + doorbell = mana_ucontext->doorbell; + flags = MANA_RC_FLAG_NO_FMR; + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, "Failed to copy from udata, %d\n", err); + return err; + } + + for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) { + /* skip FMR for user-level RC QPs */ + if (i == MANA_RC_SEND_QUEUE_FMR) { + qp->rc_qp.queues[i].id = INVALID_QUEUE_ID; + qp->rc_qp.queues[i].gdma_region = GDMA_INVALID_DMA_REGION; + continue; + } + err = mana_ib_create_queue(mdev, ucmd.queue_buf[j], ucmd.queue_size[j], + &qp->rc_qp.queues[i]); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create queue %d, err %d\n", i, err); + goto destroy_queues; + } + j++; + } + + err = mana_ib_gd_create_rc_qp(mdev, qp, attr, doorbell, flags); + if (err) { + ibdev_err(&mdev->ib_dev, "Failed to create rc qp %d\n", err); + goto destroy_queues; + } + qp->ibqp.qp_num = qp->rc_qp.queues[MANA_RC_RECV_QUEUE_RESPONDER].id; + qp->port = attr->port_num; + + if (udata) { + for (i = 0, j = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) { + if (i == MANA_RC_SEND_QUEUE_FMR) + continue; + resp.queue_id[j] = qp->rc_qp.queues[i].id; + j++; + } + err = ib_copy_to_udata(udata, &resp, min(sizeof(resp), udata->outlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, "Failed to copy to udata, %d\n", err); + goto destroy_qp; + } + } + + return 0; + +destroy_qp: + mana_ib_gd_destroy_rc_qp(mdev, qp); +destroy_queues: + while (i-- > 0) + mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]); + return err; +} + int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, struct ib_udata *udata) { @@ -409,8 +481,9 @@ int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, udata); return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata); + case IB_QPT_RC: + return mana_ib_create_rc_qp(ibqp, ibqp->pd, attr, udata); default: - /* Creating QP other than IB_QPT_RAW_PACKET is not supported */ ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n", attr->qp_type); } @@ -473,6 +546,22 @@ static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata) return 0; } +static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + int i; + + /* Ignore return code as there is not much we can do about it. + * The error message is printed inside. + */ + mana_ib_gd_destroy_rc_qp(mdev, qp); + for (i = 0; i < MANA_RC_QUEUE_TYPE_MAX; ++i) + mana_ib_destroy_queue(mdev, &qp->rc_qp.queues[i]); + + return 0; +} + int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); @@ -484,7 +573,8 @@ int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) udata); return mana_ib_destroy_qp_raw(qp, udata); - + case IB_QPT_RC: + return mana_ib_destroy_rc_qp(qp, udata); default: ibdev_dbg(ibqp->device, "Unexpected QP type %u\n", ibqp->qp_type); diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h index 2c41cc315218..45c2df619f07 100644 --- a/include/uapi/rdma/mana-abi.h +++ b/include/uapi/rdma/mana-abi.h @@ -45,6 +45,15 @@ struct mana_ib_create_qp_resp { __u32 reserved; }; +struct mana_ib_create_rc_qp { + __aligned_u64 queue_buf[4]; + __u32 queue_size[4]; +}; + +struct mana_ib_create_rc_qp_resp { + __u32 queue_id[4]; +}; + struct mana_ib_create_wq { __aligned_u64 wq_buf_addr; __u32 wq_buf_size; -- cgit v1.2.3 From 73fc975318e0ab3385c5b3372c7b296ae58c8d6b Mon Sep 17 00:00:00 2001 From: Durai Manickam KR Date: Wed, 24 Apr 2024 11:03:45 +0530 Subject: drm: atmel-hlcdc: Define XLCDC specific registers The register address of the XLCDC IP used in SAM9X7 SoC family are different from the previous HLCDC. Defining those address space with valid macros. Signed-off-by: Durai Manickam KR [manikandan.m@microchip.com: Remove unused macro definitions] Signed-off-by: Manikandan Muralidharan Acked-by: Lee Jones Acked-by: Sam Ravnborg Signed-off-by: Sam Ravnborg Link: https://patchwork.freedesktop.org/patch/msgid/20240424053351.589830-3-manikandan.m@microchip.com --- drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h | 42 ++++++++++++++++++++++++++++ include/linux/mfd/atmel-hlcdc.h | 10 +++++++ 2 files changed, 52 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h index d0ecf0f58cce..c32e5c8809d7 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h @@ -15,6 +15,7 @@ #include +/* LCD controller common registers */ #define ATMEL_HLCDC_LAYER_CHER 0x0 #define ATMEL_HLCDC_LAYER_CHDR 0x4 #define ATMEL_HLCDC_LAYER_CHSR 0x8 @@ -128,6 +129,47 @@ #define ATMEL_HLCDC_MAX_LAYERS 6 +/* XLCDC controller specific registers */ +#define ATMEL_XLCDC_LAYER_ENR 0x10 +#define ATMEL_XLCDC_LAYER_EN BIT(0) + +#define ATMEL_XLCDC_LAYER_IER 0x0 +#define ATMEL_XLCDC_LAYER_IDR 0x4 +#define ATMEL_XLCDC_LAYER_ISR 0xc +#define ATMEL_XLCDC_LAYER_OVR_IRQ(p) BIT(2 + (8 * (p))) + +#define ATMEL_XLCDC_LAYER_PLANE_ADDR(p) (((p) * 0x4) + 0x18) + +#define ATMEL_XLCDC_LAYER_DMA_CFG 0 + +#define ATMEL_XLCDC_LAYER_DMA BIT(0) +#define ATMEL_XLCDC_LAYER_REP BIT(1) +#define ATMEL_XLCDC_LAYER_DISCEN BIT(4) + +#define ATMEL_XLCDC_LAYER_SFACTC_A0_MULT_AS (4 << 6) +#define ATMEL_XLCDC_LAYER_SFACTA_ONE BIT(9) +#define ATMEL_XLCDC_LAYER_DFACTC_M_A0_MULT_AS (6 << 11) +#define ATMEL_XLCDC_LAYER_DFACTA_ONE BIT(14) + +#define ATMEL_XLCDC_LAYER_A0_SHIFT 16 +#define ATMEL_XLCDC_LAYER_A0(x) \ + ((x) << ATMEL_XLCDC_LAYER_A0_SHIFT) + +#define ATMEL_XLCDC_LAYER_VSCALER_LUMA_ENABLE BIT(0) +#define ATMEL_XLCDC_LAYER_VSCALER_CHROMA_ENABLE BIT(1) +#define ATMEL_XLCDC_LAYER_HSCALER_LUMA_ENABLE BIT(4) +#define ATMEL_XLCDC_LAYER_HSCALER_CHROMA_ENABLE BIT(5) + +#define ATMEL_XLCDC_LAYER_VXSYCFG_ONE BIT(0) +#define ATMEL_XLCDC_LAYER_VXSYTAP2_ENABLE BIT(4) +#define ATMEL_XLCDC_LAYER_VXSCCFG_ONE BIT(16) +#define ATMEL_XLCDC_LAYER_VXSCTAP2_ENABLE BIT(20) + +#define ATMEL_XLCDC_LAYER_HXSYCFG_ONE BIT(0) +#define ATMEL_XLCDC_LAYER_HXSYTAP2_ENABLE BIT(4) +#define ATMEL_XLCDC_LAYER_HXSCCFG_ONE BIT(16) +#define ATMEL_XLCDC_LAYER_HXSCTAP2_ENABLE BIT(20) + /** * Atmel HLCDC Layer registers layout structure * diff --git a/include/linux/mfd/atmel-hlcdc.h b/include/linux/mfd/atmel-hlcdc.h index a186119a49b5..80d675a03b39 100644 --- a/include/linux/mfd/atmel-hlcdc.h +++ b/include/linux/mfd/atmel-hlcdc.h @@ -22,6 +22,8 @@ #define ATMEL_HLCDC_DITHER BIT(6) #define ATMEL_HLCDC_DISPDLY BIT(7) #define ATMEL_HLCDC_MODE_MASK GENMASK(9, 8) +#define ATMEL_XLCDC_MODE_MASK GENMASK(10, 8) +#define ATMEL_XLCDC_DPI BIT(11) #define ATMEL_HLCDC_PP BIT(10) #define ATMEL_HLCDC_VSPSU BIT(12) #define ATMEL_HLCDC_VSPHO BIT(13) @@ -34,6 +36,12 @@ #define ATMEL_HLCDC_IDR 0x30 #define ATMEL_HLCDC_IMR 0x34 #define ATMEL_HLCDC_ISR 0x38 +#define ATMEL_XLCDC_ATTRE 0x3c + +#define ATMEL_XLCDC_BASE_UPDATE BIT(0) +#define ATMEL_XLCDC_OVR1_UPDATE BIT(1) +#define ATMEL_XLCDC_OVR3_UPDATE BIT(2) +#define ATMEL_XLCDC_HEO_UPDATE BIT(3) #define ATMEL_HLCDC_CLKPOL BIT(0) #define ATMEL_HLCDC_CLKSEL BIT(2) @@ -48,6 +56,8 @@ #define ATMEL_HLCDC_DISP BIT(2) #define ATMEL_HLCDC_PWM BIT(3) #define ATMEL_HLCDC_SIP BIT(4) +#define ATMEL_XLCDC_SD BIT(5) +#define ATMEL_XLCDC_CM BIT(6) #define ATMEL_HLCDC_SOF BIT(0) #define ATMEL_HLCDC_SYNCDIS BIT(1) -- cgit v1.2.3 From 2c7afc2a880cd4899f9dd6bfa62f10f84773148b Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Thu, 16 May 2024 22:17:31 +0100 Subject: kunit: Cover 'assert.c' with tests There are multiple assertion formatting functions in the `assert.c` file, which are not covered with tests yet. Implement the KUnit test for these functions. The test consists of 11 test cases for the following functions: 1) 'is_literal' 2) 'is_str_literal' 3) 'kunit_assert_prologue', test case for multiple assert types 4) 'kunit_assert_print_msg' 5) 'kunit_unary_assert_format' 6) 'kunit_ptr_not_err_assert_format' 7) 'kunit_binary_assert_format' 8) 'kunit_binary_ptr_assert_format' 9) 'kunit_binary_str_assert_format' 10) 'kunit_assert_hexdump' 11) 'kunit_mem_assert_format' The test aims at maximizing the branch coverage for the assertion formatting functions. As you can see, it covers some of the static helper functions as well, so mark the static functions in `assert.c` as 'VISIBLE_IF_KUNIT' and conditionally export them with EXPORT_SYMBOL_IF_KUNIT. Add the corresponding definitions to `assert.h`. Build the assert test when CONFIG_KUNIT_TEST is enabled, similar to how it is done for the string stream test. Signed-off-by: Ivan Orlov Reviewed-by: Rae Moar Acked-by: David Gow Signed-off-by: Shuah Khan --- include/kunit/assert.h | 11 ++ lib/kunit/Makefile | 1 + lib/kunit/assert.c | 19 ++- lib/kunit/assert_test.c | 388 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 411 insertions(+), 8 deletions(-) create mode 100644 lib/kunit/assert_test.c (limited to 'include') diff --git a/include/kunit/assert.h b/include/kunit/assert.h index 24c2b9fa61e8..7e7490a74b13 100644 --- a/include/kunit/assert.h +++ b/include/kunit/assert.h @@ -218,4 +218,15 @@ void kunit_mem_assert_format(const struct kunit_assert *assert, const struct va_format *message, struct string_stream *stream); +#if IS_ENABLED(CONFIG_KUNIT) +void kunit_assert_print_msg(const struct va_format *message, + struct string_stream *stream); +bool is_literal(const char *text, long long value); +bool is_str_literal(const char *text, const char *value); +void kunit_assert_hexdump(struct string_stream *stream, + const void *buf, + const void *compared_buf, + const size_t len); +#endif + #endif /* _KUNIT_ASSERT_H */ diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile index 309659a32a78..900e6447c8e8 100644 --- a/lib/kunit/Makefile +++ b/lib/kunit/Makefile @@ -22,6 +22,7 @@ obj-$(CONFIG_KUNIT_TEST) += kunit-test.o # string-stream-test compiles built-in only. ifeq ($(CONFIG_KUNIT_TEST),y) obj-$(CONFIG_KUNIT_TEST) += string-stream-test.o +obj-$(CONFIG_KUNIT_TEST) += assert_test.o endif obj-$(CONFIG_KUNIT_EXAMPLE_TEST) += kunit-example-test.o diff --git a/lib/kunit/assert.c b/lib/kunit/assert.c index dd1d633d0fe2..867aa5c4bccf 100644 --- a/lib/kunit/assert.c +++ b/lib/kunit/assert.c @@ -7,6 +7,7 @@ */ #include #include +#include #include "string-stream.h" @@ -30,8 +31,9 @@ void kunit_assert_prologue(const struct kunit_loc *loc, } EXPORT_SYMBOL_GPL(kunit_assert_prologue); -static void kunit_assert_print_msg(const struct va_format *message, - struct string_stream *stream) +VISIBLE_IF_KUNIT +void kunit_assert_print_msg(const struct va_format *message, + struct string_stream *stream) { if (message->fmt) string_stream_add(stream, "\n%pV", message); @@ -89,7 +91,7 @@ void kunit_ptr_not_err_assert_format(const struct kunit_assert *assert, EXPORT_SYMBOL_GPL(kunit_ptr_not_err_assert_format); /* Checks if `text` is a literal representing `value`, e.g. "5" and 5 */ -static bool is_literal(const char *text, long long value) +VISIBLE_IF_KUNIT bool is_literal(const char *text, long long value) { char *buffer; int len; @@ -166,7 +168,7 @@ EXPORT_SYMBOL_GPL(kunit_binary_ptr_assert_format); /* Checks if KUNIT_EXPECT_STREQ() args were string literals. * Note: `text` will have ""s where as `value` will not. */ -static bool is_str_literal(const char *text, const char *value) +VISIBLE_IF_KUNIT bool is_str_literal(const char *text, const char *value) { int len; @@ -208,10 +210,11 @@ EXPORT_SYMBOL_GPL(kunit_binary_str_assert_format); /* Adds a hexdump of a buffer to a string_stream comparing it with * a second buffer. The different bytes are marked with <>. */ -static void kunit_assert_hexdump(struct string_stream *stream, - const void *buf, - const void *compared_buf, - const size_t len) +VISIBLE_IF_KUNIT +void kunit_assert_hexdump(struct string_stream *stream, + const void *buf, + const void *compared_buf, + const size_t len) { size_t i; const u8 *buf1 = buf; diff --git a/lib/kunit/assert_test.c b/lib/kunit/assert_test.c new file mode 100644 index 000000000000..4a5967712186 --- /dev/null +++ b/lib/kunit/assert_test.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * KUnit test for the assertion formatting functions. + * Author: Ivan Orlov + */ +#include +#include "string-stream.h" + +#define TEST_PTR_EXPECTED_BUF_SIZE 32 +#define HEXDUMP_TEST_BUF_LEN 5 +#define ASSERT_TEST_EXPECT_CONTAIN(test, str, substr) KUNIT_EXPECT_TRUE(test, strstr(str, substr)) +#define ASSERT_TEST_EXPECT_NCONTAIN(test, str, substr) KUNIT_EXPECT_FALSE(test, strstr(str, substr)) + +static void kunit_test_is_literal(struct kunit *test) +{ + KUNIT_EXPECT_TRUE(test, is_literal("5", 5)); + KUNIT_EXPECT_TRUE(test, is_literal("0", 0)); + KUNIT_EXPECT_TRUE(test, is_literal("1234567890", 1234567890)); + KUNIT_EXPECT_TRUE(test, is_literal("-1234567890", -1234567890)); + KUNIT_EXPECT_FALSE(test, is_literal("05", 5)); + KUNIT_EXPECT_FALSE(test, is_literal("", 0)); + KUNIT_EXPECT_FALSE(test, is_literal("-0", 0)); + KUNIT_EXPECT_FALSE(test, is_literal("12#45", 1245)); +} + +static void kunit_test_is_str_literal(struct kunit *test) +{ + KUNIT_EXPECT_TRUE(test, is_str_literal("\"Hello, World!\"", "Hello, World!")); + KUNIT_EXPECT_TRUE(test, is_str_literal("\"\"", "")); + KUNIT_EXPECT_TRUE(test, is_str_literal("\"\"\"", "\"")); + KUNIT_EXPECT_FALSE(test, is_str_literal("", "")); + KUNIT_EXPECT_FALSE(test, is_str_literal("\"", "\"")); + KUNIT_EXPECT_FALSE(test, is_str_literal("\"Abacaba", "Abacaba")); + KUNIT_EXPECT_FALSE(test, is_str_literal("Abacaba\"", "Abacaba")); + KUNIT_EXPECT_FALSE(test, is_str_literal("\"Abacaba\"", "\"Abacaba\"")); +} + +KUNIT_DEFINE_ACTION_WRAPPER(kfree_wrapper, kfree, const void *); + +/* this function is used to get a "char *" string from the string stream and defer its cleanup */ +static char *get_str_from_stream(struct kunit *test, struct string_stream *stream) +{ + char *str = string_stream_get_string(stream); + + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, str); + kunit_add_action(test, kfree_wrapper, (void *)str); + + return str; +} + +static void kunit_test_assert_prologue(struct kunit *test) +{ + struct string_stream *stream; + char *str; + const struct kunit_loc location = { + .file = "testfile.c", + .line = 1337, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + /* Test an expectation fail prologue */ + kunit_assert_prologue(&location, KUNIT_EXPECTATION, stream); + str = get_str_from_stream(test, stream); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "EXPECTATION"); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "testfile.c"); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "1337"); + + /* Test an assertion fail prologue */ + string_stream_clear(stream); + kunit_assert_prologue(&location, KUNIT_ASSERTION, stream); + str = get_str_from_stream(test, stream); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "ASSERTION"); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "testfile.c"); + ASSERT_TEST_EXPECT_CONTAIN(test, str, "1337"); +} + +/* + * This function accepts an arbitrary count of parameters and generates a va_format struct, + * which can be used to validate kunit_assert_print_msg function + */ +static void verify_assert_print_msg(struct kunit *test, + struct string_stream *stream, + char *expected, const char *format, ...) +{ + va_list list; + const struct va_format vformat = { + .fmt = format, + .va = &list, + }; + + va_start(list, format); + string_stream_clear(stream); + kunit_assert_print_msg(&vformat, stream); + KUNIT_EXPECT_STREQ(test, get_str_from_stream(test, stream), expected); +} + +static void kunit_test_assert_print_msg(struct kunit *test) +{ + struct string_stream *stream; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + verify_assert_print_msg(test, stream, "\nTest", "Test"); + verify_assert_print_msg(test, stream, "\nAbacaba -123 234", "%s %d %u", + "Abacaba", -123, 234U); + verify_assert_print_msg(test, stream, "", NULL); +} + +/* + * Further code contains the tests for different assert format functions. + * This helper function accepts the assert format function, executes it and + * validates the result string from the stream by checking that all of the + * substrings exist in the output. + */ +static void validate_assert(assert_format_t format_func, struct kunit *test, + const struct kunit_assert *assert, + struct string_stream *stream, int num_checks, ...) +{ + size_t i; + va_list checks; + char *cur_substr_exp; + struct va_format message = { NULL, NULL }; + + va_start(checks, num_checks); + string_stream_clear(stream); + format_func(assert, &message, stream); + + for (i = 0; i < num_checks; i++) { + cur_substr_exp = va_arg(checks, char *); + ASSERT_TEST_EXPECT_CONTAIN(test, get_str_from_stream(test, stream), cur_substr_exp); + } +} + +static void kunit_test_unary_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + struct kunit_unary_assert un_assert = { + .assert = assert, + .condition = "expr", + .expected_true = true, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + validate_assert(kunit_unary_assert_format, test, &un_assert.assert, + stream, 2, "true", "is false"); + + un_assert.expected_true = false; + validate_assert(kunit_unary_assert_format, test, &un_assert.assert, + stream, 2, "false", "is true"); +} + +static void kunit_test_ptr_not_err_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + struct kunit_ptr_not_err_assert not_err_assert = { + .assert = assert, + .text = "expr", + .value = NULL, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + /* Value is NULL. The corresponding message should be printed out */ + validate_assert(kunit_ptr_not_err_assert_format, test, + ¬_err_assert.assert, + stream, 1, "null"); + + /* Value is not NULL, but looks like an error pointer. Error should be printed out */ + not_err_assert.value = (void *)-12; + validate_assert(kunit_ptr_not_err_assert_format, test, + ¬_err_assert.assert, stream, 2, + "error", "-12"); +} + +static void kunit_test_binary_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + struct kunit_binary_assert_text text = { + .left_text = "1 + 2", + .operation = "==", + .right_text = "2", + }; + const struct kunit_binary_assert binary_assert = { + .assert = assert, + .text = &text, + .left_value = 3, + .right_value = 2, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + /* + * Printed values should depend on the input we provide: the left text, right text, left + * value and the right value. + */ + validate_assert(kunit_binary_assert_format, test, &binary_assert.assert, + stream, 4, "1 + 2", "2", "3", "=="); + + text.right_text = "4 - 2"; + validate_assert(kunit_binary_assert_format, test, &binary_assert.assert, + stream, 3, "==", "1 + 2", "4 - 2"); + + text.left_text = "3"; + validate_assert(kunit_binary_assert_format, test, &binary_assert.assert, + stream, 4, "3", "4 - 2", "2", "=="); + + text.right_text = "2"; + validate_assert(kunit_binary_assert_format, test, &binary_assert.assert, + stream, 3, "3", "2", "=="); +} + +static void kunit_test_binary_ptr_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + char *addr_var_a, *addr_var_b; + static const void *var_a = (void *)0xDEADBEEF; + static const void *var_b = (void *)0xBADDCAFE; + struct kunit_binary_assert_text text = { + .left_text = "var_a", + .operation = "==", + .right_text = "var_b", + }; + struct kunit_binary_ptr_assert binary_ptr_assert = { + .assert = assert, + .text = &text, + .left_value = var_a, + .right_value = var_b, + }; + + addr_var_a = kunit_kzalloc(test, TEST_PTR_EXPECTED_BUF_SIZE, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, addr_var_a); + addr_var_b = kunit_kzalloc(test, TEST_PTR_EXPECTED_BUF_SIZE, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, addr_var_b); + /* + * Print the addresses to the buffers first. + * This is necessary as we may have different count of leading zeros in the pointer + * on different architectures. + */ + snprintf(addr_var_a, TEST_PTR_EXPECTED_BUF_SIZE, "%px", var_a); + snprintf(addr_var_b, TEST_PTR_EXPECTED_BUF_SIZE, "%px", var_b); + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + validate_assert(kunit_binary_ptr_assert_format, test, &binary_ptr_assert.assert, + stream, 3, addr_var_a, addr_var_b, "=="); +} + +static void kunit_test_binary_str_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct kunit_assert assert = {}; + static const char *var_a = "abacaba"; + static const char *var_b = "kernel"; + struct kunit_binary_assert_text text = { + .left_text = "var_a", + .operation = "==", + .right_text = "var_b", + }; + struct kunit_binary_str_assert binary_str_assert = { + .assert = assert, + .text = &text, + .left_value = var_a, + .right_value = var_b, + }; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + validate_assert(kunit_binary_str_assert_format, test, + &binary_str_assert.assert, + stream, 5, "var_a", "var_b", "\"abacaba\"", + "\"kernel\"", "=="); + + text.left_text = "\"abacaba\""; + validate_assert(kunit_binary_str_assert_format, test, &binary_str_assert.assert, + stream, 4, "\"abacaba\"", "var_b", "\"kernel\"", "=="); + + text.right_text = "\"kernel\""; + validate_assert(kunit_binary_str_assert_format, test, &binary_str_assert.assert, + stream, 3, "\"abacaba\"", "\"kernel\"", "=="); +} + +static const u8 hex_testbuf1[] = { 0x26, 0x74, 0x6b, 0x9c, 0x55, + 0x45, 0x9d, 0x47, 0xd6, 0x47, + 0x2, 0x89, 0x8c, 0x81, 0x94, + 0x12, 0xfe, 0x01 }; +static const u8 hex_testbuf2[] = { 0x26, 0x74, 0x6b, 0x9c, 0x55, + 0x45, 0x9d, 0x47, 0x21, 0x47, + 0xcd, 0x89, 0x24, 0x50, 0x94, + 0x12, 0xba, 0x01 }; +static void kunit_test_assert_hexdump(struct kunit *test) +{ + struct string_stream *stream; + char *str; + size_t i; + char buf[HEXDUMP_TEST_BUF_LEN]; + + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + /* Check that we are getting output like for non-matching numbers. */ + kunit_assert_hexdump(stream, hex_testbuf1, hex_testbuf2, sizeof(hex_testbuf1)); + str = get_str_from_stream(test, stream); + for (i = 0; i < sizeof(hex_testbuf1); i++) { + snprintf(buf, HEXDUMP_TEST_BUF_LEN, "<%02x>", hex_testbuf1[i]); + if (hex_testbuf1[i] != hex_testbuf2[i]) + ASSERT_TEST_EXPECT_CONTAIN(test, str, buf); + } + /* We shouldn't get any numbers when comparing the buffer with itself. */ + string_stream_clear(stream); + kunit_assert_hexdump(stream, hex_testbuf1, hex_testbuf1, sizeof(hex_testbuf1)); + str = get_str_from_stream(test, stream); + ASSERT_TEST_EXPECT_NCONTAIN(test, str, "<"); + ASSERT_TEST_EXPECT_NCONTAIN(test, str, ">"); +} + +static void kunit_test_mem_assert_format(struct kunit *test) +{ + struct string_stream *stream; + struct string_stream *expected_stream; + struct kunit_assert assert = {}; + static const struct kunit_binary_assert_text text = { + .left_text = "hex_testbuf1", + .operation = "==", + .right_text = "hex_testbuf2", + }; + struct kunit_mem_assert mem_assert = { + .assert = assert, + .text = &text, + .left_value = NULL, + .right_value = hex_testbuf2, + .size = sizeof(hex_testbuf1), + }; + + expected_stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, expected_stream); + stream = kunit_alloc_string_stream(test, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, stream); + + /* The left value is NULL */ + validate_assert(kunit_mem_assert_format, test, &mem_assert.assert, + stream, 2, "hex_testbuf1", "is not null"); + + /* The right value is NULL, the left value is not NULL */ + mem_assert.left_value = hex_testbuf1; + mem_assert.right_value = NULL; + validate_assert(kunit_mem_assert_format, test, &mem_assert.assert, + stream, 2, "hex_testbuf2", "is not null"); + + /* Both arguments are not null */ + mem_assert.left_value = hex_testbuf1; + mem_assert.right_value = hex_testbuf2; + + validate_assert(kunit_mem_assert_format, test, &mem_assert.assert, + stream, 3, "hex_testbuf1", "hex_testbuf2", "=="); +} + +static struct kunit_case assert_test_cases[] = { + KUNIT_CASE(kunit_test_is_literal), + KUNIT_CASE(kunit_test_is_str_literal), + KUNIT_CASE(kunit_test_assert_prologue), + KUNIT_CASE(kunit_test_assert_print_msg), + KUNIT_CASE(kunit_test_unary_assert_format), + KUNIT_CASE(kunit_test_ptr_not_err_assert_format), + KUNIT_CASE(kunit_test_binary_assert_format), + KUNIT_CASE(kunit_test_binary_ptr_assert_format), + KUNIT_CASE(kunit_test_binary_str_assert_format), + KUNIT_CASE(kunit_test_assert_hexdump), + KUNIT_CASE(kunit_test_mem_assert_format), + {} +}; + +static struct kunit_suite assert_test_suite = { + .name = "kunit-assert", + .test_cases = assert_test_cases, +}; + +kunit_test_suites(&assert_test_suite); -- cgit v1.2.3 From 020e6c22bd6e67592f38b47d0f1926a831482560 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 May 2024 15:36:57 -0700 Subject: kcsan: Add example to data_race() kerneldoc header Although the data_race() kerneldoc header accurately states what it does, some of the implications and usage patterns are non-obvious. Therefore, add a brief locking example and also state how to have KCSAN ignore accesses while also preventing the compiler from folding, spindling, or otherwise mutilating the access. [ paulmck: Apply Bart Van Assche feedback. ] [ paulmck: Apply feedback from Marco Elver. ] Reported-by: Bart Van Assche Signed-off-by: Paul E. McKenney Cc: Marco Elver Cc: Breno Leitao Cc: Jens Axboe --- include/linux/compiler.h | 10 ++++++++- .../memory-model/Documentation/access-marking.txt | 24 +++++++++++++++++++++- 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8c252e073bd8..68a24a3a6979 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -194,9 +194,17 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * This data_race() macro is useful for situations in which data races * should be forgiven. One example is diagnostic code that accesses * shared variables but is not a part of the core synchronization design. + * For example, if accesses to a given variable are protected by a lock, + * except for diagnostic code, then the accesses under the lock should + * be plain C-language accesses and those in the diagnostic code should + * use data_race(). This way, KCSAN will complain if buggy lockless + * accesses to that variable are introduced, even if the buggy accesses + * are protected by READ_ONCE() or WRITE_ONCE(). * * This macro *does not* affect normal code generation, but is a hint - * to tooling that data races here are to be ignored. + * to tooling that data races here are to be ignored. If the access must + * be atomic *and* KCSAN should ignore the access, use both data_race() + * and READ_ONCE(), for example, data_race(READ_ONCE(x)). */ #define data_race(expr) \ ({ \ diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt index 65778222183e..3377d01bb512 100644 --- a/tools/memory-model/Documentation/access-marking.txt +++ b/tools/memory-model/Documentation/access-marking.txt @@ -24,6 +24,11 @@ The Linux kernel provides the following access-marking options: 4. WRITE_ONCE(), for example, "WRITE_ONCE(a, b);" The various forms of atomic_set() also fit in here. +5. __data_racy, for example "int __data_racy a;" + +6. KCSAN's negative-marking assertions, ASSERT_EXCLUSIVE_ACCESS() + and ASSERT_EXCLUSIVE_WRITER(), are described in the + "ACCESS-DOCUMENTATION OPTIONS" section below. These may be used in combination, as shown in this admittedly improbable example: @@ -205,6 +210,23 @@ because doing otherwise prevents KCSAN from detecting violations of your code's synchronization rules. +Use of __data_racy +------------------ + +Adding the __data_racy type qualifier to the declaration of a variable +causes KCSAN to treat all accesses to that variable as if they were +enclosed by data_race(). However, __data_racy does not affect the +compiler, though one could imagine hardened kernel builds treating the +__data_racy type qualifier as if it was the volatile keyword. + +Note well that __data_racy is subject to the same pointer-declaration +rules as are other type qualifiers such as const and volatile. +For example: + + int __data_racy *p; // Pointer to data-racy data. + int *__data_racy p; // Data-racy pointer to non-data-racy data. + + ACCESS-DOCUMENTATION OPTIONS ============================ @@ -342,7 +364,7 @@ as follows: Because foo is read locklessly, all accesses are marked. The purpose of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy -concurrent lockless write. +concurrent write, whether marked or not. Lock-Protected Writes With Heuristic Lockless Reads -- cgit v1.2.3 From 73287fe228721b05690e671adbcccc6cf5435be6 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:39 -0700 Subject: bpf: pass bpf_struct_ops_link to callbacks in bpf_struct_ops. Pass an additional pointer of bpf_struct_ops_link to callback function reg, unreg, and update provided by subsystems defined in bpf_struct_ops. A bpf_struct_ops_map can be registered for multiple links. Passing a pointer of bpf_struct_ops_link helps subsystems to distinguish them. This pointer will be used in the later patches to let the subsystem initiate a detachment on a link that was registered to it previously. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-2-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 6 +++--- kernel/bpf/bpf_struct_ops.c | 10 +++++----- net/bpf/bpf_dummy_struct_ops.c | 4 ++-- net/ipv4/bpf_tcp_ca.c | 6 +++--- tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c | 4 ++-- tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c | 6 +++--- 6 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5e694a308081..19f8836382fc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1730,9 +1730,9 @@ struct bpf_struct_ops { int (*init_member)(const struct btf_type *t, const struct btf_member *member, void *kdata, const void *udata); - int (*reg)(void *kdata); - void (*unreg)(void *kdata); - int (*update)(void *kdata, void *old_kdata); + int (*reg)(void *kdata, struct bpf_link *link); + void (*unreg)(void *kdata, struct bpf_link *link); + int (*update)(void *kdata, void *old_kdata, struct bpf_link *link); int (*validate)(void *kdata); void *cfi_stubs; struct module *owner; diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 86c7884abaf8..1542dded7489 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -757,7 +757,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, goto unlock; } - err = st_ops->reg(kdata); + err = st_ops->reg(kdata, NULL); if (likely(!err)) { /* This refcnt increment on the map here after * 'st_ops->reg()' is secure since the state of the @@ -805,7 +805,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key) BPF_STRUCT_OPS_STATE_TOBEFREE); switch (prev_state) { case BPF_STRUCT_OPS_STATE_INUSE: - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, NULL); bpf_map_put(map); return 0; case BPF_STRUCT_OPS_STATE_TOBEFREE: @@ -1060,7 +1060,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link) /* st_link->map can be NULL if * bpf_struct_ops_link_create() fails to register. */ - st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data); + st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data, link); bpf_map_put(&st_map->map); } kfree(st_link); @@ -1125,7 +1125,7 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map goto err_out; } - err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->update(st_map->kvalue.data, old_st_map->kvalue.data, link); if (err) goto err_out; @@ -1176,7 +1176,7 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) if (err) goto err_out; - err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data); + err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data, &link->link); if (err) { bpf_link_cleanup(&link_primer); link = NULL; diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index 891cdf61c65a..3ea52b05adfb 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -272,12 +272,12 @@ static int bpf_dummy_init_member(const struct btf_type *t, return -EOPNOTSUPP; } -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { return -EOPNOTSUPP; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index 18227757ec0c..3f88d0961e5b 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -260,17 +260,17 @@ static int bpf_tcp_ca_check_member(const struct btf_type *t, return 0; } -static int bpf_tcp_ca_reg(void *kdata) +static int bpf_tcp_ca_reg(void *kdata, struct bpf_link *link) { return tcp_register_congestion_control(kdata); } -static void bpf_tcp_ca_unreg(void *kdata) +static void bpf_tcp_ca_unreg(void *kdata, struct bpf_link *link) { tcp_unregister_congestion_control(kdata); } -static int bpf_tcp_ca_update(void *kdata, void *old_kdata) +static int bpf_tcp_ca_update(void *kdata, void *old_kdata, struct bpf_link *link) { return tcp_update_congestion_control(kdata, old_kdata); } diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c index b1dd889d5d7d..948eb3962732 100644 --- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c +++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c @@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t, return 0; } -static int dummy_reg(void *kdata) +static int dummy_reg(void *kdata, struct bpf_link *link) { return 0; } -static void dummy_unreg(void *kdata) +static void dummy_unreg(void *kdata, struct bpf_link *link) { } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 2a18bd320e92..0a09732cde4b 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -820,7 +820,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; -static int bpf_dummy_reg(void *kdata) +static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -835,7 +835,7 @@ static int bpf_dummy_reg(void *kdata) return 0; } -static void bpf_dummy_unreg(void *kdata) +static void bpf_dummy_unreg(void *kdata, struct bpf_link *link) { } @@ -871,7 +871,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = { .owner = THIS_MODULE, }; -static int bpf_dummy_reg2(void *kdata) +static int bpf_dummy_reg2(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops2 *ops = kdata; -- cgit v1.2.3 From 1adddc97aa44c8783f9f0276ea70854d56f9f6df Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:41 -0700 Subject: bpf: support epoll from bpf struct_ops links. Add epoll support to bpf struct_ops links to trigger EPOLLHUP event upon detachment. This patch implements the "poll" of the "struct file_operations" for BPF links and introduces a new "poll" operator in the "struct bpf_link_ops". By implementing "poll" of "struct bpf_link_ops" for the links of struct_ops, the file descriptor of a struct_ops link can be added to an epoll file descriptor to receive EPOLLHUP events. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-4-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 1 + kernel/bpf/bpf_struct_ops.c | 17 +++++++++++++++++ kernel/bpf/syscall.c | 31 ++++++++++++++++++++++++++----- 3 files changed, 44 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 19f8836382fc..5eb61120e4f5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1612,6 +1612,7 @@ struct bpf_link_ops { struct bpf_link_info *info); int (*update_map)(struct bpf_link *link, struct bpf_map *new_map, struct bpf_map *old_map); + __poll_t (*poll)(struct file *file, struct poll_table_struct *pts); }; struct bpf_tramp_link { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 58314b1fc39c..a2cf31b14be4 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -12,6 +12,7 @@ #include #include #include +#include struct bpf_struct_ops_value { struct bpf_struct_ops_common_value common; @@ -56,6 +57,7 @@ struct bpf_struct_ops_map { struct bpf_struct_ops_link { struct bpf_link link; struct bpf_map __rcu *map; + wait_queue_head_t wait_hup; }; static DEFINE_MUTEX(update_mutex); @@ -1167,15 +1169,28 @@ static int bpf_struct_ops_map_link_detach(struct bpf_link *link) mutex_unlock(&update_mutex); + wake_up_interruptible_poll(&st_link->wait_hup, EPOLLHUP); + return 0; } +static __poll_t bpf_struct_ops_map_link_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct bpf_struct_ops_link *st_link = file->private_data; + + poll_wait(file, &st_link->wait_hup, pts); + + return rcu_access_pointer(st_link->map) ? 0 : EPOLLHUP; +} + static const struct bpf_link_ops bpf_struct_ops_map_lops = { .dealloc = bpf_struct_ops_map_link_dealloc, .detach = bpf_struct_ops_map_link_detach, .show_fdinfo = bpf_struct_ops_map_link_show_fdinfo, .fill_link_info = bpf_struct_ops_map_link_fill_link_info, .update_map = bpf_struct_ops_map_link_update, + .poll = bpf_struct_ops_map_link_poll, }; int bpf_struct_ops_link_create(union bpf_attr *attr) @@ -1208,6 +1223,8 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) if (err) goto err_out; + init_waitqueue_head(&link->wait_hup); + /* Hold the update_mutex such that the subsystem cannot * do link->ops->detach() before the link is fully initialized. */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2222c3ff88e7..81efa1944942 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3150,6 +3150,13 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) } #endif +static __poll_t bpf_link_poll(struct file *file, struct poll_table_struct *pts) +{ + struct bpf_link *link = file->private_data; + + return link->ops->poll(file, pts); +} + static const struct file_operations bpf_link_fops = { #ifdef CONFIG_PROC_FS .show_fdinfo = bpf_link_show_fdinfo, @@ -3159,6 +3166,16 @@ static const struct file_operations bpf_link_fops = { .write = bpf_dummy_write, }; +static const struct file_operations bpf_link_fops_poll = { +#ifdef CONFIG_PROC_FS + .show_fdinfo = bpf_link_show_fdinfo, +#endif + .release = bpf_link_release, + .read = bpf_dummy_read, + .write = bpf_dummy_write, + .poll = bpf_link_poll, +}; + static int bpf_link_alloc_id(struct bpf_link *link) { int id; @@ -3201,7 +3218,9 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) return id; } - file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); + file = anon_inode_getfile("bpf_link", + link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, + link, O_CLOEXEC); if (IS_ERR(file)) { bpf_link_free_id(id); put_unused_fd(fd); @@ -3229,7 +3248,9 @@ int bpf_link_settle(struct bpf_link_primer *primer) int bpf_link_new_fd(struct bpf_link *link) { - return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); + return anon_inode_getfd("bpf-link", + link->ops->poll ? &bpf_link_fops_poll : &bpf_link_fops, + link, O_CLOEXEC); } struct bpf_link *bpf_link_get_from_fd(u32 ufd) @@ -3239,7 +3260,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) if (!f.file) return ERR_PTR(-EBADF); - if (f.file->f_op != &bpf_link_fops) { + if (f.file->f_op != &bpf_link_fops && f.file->f_op != &bpf_link_fops_poll) { fdput(f); return ERR_PTR(-EINVAL); } @@ -4971,7 +4992,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, uattr); else if (f.file->f_op == &btf_fops) err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr); - else if (f.file->f_op == &bpf_link_fops) + else if (f.file->f_op == &bpf_link_fops || f.file->f_op == &bpf_link_fops_poll) err = bpf_link_get_info_by_fd(f.file, f.file->private_data, attr, uattr); else @@ -5106,7 +5127,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (!file) return -EBADF; - if (file->f_op == &bpf_link_fops) { + if (file->f_op == &bpf_link_fops || file->f_op == &bpf_link_fops_poll) { struct bpf_link *link = file->private_data; if (link->ops == &bpf_raw_tp_link_lops) { -- cgit v1.2.3 From 67c3e8353f45c27800eecc46e00e8272f063f7d1 Mon Sep 17 00:00:00 2001 From: Kui-Feng Lee Date: Wed, 29 May 2024 23:59:42 -0700 Subject: bpf: export bpf_link_inc_not_zero. bpf_link_inc_not_zero() will be used by kernel modules. We will use it in bpf_testmod.c later. Signed-off-by: Kui-Feng Lee Link: https://lore.kernel.org/r/20240530065946.979330-5-thinker.li@gmail.com Signed-off-by: Martin KaFai Lau --- include/linux/bpf.h | 6 ++++++ kernel/bpf/syscall.c | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5eb61120e4f5..a834f4b761bc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2334,6 +2334,7 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); +struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); struct bpf_link *bpf_link_get_from_fd(u32 ufd); @@ -2705,6 +2706,11 @@ static inline void bpf_link_inc(struct bpf_link *link) { } +static inline struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) +{ + return NULL; +} + static inline void bpf_link_put(struct bpf_link *link) { } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 81efa1944942..5070fa20d05c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -5437,10 +5437,11 @@ static int link_detach(union bpf_attr *attr) return ret; } -static struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) +struct bpf_link *bpf_link_inc_not_zero(struct bpf_link *link) { return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? link : ERR_PTR(-ENOENT); } +EXPORT_SYMBOL(bpf_link_inc_not_zero); struct bpf_link *bpf_link_by_id(u32 id) { -- cgit v1.2.3 From 9ec54934ce857065e38523a2010e20182e76f515 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Thu, 30 May 2024 17:25:07 +0300 Subject: scsi: ufs: core: Allow RTT negotiation The rtt-upiu packets precede any data-out upiu packets, thus synchronizing the data input to the device: this mostly applies to write operations, but there are other operations that requires rtt as well. There are several rules binding this rtt - data-out dialog, specifically There can be at most outstanding bMaxNumOfRTT such packets. This might have an effect on write performance (sequential write in particular), as each data-out upiu must wait for its rtt sibling. UFSHCI expects bMaxNumOfRTT to be min(bDeviceRTTCap, NORTT). However, as of today, there does not appears to be no-one who sets it: not the host controller nor the driver. It wasn't an issue up to now: bMaxNumOfRTT is set to 2 after manufacturing, and wasn't limiting the write performance. UFS4.0, and specifically gear 5 changes this, and requires the device to be more attentive. This doesn't come free - the device has to allocate more resources to that end, but the sequential write performance improvement is significant. Early measurements shows 25% gain when moving from rtt 2 to 9. Therefore, set bMaxNumOfRTT to be min(bDeviceRTTCap, NORTT) as UFSHCI expects. Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240530142510.734-2-avri.altman@wdc.com Reviewed-by: Bean Huo Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 38 ++++++++++++++++++++++++++++++++++++++ include/ufs/ufs.h | 2 ++ include/ufs/ufshcd.h | 2 ++ include/ufs/ufshci.h | 1 + 4 files changed, 43 insertions(+) (limited to 'include') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 0cf07194bbe8..dda6d7e44436 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -102,6 +102,9 @@ /* Default RTC update every 10 seconds */ #define UFS_RTC_UPDATE_INTERVAL_MS (10 * MSEC_PER_SEC) +/* bMaxNumOfRTT is equal to two after device manufacturing */ +#define DEFAULT_MAX_NUM_RTT 2 + /* UFSHC 4.0 compliant HC support this mode. */ static bool use_mcq_mode = true; @@ -2405,6 +2408,8 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) ((hba->capabilities & MASK_TASK_MANAGEMENT_REQUEST_SLOTS) >> 16) + 1; hba->reserved_slot = hba->nutrs - 1; + hba->nortt = FIELD_GET(MASK_NUMBER_OUTSTANDING_RTT, hba->capabilities) + 1; + /* Read crypto capabilities */ err = ufshcd_hba_init_crypto_capabilities(hba); if (err) { @@ -8121,6 +8126,35 @@ out: dev_info->b_ext_iid_en = ext_iid_en; } +static void ufshcd_set_rtt(struct ufs_hba *hba) +{ + struct ufs_dev_info *dev_info = &hba->dev_info; + u32 rtt = 0; + u32 dev_rtt = 0; + + /* RTT override makes sense only for UFS-4.0 and above */ + if (dev_info->wspecversion < 0x400) + return; + + if (ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_MAX_NUM_OF_RTT, 0, 0, &dev_rtt)) { + dev_err(hba->dev, "failed reading bMaxNumOfRTT\n"); + return; + } + + /* do not override if it was already written */ + if (dev_rtt != DEFAULT_MAX_NUM_RTT) + return; + + rtt = min_t(int, dev_info->rtt_cap, hba->nortt); + if (rtt == dev_rtt) + return; + + if (ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, + QUERY_ATTR_IDN_MAX_NUM_OF_RTT, 0, 0, &rtt)) + dev_err(hba->dev, "failed writing bMaxNumOfRTT\n"); +} + void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, const struct ufs_dev_quirk *fixups) { @@ -8256,6 +8290,8 @@ static int ufs_get_device_desc(struct ufs_hba *hba) desc_buf[DEVICE_DESC_PARAM_SPEC_VER + 1]; dev_info->bqueuedepth = desc_buf[DEVICE_DESC_PARAM_Q_DPTH]; + dev_info->rtt_cap = desc_buf[DEVICE_DESC_PARAM_RTT_CAP]; + model_index = desc_buf[DEVICE_DESC_PARAM_PRDCT_NAME]; err = ufshcd_read_string_desc(hba, model_index, @@ -8508,6 +8544,8 @@ static int ufshcd_device_params_init(struct ufs_hba *hba) goto out; } + ufshcd_set_rtt(hba); + ufshcd_get_ref_clk_gating_wait(hba); if (!ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_READ_FLAG, diff --git a/include/ufs/ufs.h b/include/ufs/ufs.h index b6003749bc83..853e95957c31 100644 --- a/include/ufs/ufs.h +++ b/include/ufs/ufs.h @@ -592,6 +592,8 @@ struct ufs_dev_info { enum ufs_rtc_time rtc_type; time64_t rtc_time_baseline; u32 rtc_update_period; + + u8 rtt_cap; /* bDeviceRTTCap */ }; /* diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index bad88bd91995..d74bd2d67b06 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -819,6 +819,7 @@ enum ufshcd_mcq_opr { * @capabilities: UFS Controller Capabilities * @mcq_capabilities: UFS Multi Circular Queue capabilities * @nutrs: Transfer Request Queue depth supported by controller + * @nortt - Max outstanding RTTs supported by controller * @nutmrs: Task Management Queue depth supported by controller * @reserved_slot: Used to submit device commands. Protected by @dev_cmd.lock. * @ufs_version: UFS Version to which controller complies @@ -957,6 +958,7 @@ struct ufs_hba { u32 capabilities; int nutrs; + int nortt; u32 mcq_capabilities; int nutmrs; u32 reserved_slot; diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 385e1c6b8d60..c50f92bf2e1d 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -68,6 +68,7 @@ enum { /* Controller capability masks */ enum { MASK_TRANSFER_REQUESTS_SLOTS = 0x0000001F, + MASK_NUMBER_OUTSTANDING_RTT = 0x0000FF00, MASK_TASK_MANAGEMENT_REQUEST_SLOTS = 0x00070000, MASK_EHSLUTRD_SUPPORTED = 0x00400000, MASK_AUTO_HIBERN8_SUPPORT = 0x00800000, -- cgit v1.2.3 From e75ff63300c5e8fac31649f438ebad6af88e0032 Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Thu, 30 May 2024 17:25:08 +0300 Subject: scsi: ufs: core: Maximum RTT supported by the host driver Allow platform vendors to take precedence having their own max rtt support. This makes sense because the host controller's nortt characteristic may vary among vendors. while at it, set this value for Mediatek, as requested by Peter - https://lore.kernel.org/all/0a57d6bab739d6a10584f2baba115d00dfc9c94c.camel@mediatek.com/ Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20240530142510.734-3-avri.altman@wdc.com Reviewed-by: Peter Wang Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 5 ++++- drivers/ufs/host/ufs-mediatek.c | 1 + drivers/ufs/host/ufs-mediatek.h | 3 +++ include/ufs/ufshcd.h | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index dda6d7e44436..41bf2e249c83 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -8131,6 +8131,8 @@ static void ufshcd_set_rtt(struct ufs_hba *hba) struct ufs_dev_info *dev_info = &hba->dev_info; u32 rtt = 0; u32 dev_rtt = 0; + int host_rtt_cap = hba->vops && hba->vops->max_num_rtt ? + hba->vops->max_num_rtt : hba->nortt; /* RTT override makes sense only for UFS-4.0 and above */ if (dev_info->wspecversion < 0x400) @@ -8146,7 +8148,8 @@ static void ufshcd_set_rtt(struct ufs_hba *hba) if (dev_rtt != DEFAULT_MAX_NUM_RTT) return; - rtt = min_t(int, dev_info->rtt_cap, hba->nortt); + rtt = min_t(int, dev_info->rtt_cap, host_rtt_cap); + if (rtt == dev_rtt) return; diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c index c4f997196c57..c7a0ab9b1f59 100644 --- a/drivers/ufs/host/ufs-mediatek.c +++ b/drivers/ufs/host/ufs-mediatek.c @@ -1785,6 +1785,7 @@ static int ufs_mtk_config_esi(struct ufs_hba *hba) */ static const struct ufs_hba_variant_ops ufs_hba_mtk_vops = { .name = "mediatek.ufshci", + .max_num_rtt = MTK_MAX_NUM_RTT, .init = ufs_mtk_init, .get_ufs_hci_version = ufs_mtk_get_ufs_hci_version, .setup_clocks = ufs_mtk_setup_clocks, diff --git a/drivers/ufs/host/ufs-mediatek.h b/drivers/ufs/host/ufs-mediatek.h index 3ff17e95afab..05d76a6bd772 100644 --- a/drivers/ufs/host/ufs-mediatek.h +++ b/drivers/ufs/host/ufs-mediatek.h @@ -189,4 +189,7 @@ struct ufs_mtk_host { /* MTK delay of autosuspend: 500 ms */ #define MTK_RPM_AUTOSUSPEND_DELAY_MS 500 +/* MTK RTT support number */ +#define MTK_MAX_NUM_RTT 2 + #endif /* !_UFS_MEDIATEK_H */ diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d74bd2d67b06..ef04ec8aad69 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -295,6 +295,7 @@ struct ufs_pwr_mode_info { /** * struct ufs_hba_variant_ops - variant specific callbacks * @name: variant name + * @max_num_rtt: maximum RTT supported by the host * @init: called when the driver is initialized * @exit: called to cleanup everything done in init * @get_ufs_hci_version: called to get UFS HCI version @@ -332,6 +333,7 @@ struct ufs_pwr_mode_info { */ struct ufs_hba_variant_ops { const char *name; + int max_num_rtt; int (*init)(struct ufs_hba *); void (*exit)(struct ufs_hba *); u32 (*get_ufs_hci_version)(struct ufs_hba *); -- cgit v1.2.3 From 2fc39848952dfb91a9233563cc1444669b8e79c3 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Mon, 20 May 2024 07:14:56 +0900 Subject: scsi: ufs: mcq: Fix missing argument 'hba' in MCQ_OPR_OFFSET_n The MCQ_OPR_OFFSET_n macro takes 'hba' in the caller context without receiving 'hba' instance as an argument. To prevent potential bugs in future use cases, add an argument 'hba'. Fixes: 2468da61ea09 ("scsi: ufs: core: mcq: Configure operation and runtime interface") Cc: Asutosh Das Signed-off-by: Minwoo Im Link: https://lore.kernel.org/r/20240519221457.772346-2-minwoo.im@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs-mcq.c | 10 ++++------ include/ufs/ufshcd.h | 6 ++++++ 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 005d63ab1f44..3dc34fa2a81d 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -231,8 +231,6 @@ int ufshcd_mcq_memory_alloc(struct ufs_hba *hba) /* Operation and runtime registers configuration */ #define MCQ_CFG_n(r, i) ((r) + MCQ_QCFG_SIZE * (i)) -#define MCQ_OPR_OFFSET_n(p, i) \ - (hba->mcq_opr[(p)].offset + hba->mcq_opr[(p)].stride * (i)) static void __iomem *mcq_opr_base(struct ufs_hba *hba, enum ufshcd_mcq_opr n, int i) @@ -343,10 +341,10 @@ void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba) ufsmcq_writelx(hba, upper_32_bits(hwq->sqe_dma_addr), MCQ_CFG_n(REG_SQUBA, i)); /* Submission Queue Doorbell Address Offset */ - ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_SQD, i), + ufsmcq_writelx(hba, ufshcd_mcq_opr_offset(hba, OPR_SQD, i), MCQ_CFG_n(REG_SQDAO, i)); /* Submission Queue Interrupt Status Address Offset */ - ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_SQIS, i), + ufsmcq_writelx(hba, ufshcd_mcq_opr_offset(hba, OPR_SQIS, i), MCQ_CFG_n(REG_SQISAO, i)); /* Completion Queue Lower Base Address */ @@ -356,10 +354,10 @@ void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba) ufsmcq_writelx(hba, upper_32_bits(hwq->cqe_dma_addr), MCQ_CFG_n(REG_CQUBA, i)); /* Completion Queue Doorbell Address Offset */ - ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_CQD, i), + ufsmcq_writelx(hba, ufshcd_mcq_opr_offset(hba, OPR_CQD, i), MCQ_CFG_n(REG_CQDAO, i)); /* Completion Queue Interrupt Status Address Offset */ - ufsmcq_writelx(hba, MCQ_OPR_OFFSET_n(OPR_CQIS, i), + ufsmcq_writelx(hba, ufshcd_mcq_opr_offset(hba, OPR_CQIS, i), MCQ_CFG_n(REG_CQISAO, i)); /* Save the base addresses for quicker access */ diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index ef04ec8aad69..1150b86e9355 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1135,6 +1135,12 @@ static inline bool is_mcq_enabled(struct ufs_hba *hba) return hba->mcq_enabled; } +static inline unsigned int ufshcd_mcq_opr_offset(struct ufs_hba *hba, + enum ufshcd_mcq_opr opr, int idx) +{ + return hba->mcq_opr[opr].offset + hba->mcq_opr[opr].stride * idx; +} + #ifdef CONFIG_SCSI_UFS_VARIABLE_SG_ENTRY_SIZE static inline size_t ufshcd_sg_entry_size(const struct ufs_hba *hba) { -- cgit v1.2.3 From e8a1d87b7983b461d1d625e2973cdaadc0bd8ff5 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Mon, 20 May 2024 07:14:57 +0900 Subject: scsi: ufs: mcq: Convert MCQ_CFG_n to an inline function Inline functions are preferred over macros. Convert the MCQ_CFG_n macro to an inline function. Signed-off-by: Minwoo Im Link: https://lore.kernel.org/r/20240519221457.772346-3-minwoo.im@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs-mcq.c | 25 ++++++++++--------------- include/ufs/ufshcd.h | 7 +++++++ 2 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 3dc34fa2a81d..52210c4c20dc 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -25,7 +25,6 @@ #define QUEUE_ID_OFFSET 16 #define MCQ_CFG_MAC_MASK GENMASK(16, 8) -#define MCQ_QCFG_SIZE 0x40 #define MCQ_ENTRY_SIZE_IN_DWORD 8 #define CQE_UCD_BA GENMASK_ULL(63, 7) @@ -228,10 +227,6 @@ int ufshcd_mcq_memory_alloc(struct ufs_hba *hba) return 0; } - -/* Operation and runtime registers configuration */ -#define MCQ_CFG_n(r, i) ((r) + MCQ_QCFG_SIZE * (i)) - static void __iomem *mcq_opr_base(struct ufs_hba *hba, enum ufshcd_mcq_opr n, int i) { @@ -336,29 +331,29 @@ void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba) /* Submission Queue Lower Base Address */ ufsmcq_writelx(hba, lower_32_bits(hwq->sqe_dma_addr), - MCQ_CFG_n(REG_SQLBA, i)); + ufshcd_mcq_cfg_offset(REG_SQLBA, i)); /* Submission Queue Upper Base Address */ ufsmcq_writelx(hba, upper_32_bits(hwq->sqe_dma_addr), - MCQ_CFG_n(REG_SQUBA, i)); + ufshcd_mcq_cfg_offset(REG_SQUBA, i)); /* Submission Queue Doorbell Address Offset */ ufsmcq_writelx(hba, ufshcd_mcq_opr_offset(hba, OPR_SQD, i), - MCQ_CFG_n(REG_SQDAO, i)); + ufshcd_mcq_cfg_offset(REG_SQDAO, i)); /* Submission Queue Interrupt Status Address Offset */ ufsmcq_writelx(hba, ufshcd_mcq_opr_offset(hba, OPR_SQIS, i), - MCQ_CFG_n(REG_SQISAO, i)); + ufshcd_mcq_cfg_offset(REG_SQISAO, i)); /* Completion Queue Lower Base Address */ ufsmcq_writelx(hba, lower_32_bits(hwq->cqe_dma_addr), - MCQ_CFG_n(REG_CQLBA, i)); + ufshcd_mcq_cfg_offset(REG_CQLBA, i)); /* Completion Queue Upper Base Address */ ufsmcq_writelx(hba, upper_32_bits(hwq->cqe_dma_addr), - MCQ_CFG_n(REG_CQUBA, i)); + ufshcd_mcq_cfg_offset(REG_CQUBA, i)); /* Completion Queue Doorbell Address Offset */ ufsmcq_writelx(hba, ufshcd_mcq_opr_offset(hba, OPR_CQD, i), - MCQ_CFG_n(REG_CQDAO, i)); + ufshcd_mcq_cfg_offset(REG_CQDAO, i)); /* Completion Queue Interrupt Status Address Offset */ ufsmcq_writelx(hba, ufshcd_mcq_opr_offset(hba, OPR_CQIS, i), - MCQ_CFG_n(REG_CQISAO, i)); + ufshcd_mcq_cfg_offset(REG_CQISAO, i)); /* Save the base addresses for quicker access */ hwq->mcq_sq_head = mcq_opr_base(hba, OPR_SQD, i) + REG_SQHP; @@ -375,7 +370,7 @@ void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba) /* Completion Queue Enable|Size to Completion Queue Attribute */ ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize, - MCQ_CFG_n(REG_CQATTR, i)); + ufshcd_mcq_cfg_offset(REG_CQATTR, i)); /* * Submission Qeueue Enable|Size|Completion Queue ID to @@ -383,7 +378,7 @@ void ufshcd_mcq_make_queues_operational(struct ufs_hba *hba) */ ufsmcq_writel(hba, (1 << QUEUE_EN_OFFSET) | qsize | (i << QUEUE_ID_OFFSET), - MCQ_CFG_n(REG_SQATTR, i)); + ufshcd_mcq_cfg_offset(REG_SQATTR, i)); } } EXPORT_SYMBOL_GPL(ufshcd_mcq_make_queues_operational); diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index 1150b86e9355..df68fb1d4f3f 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1130,6 +1130,8 @@ struct ufs_hw_queue { struct mutex sq_mutex; }; +#define MCQ_QCFG_SIZE 0x40 + static inline bool is_mcq_enabled(struct ufs_hba *hba) { return hba->mcq_enabled; @@ -1141,6 +1143,11 @@ static inline unsigned int ufshcd_mcq_opr_offset(struct ufs_hba *hba, return hba->mcq_opr[opr].offset + hba->mcq_opr[opr].stride * idx; } +static inline unsigned int ufshcd_mcq_cfg_offset(unsigned int reg, int idx) +{ + return reg + MCQ_QCFG_SIZE * idx; +} + #ifdef CONFIG_SCSI_UFS_VARIABLE_SG_ENTRY_SIZE static inline size_t ufshcd_sg_entry_size(const struct ufs_hba *hba) { -- cgit v1.2.3 From a79d8fe2ff8e78e549dc86cc853a61b029404871 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Wed, 29 May 2024 12:09:08 +0800 Subject: ipv6: sr: restruct ifdefines There are too many ifdef in IPv6 segment routing code that may cause logic problems. like commit 160e9d275218 ("ipv6: sr: fix invalid unregister error path"). To avoid this, the init functions are redefined for both cases. The code could be more clear after all fidefs are removed. Suggested-by: Simon Horman Suggested-by: David Ahern Signed-off-by: Hangbin Liu Reviewed-by: Sabrina Dubroca Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240529040908.3472952-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- include/net/seg6.h | 7 +++++++ include/net/seg6_hmac.h | 7 +++++++ net/ipv6/seg6.c | 33 +++++---------------------------- 3 files changed, 19 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/seg6.h b/include/net/seg6.h index af668f17b398..82b3fbbcbb93 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -52,10 +52,17 @@ static inline struct seg6_pernet_data *seg6_pernet(struct net *net) extern int seg6_init(void); extern void seg6_exit(void); +#ifdef CONFIG_IPV6_SEG6_LWTUNNEL extern int seg6_iptunnel_init(void); extern void seg6_iptunnel_exit(void); extern int seg6_local_init(void); extern void seg6_local_exit(void); +#else +static inline int seg6_iptunnel_init(void) { return 0; } +static inline void seg6_iptunnel_exit(void) {} +static inline int seg6_local_init(void) { return 0; } +static inline void seg6_local_exit(void) {} +#endif extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced); extern struct ipv6_sr_hdr *seg6_get_srh(struct sk_buff *skb, int flags); diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 2b5d2ee5613e..24f733b3e3fe 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -49,9 +49,16 @@ extern int seg6_hmac_info_del(struct net *net, u32 key); extern int seg6_push_hmac(struct net *net, struct in6_addr *saddr, struct ipv6_sr_hdr *srh); extern bool seg6_hmac_validate_skb(struct sk_buff *skb); +#ifdef CONFIG_IPV6_SEG6_HMAC extern int seg6_hmac_init(void); extern void seg6_hmac_exit(void); extern int seg6_hmac_net_init(struct net *net); extern void seg6_hmac_net_exit(struct net *net); +#else +static inline int seg6_hmac_init(void) { return 0; } +static inline void seg6_hmac_exit(void) {} +static inline int seg6_hmac_net_init(struct net *net) { return 0; } +static inline void seg6_hmac_net_exit(struct net *net) {} +#endif #endif diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index a31521e270f7..180da19c148c 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -21,9 +21,7 @@ #include #include #include -#ifdef CONFIG_IPV6_SEG6_HMAC #include -#endif bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len, bool reduced) { @@ -437,13 +435,11 @@ static int __net_init seg6_net_init(struct net *net) net->ipv6.seg6_data = sdata; -#ifdef CONFIG_IPV6_SEG6_HMAC if (seg6_hmac_net_init(net)) { kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); return -ENOMEM; } -#endif return 0; } @@ -452,9 +448,7 @@ static void __net_exit seg6_net_exit(struct net *net) { struct seg6_pernet_data *sdata = seg6_pernet(net); -#ifdef CONFIG_IPV6_SEG6_HMAC seg6_hmac_net_exit(net); -#endif kfree(rcu_dereference_raw(sdata->tun_src)); kfree(sdata); @@ -520,41 +514,28 @@ int __init seg6_init(void) if (err) goto out_unregister_pernet; -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL err = seg6_iptunnel_init(); if (err) goto out_unregister_genl; err = seg6_local_init(); - if (err) { - seg6_iptunnel_exit(); - goto out_unregister_genl; - } -#endif + if (err) + goto out_unregister_iptun; -#ifdef CONFIG_IPV6_SEG6_HMAC err = seg6_hmac_init(); if (err) - goto out_unregister_iptun; -#endif + goto out_unregister_seg6; pr_info("Segment Routing with IPv6\n"); out: return err; -#ifdef CONFIG_IPV6_SEG6_HMAC -out_unregister_iptun: -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL +out_unregister_seg6: seg6_local_exit(); +out_unregister_iptun: seg6_iptunnel_exit(); -#endif -#endif -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL out_unregister_genl: -#endif -#if IS_ENABLED(CONFIG_IPV6_SEG6_LWTUNNEL) || IS_ENABLED(CONFIG_IPV6_SEG6_HMAC) genl_unregister_family(&seg6_genl_family); -#endif out_unregister_pernet: unregister_pernet_subsys(&ip6_segments_ops); goto out; @@ -562,13 +543,9 @@ out_unregister_pernet: void seg6_exit(void) { -#ifdef CONFIG_IPV6_SEG6_HMAC seg6_hmac_exit(); -#endif -#ifdef CONFIG_IPV6_SEG6_LWTUNNEL seg6_local_exit(); seg6_iptunnel_exit(); -#endif genl_unregister_family(&seg6_genl_family); unregister_pernet_subsys(&ip6_segments_ops); } -- cgit v1.2.3 From 02d00dc73d8d0613f82063ed53d67912a422c21e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 May 2024 14:29:29 +0100 Subject: net: phylink: rename ovr_an_inband to default_an_inband Since ovr_an_inband no longer overrides every MLO_AN_xxx mode, rename it to reflect what it now does - it changes the default mode from MLO_AN_PHY to MLO_AN_INBAND. Fix up the two users of this. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/E1sCJMv-00Ecr1-Sk@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/fman/fman_memac.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/phy/phylink.c | 2 +- include/linux/phylink.h | 5 +++-- 4 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c index 92b8f4ab26f1..9c44a3581950 100644 --- a/drivers/net/ethernet/freescale/fman/fman_memac.c +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c @@ -1232,7 +1232,7 @@ int memac_initialization(struct mac_device *mac_dev, !of_property_read_bool(mac_node, "managed") && mac_dev->phy_if != PHY_INTERFACE_MODE_MII && !phy_interface_mode_is_rgmii(mac_dev->phy_if)) - mac_dev->phylink_config.ovr_an_inband = true; + mac_dev->phylink_config.default_an_inband = true; of_node_put(fixed); err = memac_init(mac_dev->fman_mac); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ca19b232431a..488b2fd2349c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1221,7 +1221,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) mdio_bus_data = priv->plat->mdio_bus_data; if (mdio_bus_data) - priv->phylink_config.ovr_an_inband = + priv->phylink_config.default_an_inband = mdio_bus_data->xpcs_an_inband; /* Set the platform/firmware specified interface mode. Note, phylink diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index c81f1c1ee675..02427378acfd 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -885,7 +885,7 @@ static int phylink_parse_mode(struct phylink *pl, const char *managed; unsigned long caps; - if (pl->config->ovr_an_inband) + if (pl->config->default_an_inband) pl->cfg_link_an_mode = MLO_AN_INBAND; dn = fwnode_get_named_child_node(fwnode, "fixed-link"); diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 5ea6b2ad2396..a30a692acc32 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -141,7 +141,8 @@ enum phylink_op_type { * @mac_requires_rxc: if true, the MAC always requires a receive clock from PHY. * The PHY driver should start the clock signal as soon as * possible and avoid stopping it during suspend events. - * @ovr_an_inband: if true, override PCS to MLO_AN_INBAND + * @default_an_inband: if true, defaults to MLO_AN_INBAND rather than + * MLO_AN_PHY. A fixed-link specification will override. * @get_fixed_state: callback to execute to determine the fixed link state, * if MAC link is at %MLO_AN_FIXED mode. * @supported_interfaces: bitmap describing which PHY_INTERFACE_MODE_xxx @@ -154,7 +155,7 @@ struct phylink_config { bool poll_fixed_state; bool mac_managed_pm; bool mac_requires_rxc; - bool ovr_an_inband; + bool default_an_inband; void (*get_fixed_state)(struct phylink_config *config, struct phylink_link_state *state); DECLARE_PHY_INTERFACE_MASK(supported_interfaces); -- cgit v1.2.3 From 83f55b01dd903040d6ab64f4f9d78a27391a5916 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Wed, 29 May 2024 14:29:40 +0100 Subject: net: stmmac: rename xpcs_an_inband to default_an_inband Rename xpcs_an_inband to default_an_inband to reflect the change in phylink and its changed functionality. Signed-off-by: Russell King (Oracle) Reviewed-by: Andrew Halaney Link: https://lore.kernel.org/r/E1sCJN6-00EcrD-43@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 8 ++++---- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- include/linux/stmmac.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 60283543ffc8..5e96146b8bd9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -248,7 +248,7 @@ static void intel_speed_mode_2500(struct net_device *ndev, void *intel_data) dev_info(priv->device, "Link Speed Mode: 2.5Gbps\n"); priv->plat->max_speed = 2500; priv->plat->phy_interface = PHY_INTERFACE_MODE_2500BASEX; - priv->plat->mdio_bus_data->xpcs_an_inband = false; + priv->plat->mdio_bus_data->default_an_inband = false; } else { priv->plat->max_speed = 1000; } @@ -586,16 +586,16 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, if (plat->phy_interface == PHY_INTERFACE_MODE_SGMII || plat->phy_interface == PHY_INTERFACE_MODE_1000BASEX) { plat->mdio_bus_data->has_xpcs = true; - plat->mdio_bus_data->xpcs_an_inband = true; + plat->mdio_bus_data->default_an_inband = true; } - /* For fixed-link setup, we clear xpcs_an_inband */ + /* For fixed-link setup, we clear default_an_inband */ if (fwnode) { struct fwnode_handle *fixed_node; fixed_node = fwnode_get_named_child_node(fwnode, "fixed-link"); if (fixed_node) - plat->mdio_bus_data->xpcs_an_inband = false; + plat->mdio_bus_data->default_an_inband = false; fwnode_handle_put(fixed_node); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 488b2fd2349c..bbedf2a8c60f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1222,7 +1222,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) mdio_bus_data = priv->plat->mdio_bus_data; if (mdio_bus_data) priv->phylink_config.default_an_inband = - mdio_bus_data->xpcs_an_inband; + mdio_bus_data->default_an_inband; /* Set the platform/firmware specified interface mode. Note, phylink * deals with the PHY interface mode, not the MAC interface mode. diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index f92c195c76ed..8f0f156d50d3 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -83,7 +83,7 @@ struct stmmac_priv; struct stmmac_mdio_bus_data { unsigned int phy_mask; unsigned int has_xpcs; - unsigned int xpcs_an_inband; + unsigned int default_an_inband; int *irqs; int probed_phy_irq; bool needs_reset; -- cgit v1.2.3 From 5607ca92e6274dfb85d0ff7c4e91e6c4ddb6d25c Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 13:41:22 +0200 Subject: leds: core: Add led_mc_set_brightness() function Add a new led_mc_set_brightness() function for in kernel color/brightness changing of multi-color LEDs. led-class-multicolor can be build as a module and led_mc_set_brightness() will have the builtin callers, so put led_mc_set_brightness() inside led-core instead, just like how led_set_brightness() is part of the core and not of the led-class object. This also adds a new LED_MULTI_COLOR led_classdev flag to allow led_mc_set_brightness() to verify that it is operating on a multi-color LED classdev, avoiding casting the passed in LED classdev to a multi-color LED classdev, when it actually is not a multi-color LED. Signed-off-by: Hans de Goede Reviewed-by: Jacek Anaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531114124.45346-5-hdegoede@redhat.com Signed-off-by: Lee Jones --- drivers/leds/led-class-multicolor.c | 1 + drivers/leds/led-core.c | 31 +++++++++++++++++++++++++++++++ include/linux/leds.h | 20 ++++++++++++++++++++ 3 files changed, 52 insertions(+) (limited to 'include') diff --git a/drivers/leds/led-class-multicolor.c b/drivers/leds/led-class-multicolor.c index ec62a4811613..df01c0e66c8b 100644 --- a/drivers/leds/led-class-multicolor.c +++ b/drivers/leds/led-class-multicolor.c @@ -134,6 +134,7 @@ int led_classdev_multicolor_register_ext(struct device *parent, return -EINVAL; led_cdev = &mcled_cdev->led_cdev; + led_cdev->flags |= LED_MULTI_COLOR; mcled_cdev->led_cdev.groups = led_multicolor_groups; return led_classdev_register_ext(parent, led_cdev, init_data); diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 89c9806cc97f..ef7d1c6767ca 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -362,6 +363,36 @@ int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value) } EXPORT_SYMBOL_GPL(led_set_brightness_sync); +/* + * This is a led-core function because just like led_set_brightness() + * it is used in the kernel by e.g. triggers. + */ +void led_mc_set_brightness(struct led_classdev *led_cdev, + unsigned int *intensity_value, unsigned int num_colors, + unsigned int brightness) +{ + struct led_classdev_mc *mcled_cdev; + unsigned int i; + + if (!(led_cdev->flags & LED_MULTI_COLOR)) { + dev_err_once(led_cdev->dev, "error not a multi-color LED\n"); + return; + } + + mcled_cdev = lcdev_to_mccdev(led_cdev); + if (num_colors != mcled_cdev->num_colors) { + dev_err_once(led_cdev->dev, "error num_colors mismatch %u != %u\n", + num_colors, mcled_cdev->num_colors); + return; + } + + for (i = 0; i < mcled_cdev->num_colors; i++) + mcled_cdev->subled_info[i].intensity = intensity_value[i]; + + led_set_brightness(led_cdev, brightness); +} +EXPORT_SYMBOL_GPL(led_mc_set_brightness); + int led_update_brightness(struct led_classdev *led_cdev) { int ret; diff --git a/include/linux/leds.h b/include/linux/leds.h index 6300313c46b7..ae07e44f1dcb 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -107,6 +107,7 @@ struct led_classdev { #define LED_BRIGHT_HW_CHANGED BIT(21) #define LED_RETAIN_AT_SHUTDOWN BIT(22) #define LED_INIT_DEFAULT_TRIGGER BIT(23) +#define LED_MULTI_COLOR BIT(24) /* set_brightness_work / blink_timer flags, atomic, private. */ unsigned long work_flags; @@ -373,6 +374,25 @@ void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness); */ int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value); +/** + * led_mc_set_brightness - set mc LED color intensity values and brightness + * @led_cdev: the LED to set + * @intensity_value: array of per color intensity values to set + * @num_colors: amount of entries in intensity_value array + * @brightness: the brightness to set the LED to + * + * Set a multi-color LED's per color intensity values and brightness. + * If necessary, this cancels the software blink timer. This function is + * guaranteed not to sleep. + * + * Calling this function on a non multi-color led_classdev or with the wrong + * num_colors value is an error. In this case an error will be logged once + * and the call will do nothing. + */ +void led_mc_set_brightness(struct led_classdev *led_cdev, + unsigned int *intensity_value, unsigned int num_colors, + unsigned int brightness); + /** * led_update_brightness - update LED brightness * @led_cdev: the LED to query -- cgit v1.2.3 From 0921a57c91648b08857b47a2f26fa7942f06120f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 13:41:23 +0200 Subject: leds: trigger: Add led_mc_trigger_event() function Add a new led_mc_trigger_event() function for triggers which want to change the color of a multi-color LED based on their trigger conditions. Signed-off-by: Hans de Goede Reviewed-by: Jacek Anaszewski Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531114124.45346-6-hdegoede@redhat.com Signed-off-by: Lee Jones --- drivers/leds/led-triggers.c | 20 ++++++++++++++++++++ include/linux/leds.h | 6 ++++++ 2 files changed, 26 insertions(+) (limited to 'include') diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c index b1b323b19301..638aa6591092 100644 --- a/drivers/leds/led-triggers.c +++ b/drivers/leds/led-triggers.c @@ -396,6 +396,26 @@ void led_trigger_event(struct led_trigger *trig, } EXPORT_SYMBOL_GPL(led_trigger_event); +void led_mc_trigger_event(struct led_trigger *trig, + unsigned int *intensity_value, unsigned int num_colors, + enum led_brightness brightness) +{ + struct led_classdev *led_cdev; + + if (!trig) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) { + if (!(led_cdev->flags & LED_MULTI_COLOR)) + continue; + + led_mc_set_brightness(led_cdev, intensity_value, num_colors, brightness); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(led_mc_trigger_event); + static void led_trigger_blink_setup(struct led_trigger *trig, unsigned long delay_on, unsigned long delay_off, diff --git a/include/linux/leds.h b/include/linux/leds.h index ae07e44f1dcb..517b6198df07 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -510,6 +510,9 @@ void led_trigger_register_simple(const char *name, struct led_trigger **trigger); void led_trigger_unregister_simple(struct led_trigger *trigger); void led_trigger_event(struct led_trigger *trigger, enum led_brightness event); +void led_mc_trigger_event(struct led_trigger *trig, + unsigned int *intensity_value, unsigned int num_colors, + enum led_brightness brightness); void led_trigger_blink(struct led_trigger *trigger, unsigned long delay_on, unsigned long delay_off); void led_trigger_blink_oneshot(struct led_trigger *trigger, @@ -552,6 +555,9 @@ static inline void led_trigger_register_simple(const char *name, static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {} static inline void led_trigger_event(struct led_trigger *trigger, enum led_brightness event) {} +static inline void led_mc_trigger_event(struct led_trigger *trig, + unsigned int *intensity_value, unsigned int num_colors, + enum led_brightness brightness) {} static inline void led_trigger_blink(struct led_trigger *trigger, unsigned long delay_on, unsigned long delay_off) {} -- cgit v1.2.3 From 9af12f57f1f9785f231d31a7365ad244c656b7ff Mon Sep 17 00:00:00 2001 From: Kate Hsuan Date: Fri, 31 May 2024 13:41:24 +0200 Subject: power: supply: power-supply-leds: Add charging_orange_full_green trigger for RGB LED Add a charging_orange_full_green LED trigger and the trigger is based on led_mc_trigger_event() which can set an RGB LED when the trigger is triggered. The LED will show orange when the battery status is charging. The LED will show green when the battery status is full. Link: https://lore.kernel.org/linux-leds/f40a0b1a-ceac-e269-c2dd-0158c5b4a1ad@gmail.com/ Signed-off-by: Kate Hsuan Acked-by: Sebastian Reichel Reviewed-by: Andy Shevchenko [hdegoede@redhat.com change color order to RGB] Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20240531114124.45346-7-hdegoede@redhat.com Signed-off-by: Lee Jones --- drivers/power/supply/power_supply_leds.c | 23 +++++++++++++++++++++++ include/linux/power_supply.h | 2 ++ 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/drivers/power/supply/power_supply_leds.c b/drivers/power/supply/power_supply_leds.c index c7db29d5fcb8..73935de844d9 100644 --- a/drivers/power/supply/power_supply_leds.c +++ b/drivers/power/supply/power_supply_leds.c @@ -22,6 +22,8 @@ static void power_supply_update_bat_leds(struct power_supply *psy) { union power_supply_propval status; + unsigned int intensity_green[3] = { 0, 255, 0 }; + unsigned int intensity_orange[3] = { 255, 128, 0 }; if (power_supply_get_property(psy, POWER_SUPPLY_PROP_STATUS, &status)) return; @@ -36,12 +38,20 @@ static void power_supply_update_bat_leds(struct power_supply *psy) /* Going from blink to LED on requires a LED_OFF event to stop blink */ led_trigger_event(psy->charging_blink_full_solid_trig, LED_OFF); led_trigger_event(psy->charging_blink_full_solid_trig, LED_FULL); + led_mc_trigger_event(psy->charging_orange_full_green_trig, + intensity_green, + ARRAY_SIZE(intensity_green), + LED_FULL); break; case POWER_SUPPLY_STATUS_CHARGING: led_trigger_event(psy->charging_full_trig, LED_FULL); led_trigger_event(psy->charging_trig, LED_FULL); led_trigger_event(psy->full_trig, LED_OFF); led_trigger_blink(psy->charging_blink_full_solid_trig, 0, 0); + led_mc_trigger_event(psy->charging_orange_full_green_trig, + intensity_orange, + ARRAY_SIZE(intensity_orange), + LED_FULL); break; default: led_trigger_event(psy->charging_full_trig, LED_OFF); @@ -49,6 +59,8 @@ static void power_supply_update_bat_leds(struct power_supply *psy) led_trigger_event(psy->full_trig, LED_OFF); led_trigger_event(psy->charging_blink_full_solid_trig, LED_OFF); + led_trigger_event(psy->charging_orange_full_green_trig, + LED_OFF); break; } } @@ -74,6 +86,11 @@ static int power_supply_create_bat_triggers(struct power_supply *psy) if (!psy->charging_blink_full_solid_trig_name) goto charging_blink_full_solid_failed; + psy->charging_orange_full_green_trig_name = kasprintf(GFP_KERNEL, + "%s-charging-orange-full-green", psy->desc->name); + if (!psy->charging_orange_full_green_trig_name) + goto charging_red_full_green_failed; + led_trigger_register_simple(psy->charging_full_trig_name, &psy->charging_full_trig); led_trigger_register_simple(psy->charging_trig_name, @@ -82,9 +99,13 @@ static int power_supply_create_bat_triggers(struct power_supply *psy) &psy->full_trig); led_trigger_register_simple(psy->charging_blink_full_solid_trig_name, &psy->charging_blink_full_solid_trig); + led_trigger_register_simple(psy->charging_orange_full_green_trig_name, + &psy->charging_orange_full_green_trig); return 0; +charging_red_full_green_failed: + kfree(psy->charging_blink_full_solid_trig_name); charging_blink_full_solid_failed: kfree(psy->full_trig_name); full_failed: @@ -101,10 +122,12 @@ static void power_supply_remove_bat_triggers(struct power_supply *psy) led_trigger_unregister_simple(psy->charging_trig); led_trigger_unregister_simple(psy->full_trig); led_trigger_unregister_simple(psy->charging_blink_full_solid_trig); + led_trigger_unregister_simple(psy->charging_orange_full_green_trig); kfree(psy->charging_blink_full_solid_trig_name); kfree(psy->full_trig_name); kfree(psy->charging_trig_name); kfree(psy->charging_full_trig_name); + kfree(psy->charging_orange_full_green_trig_name); } /* Generated power specific LEDs triggers. */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 8e5705a56b85..c852cc882501 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -319,6 +319,8 @@ struct power_supply { char *online_trig_name; struct led_trigger *charging_blink_full_solid_trig; char *charging_blink_full_solid_trig_name; + struct led_trigger *charging_orange_full_green_trig; + char *charging_orange_full_green_trig_name; #endif }; -- cgit v1.2.3 From b44d79d6bad16c30978c2cee3421133d3f181494 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 27 May 2024 16:29:33 +0300 Subject: platform/x86/intel/tpmi: Add support for performance limit reasons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TPMI ID 0x0C (Perf Limit Reasons) to the list of supported TPMI IDs. Reviewed-by: Andy Shevchenko Reviewed-by: Ilpo Järvinen Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20240527133400.483634-2-tero.kristo@linux.intel.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/tpmi.c | 2 ++ include/linux/intel_tpmi.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/platform/x86/intel/tpmi.c b/drivers/platform/x86/intel/tpmi.c index 6c0cbccd80bb..c2ef2cd587ba 100644 --- a/drivers/platform/x86/intel/tpmi.c +++ b/drivers/platform/x86/intel/tpmi.c @@ -577,6 +577,8 @@ static const char *intel_tpmi_name(enum intel_tpmi_id id) return "uncore"; case TPMI_ID_SST: return "sst"; + case TPMI_ID_PLR: + return "plr"; default: return NULL; } diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index 1e880cb0f454..a88ac937d2c2 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -21,6 +21,7 @@ enum intel_tpmi_id { TPMI_ID_PEM = 1, /* Power and Perf excursion Monitor */ TPMI_ID_UNCORE = 2, /* Uncore Frequency Scaling */ TPMI_ID_SST = 5, /* Speed Select Technology */ + TPMI_ID_PLR = 0xc, /* Performance Limit Reasons */ TPMI_CONTROL_ID = 0x80, /* Special ID for getting feature status */ TPMI_INFO_ID = 0x81, /* Special ID for PCI BDF and Package ID information */ }; -- cgit v1.2.3 From d36842bacf8e3491f555059f27de57b3436cc3ff Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 27 May 2024 16:29:34 +0300 Subject: platform/x86/intel/tpmi: Add API to get debugfs root MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add new API to get the debugfs root directory for TPMI. This allows any TPMI devices to add their own debugfs items under the same directory structure. Reviewed-by: Andy Shevchenko Reviewed-by: Ilpo Järvinen Signed-off-by: Tero Kristo Link: https://lore.kernel.org/r/20240527133400.483634-3-tero.kristo@linux.intel.com Signed-off-by: Ilpo Järvinen --- drivers/platform/x86/intel/tpmi.c | 9 +++++++++ include/linux/intel_tpmi.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/drivers/platform/x86/intel/tpmi.c b/drivers/platform/x86/intel/tpmi.c index c2ef2cd587ba..83e8b1fe53b3 100644 --- a/drivers/platform/x86/intel/tpmi.c +++ b/drivers/platform/x86/intel/tpmi.c @@ -357,6 +357,15 @@ int tpmi_get_feature_status(struct auxiliary_device *auxdev, } EXPORT_SYMBOL_NS_GPL(tpmi_get_feature_status, INTEL_TPMI); +struct dentry *tpmi_get_debugfs_dir(struct auxiliary_device *auxdev) +{ + struct intel_vsec_device *intel_vsec_dev = dev_to_ivdev(auxdev->dev.parent); + struct intel_tpmi_info *tpmi_info = auxiliary_get_drvdata(&intel_vsec_dev->auxdev); + + return tpmi_info->dbgfs_dir; +} +EXPORT_SYMBOL_NS_GPL(tpmi_get_debugfs_dir, INTEL_TPMI); + static int tpmi_pfs_dbg_show(struct seq_file *s, void *unused) { struct intel_tpmi_info *tpmi_info = s->private; diff --git a/include/linux/intel_tpmi.h b/include/linux/intel_tpmi.h index a88ac937d2c2..ff480b47ae64 100644 --- a/include/linux/intel_tpmi.h +++ b/include/linux/intel_tpmi.h @@ -54,4 +54,5 @@ struct resource *tpmi_get_resource_at_index(struct auxiliary_device *auxdev, int int tpmi_get_resource_count(struct auxiliary_device *auxdev); int tpmi_get_feature_status(struct auxiliary_device *auxdev, int feature_id, bool *read_blocked, bool *write_blocked); +struct dentry *tpmi_get_debugfs_dir(struct auxiliary_device *auxdev); #endif -- cgit v1.2.3 From 4d2bcefa965b06a1f2be6912456bcfa86a34f184 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 31 May 2024 13:29:02 +0100 Subject: mm: Reduce the number of slab->folio casts Mark a few more folio functions as taking a const folio pointer, which allows us to remove a few places in slab which cast away the const. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Vlastimil Babka --- include/linux/mm.h | 6 +++--- mm/slab.h | 4 ++-- mm/slub.c | 6 ++---- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..a983d371fafa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1105,7 +1105,7 @@ static inline unsigned int compound_order(struct page *page) * * Return: The order of the folio. */ -static inline unsigned int folio_order(struct folio *folio) +static inline unsigned int folio_order(const struct folio *folio) { if (!folio_test_large(folio)) return 0; @@ -2145,7 +2145,7 @@ static inline struct folio *folio_next(struct folio *folio) * it from being split. It is not necessary for the folio to be locked. * Return: The base-2 logarithm of the size of this folio. */ -static inline unsigned int folio_shift(struct folio *folio) +static inline unsigned int folio_shift(const struct folio *folio) { return PAGE_SHIFT + folio_order(folio); } @@ -2158,7 +2158,7 @@ static inline unsigned int folio_shift(struct folio *folio) * it from being split. It is not necessary for the folio to be locked. * Return: The number of bytes in this folio. */ -static inline size_t folio_size(struct folio *folio) +static inline size_t folio_size(const struct folio *folio) { return PAGE_SIZE << folio_order(folio); } diff --git a/mm/slab.h b/mm/slab.h index 5f8f47c5bee0..b16e63191578 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -166,7 +166,7 @@ static_assert(IS_ALIGNED(offsetof(struct slab, freelist), sizeof(freelist_aba_t) */ static inline bool slab_test_pfmemalloc(const struct slab *slab) { - return folio_test_active((struct folio *)slab_folio(slab)); + return folio_test_active(slab_folio(slab)); } static inline void slab_set_pfmemalloc(struct slab *slab) @@ -211,7 +211,7 @@ static inline struct slab *virt_to_slab(const void *addr) static inline int slab_order(const struct slab *slab) { - return folio_order((struct folio *)slab_folio(slab)); + return folio_order(slab_folio(slab)); } static inline size_t slab_size(const struct slab *slab) diff --git a/mm/slub.c b/mm/slub.c index 95e0a3332c44..b8ba068ca079 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -962,11 +962,9 @@ void print_tracking(struct kmem_cache *s, void *object) static void print_slab_info(const struct slab *slab) { - struct folio *folio = (struct folio *)slab_folio(slab); - pr_err("Slab 0x%p objects=%u used=%u fp=0x%p flags=%pGp\n", slab, slab->objects, slab->inuse, slab->freelist, - folio_flags(folio, 0)); + &slab->__page_flags); } /* @@ -2532,7 +2530,7 @@ static void discard_slab(struct kmem_cache *s, struct slab *slab) */ static inline bool slab_test_node_partial(const struct slab *slab) { - return folio_test_workingset((struct folio *)slab_folio(slab)); + return folio_test_workingset(slab_folio(slab)); } static inline void slab_set_node_partial(struct slab *slab) -- cgit v1.2.3 From b7c5e64fecfa88764791679cca4786ac65de739e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 29 May 2024 22:52:30 -0600 Subject: vfio: Create vfio_fs_type with inode per device By linking all the device fds we provide to userspace to an address space through a new pseudo fs, we can use tools like unmap_mapping_range() to zap all vmas associated with a device. Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240530045236.1005864-2-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/device_cdev.c | 7 +++++++ drivers/vfio/group.c | 7 +++++++ drivers/vfio/vfio_main.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 1 + 4 files changed, 59 insertions(+) (limited to 'include') diff --git a/drivers/vfio/device_cdev.c b/drivers/vfio/device_cdev.c index e75da0a70d1f..bb1817bd4ff3 100644 --- a/drivers/vfio/device_cdev.c +++ b/drivers/vfio/device_cdev.c @@ -39,6 +39,13 @@ int vfio_device_fops_cdev_open(struct inode *inode, struct file *filep) filep->private_data = df; + /* + * Use the pseudo fs inode on the device to link all mmaps + * to the same address space, allowing us to unmap all vmas + * associated to this device using unmap_mapping_range(). + */ + filep->f_mapping = device->inode->i_mapping; + return 0; err_put_registration: diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c index 610a429c6191..ded364588d29 100644 --- a/drivers/vfio/group.c +++ b/drivers/vfio/group.c @@ -286,6 +286,13 @@ static struct file *vfio_device_open_file(struct vfio_device *device) */ filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); + /* + * Use the pseudo fs inode on the device to link all mmaps + * to the same address space, allowing us to unmap all vmas + * associated to this device using unmap_mapping_range(). + */ + filep->f_mapping = device->inode->i_mapping; + if (device->group->type == VFIO_NO_IOMMU) dev_warn(device->dev, "vfio-noiommu device opened by user " "(%s:%d)\n", current->comm, task_pid_nr(current)); diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index e97d796a54fb..a5a62d9d963f 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -22,8 +22,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -43,9 +45,13 @@ #define DRIVER_AUTHOR "Alex Williamson " #define DRIVER_DESC "VFIO - User Level meta-driver" +#define VFIO_MAGIC 0x5646494f /* "VFIO" */ + static struct vfio { struct class *device_class; struct ida device_ida; + struct vfsmount *vfs_mount; + int fs_count; } vfio; #ifdef CONFIG_VFIO_NOIOMMU @@ -186,6 +192,8 @@ static void vfio_device_release(struct device *dev) if (device->ops->release) device->ops->release(device); + iput(device->inode); + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); kvfree(device); } @@ -228,6 +236,34 @@ out_free: } EXPORT_SYMBOL_GPL(_vfio_alloc_device); +static int vfio_fs_init_fs_context(struct fs_context *fc) +{ + return init_pseudo(fc, VFIO_MAGIC) ? 0 : -ENOMEM; +} + +static struct file_system_type vfio_fs_type = { + .name = "vfio", + .owner = THIS_MODULE, + .init_fs_context = vfio_fs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static struct inode *vfio_fs_inode_new(void) +{ + struct inode *inode; + int ret; + + ret = simple_pin_fs(&vfio_fs_type, &vfio.vfs_mount, &vfio.fs_count); + if (ret) + return ERR_PTR(ret); + + inode = alloc_anon_inode(vfio.vfs_mount->mnt_sb); + if (IS_ERR(inode)) + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); + + return inode; +} + /* * Initialize a vfio_device so it can be registered to vfio core. */ @@ -246,6 +282,11 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, init_completion(&device->comp); device->dev = dev; device->ops = ops; + device->inode = vfio_fs_inode_new(); + if (IS_ERR(device->inode)) { + ret = PTR_ERR(device->inode); + goto out_inode; + } if (ops->init) { ret = ops->init(device); @@ -260,6 +301,9 @@ static int vfio_init_device(struct vfio_device *device, struct device *dev, return 0; out_uninit: + iput(device->inode); + simple_release_fs(&vfio.vfs_mount, &vfio.fs_count); +out_inode: vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); return ret; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 8b1a29820409..000a6cab2d31 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -64,6 +64,7 @@ struct vfio_device { struct completion comp; struct iommufd_access *iommufd_access; void (*put_kvm)(struct kvm *kvm); + struct inode *inode; #if IS_ENABLED(CONFIG_IOMMUFD) struct iommufd_device *iommufd_device; u8 iommufd_attached:1; -- cgit v1.2.3 From aac6db75a9fc2c7a6f73e152df8f15101dda38e6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 29 May 2024 22:52:31 -0600 Subject: vfio/pci: Use unmap_mapping_range() With the vfio device fd tied to the address space of the pseudo fs inode, we can use the mm to track all vmas that might be mmap'ing device BARs, which removes our vma_list and all the complicated lock ordering necessary to manually zap each related vma. Note that we can no longer store the pfn in vm_pgoff if we want to use unmap_mapping_range() to zap a selective portion of the device fd corresponding to BAR mappings. This also converts our mmap fault handler to use vmf_insert_pfn() because we no longer have a vma_list to avoid the concurrency problem with io_remap_pfn_range(). The goal is to eventually use the vm_ops huge_fault handler to avoid the additional faulting overhead, but vmf_insert_pfn_{pmd,pud}() need to learn about pfnmaps first. Also, Jason notes that a race exists between unmap_mapping_range() and the fops mmap callback if we were to call io_remap_pfn_range() to populate the vma on mmap. Specifically, mmap_region() does call_mmap() before it does vma_link_file() which gives a window where the vma is populated but invisible to unmap_mapping_range(). Suggested-by: Jason Gunthorpe Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/20240530045236.1005864-3-alex.williamson@redhat.com Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_core.c | 264 ++++++++------------------------------- include/linux/vfio_pci_core.h | 2 - 2 files changed, 55 insertions(+), 211 deletions(-) (limited to 'include') diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index 80cae87fff36..db31c27bf78b 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1610,100 +1610,20 @@ ssize_t vfio_pci_core_write(struct vfio_device *core_vdev, const char __user *bu } EXPORT_SYMBOL_GPL(vfio_pci_core_write); -/* Return 1 on zap and vma_lock acquired, 0 on contention (only with @try) */ -static int vfio_pci_zap_and_vma_lock(struct vfio_pci_core_device *vdev, bool try) +static void vfio_pci_zap_bars(struct vfio_pci_core_device *vdev) { - struct vfio_pci_mmap_vma *mmap_vma, *tmp; + struct vfio_device *core_vdev = &vdev->vdev; + loff_t start = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_BAR0_REGION_INDEX); + loff_t end = VFIO_PCI_INDEX_TO_OFFSET(VFIO_PCI_ROM_REGION_INDEX); + loff_t len = end - start; - /* - * Lock ordering: - * vma_lock is nested under mmap_lock for vm_ops callback paths. - * The memory_lock semaphore is used by both code paths calling - * into this function to zap vmas and the vm_ops.fault callback - * to protect the memory enable state of the device. - * - * When zapping vmas we need to maintain the mmap_lock => vma_lock - * ordering, which requires using vma_lock to walk vma_list to - * acquire an mm, then dropping vma_lock to get the mmap_lock and - * reacquiring vma_lock. This logic is derived from similar - * requirements in uverbs_user_mmap_disassociate(). - * - * mmap_lock must always be the top-level lock when it is taken. - * Therefore we can only hold the memory_lock write lock when - * vma_list is empty, as we'd need to take mmap_lock to clear - * entries. vma_list can only be guaranteed empty when holding - * vma_lock, thus memory_lock is nested under vma_lock. - * - * This enables the vm_ops.fault callback to acquire vma_lock, - * followed by memory_lock read lock, while already holding - * mmap_lock without risk of deadlock. - */ - while (1) { - struct mm_struct *mm = NULL; - - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) - return 0; - } else { - mutex_lock(&vdev->vma_lock); - } - while (!list_empty(&vdev->vma_list)) { - mmap_vma = list_first_entry(&vdev->vma_list, - struct vfio_pci_mmap_vma, - vma_next); - mm = mmap_vma->vma->vm_mm; - if (mmget_not_zero(mm)) - break; - - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - mm = NULL; - } - if (!mm) - return 1; - mutex_unlock(&vdev->vma_lock); - - if (try) { - if (!mmap_read_trylock(mm)) { - mmput(mm); - return 0; - } - } else { - mmap_read_lock(mm); - } - if (try) { - if (!mutex_trylock(&vdev->vma_lock)) { - mmap_read_unlock(mm); - mmput(mm); - return 0; - } - } else { - mutex_lock(&vdev->vma_lock); - } - list_for_each_entry_safe(mmap_vma, tmp, - &vdev->vma_list, vma_next) { - struct vm_area_struct *vma = mmap_vma->vma; - - if (vma->vm_mm != mm) - continue; - - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - - zap_vma_ptes(vma, vma->vm_start, - vma->vm_end - vma->vm_start); - } - mutex_unlock(&vdev->vma_lock); - mmap_read_unlock(mm); - mmput(mm); - } + unmap_mapping_range(core_vdev->inode->i_mapping, start, len, true); } void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev) { - vfio_pci_zap_and_vma_lock(vdev, false); down_write(&vdev->memory_lock); - mutex_unlock(&vdev->vma_lock); + vfio_pci_zap_bars(vdev); } u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev) @@ -1725,99 +1645,41 @@ void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev, u16 c up_write(&vdev->memory_lock); } -/* Caller holds vma_lock */ -static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev, - struct vm_area_struct *vma) -{ - struct vfio_pci_mmap_vma *mmap_vma; - - mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT); - if (!mmap_vma) - return -ENOMEM; - - mmap_vma->vma = vma; - list_add(&mmap_vma->vma_next, &vdev->vma_list); - - return 0; -} - -/* - * Zap mmaps on open so that we can fault them in on access and therefore - * our vma_list only tracks mappings accessed since last zap. - */ -static void vfio_pci_mmap_open(struct vm_area_struct *vma) -{ - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); -} - -static void vfio_pci_mmap_close(struct vm_area_struct *vma) +static unsigned long vma_to_pfn(struct vm_area_struct *vma) { struct vfio_pci_core_device *vdev = vma->vm_private_data; - struct vfio_pci_mmap_vma *mmap_vma; + int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT); + u64 pgoff; - mutex_lock(&vdev->vma_lock); - list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { - if (mmap_vma->vma == vma) { - list_del(&mmap_vma->vma_next); - kfree(mmap_vma); - break; - } - } - mutex_unlock(&vdev->vma_lock); + pgoff = vma->vm_pgoff & + ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1); + + return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff; } static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct vfio_pci_core_device *vdev = vma->vm_private_data; - struct vfio_pci_mmap_vma *mmap_vma; - vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff; + vm_fault_t ret = VM_FAULT_SIGBUS; - mutex_lock(&vdev->vma_lock); - down_read(&vdev->memory_lock); + pfn = vma_to_pfn(vma); - /* - * Memory region cannot be accessed if the low power feature is engaged - * or memory access is disabled. - */ - if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) { - ret = VM_FAULT_SIGBUS; - goto up_out; - } + down_read(&vdev->memory_lock); - /* - * We populate the whole vma on fault, so we need to test whether - * the vma has already been mapped, such as for concurrent faults - * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if - * we ask it to fill the same range again. - */ - list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) { - if (mmap_vma->vma == vma) - goto up_out; - } + if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) + goto out_disabled; - if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, - vma->vm_end - vma->vm_start, - vma->vm_page_prot)) { - ret = VM_FAULT_SIGBUS; - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - goto up_out; - } + ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff); - if (__vfio_pci_add_vma(vdev, vma)) { - ret = VM_FAULT_OOM; - zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start); - } - -up_out: +out_disabled: up_read(&vdev->memory_lock); - mutex_unlock(&vdev->vma_lock); + return ret; } static const struct vm_operations_struct vfio_pci_mmap_ops = { - .open = vfio_pci_mmap_open, - .close = vfio_pci_mmap_close, .fault = vfio_pci_mmap_fault, }; @@ -1880,11 +1742,12 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma vma->vm_private_data = vdev; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_pgoff = (pci_resource_start(pdev, index) >> PAGE_SHIFT) + pgoff; + vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot); /* - * See remap_pfn_range(), called from vfio_pci_fault() but we can't - * change vm_flags within the fault handler. Set them now. + * Set vm_flags now, they should not be changed in the fault handler. + * We want the same flags and page protection (decrypted above) as + * io_remap_pfn_range() would set. * * VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64, * allowing KVM stage 2 device mapping attributes to use Normal-NC @@ -2202,8 +2065,6 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev) mutex_init(&vdev->ioeventfds_lock); INIT_LIST_HEAD(&vdev->dummy_resources_list); INIT_LIST_HEAD(&vdev->ioeventfds_list); - mutex_init(&vdev->vma_lock); - INIT_LIST_HEAD(&vdev->vma_list); INIT_LIST_HEAD(&vdev->sriov_pfs_item); init_rwsem(&vdev->memory_lock); xa_init(&vdev->ctx); @@ -2219,7 +2080,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev) mutex_destroy(&vdev->igate); mutex_destroy(&vdev->ioeventfds_lock); - mutex_destroy(&vdev->vma_lock); kfree(vdev->region); kfree(vdev->pm_save); } @@ -2497,26 +2357,15 @@ unwind: return ret; } -/* - * We need to get memory_lock for each device, but devices can share mmap_lock, - * therefore we need to zap and hold the vma_lock for each device, and only then - * get each memory_lock. - */ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, struct vfio_pci_group_info *groups, struct iommufd_ctx *iommufd_ctx) { - struct vfio_pci_core_device *cur_mem; - struct vfio_pci_core_device *cur_vma; - struct vfio_pci_core_device *cur; + struct vfio_pci_core_device *vdev; struct pci_dev *pdev; - bool is_mem = true; int ret; mutex_lock(&dev_set->lock); - cur_mem = list_first_entry(&dev_set->device_list, - struct vfio_pci_core_device, - vdev.dev_set_list); pdev = vfio_pci_dev_set_resettable(dev_set); if (!pdev) { @@ -2533,7 +2382,7 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, if (ret) goto err_unlock; - list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) { + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) { bool owned; /* @@ -2557,38 +2406,38 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * Otherwise, reset is not allowed. */ if (iommufd_ctx) { - int devid = vfio_iommufd_get_dev_id(&cur_vma->vdev, + int devid = vfio_iommufd_get_dev_id(&vdev->vdev, iommufd_ctx); owned = (devid > 0 || devid == -ENOENT); } else { - owned = vfio_dev_in_groups(&cur_vma->vdev, groups); + owned = vfio_dev_in_groups(&vdev->vdev, groups); } if (!owned) { ret = -EINVAL; - goto err_undo; + break; } /* - * Locking multiple devices is prone to deadlock, runaway and - * unwind if we hit contention. + * Take the memory write lock for each device and zap BAR + * mappings to prevent the user accessing the device while in + * reset. Locking multiple devices is prone to deadlock, + * runaway and unwind if we hit contention. */ - if (!vfio_pci_zap_and_vma_lock(cur_vma, true)) { + if (!down_write_trylock(&vdev->memory_lock)) { ret = -EBUSY; - goto err_undo; + break; } + + vfio_pci_zap_bars(vdev); } - cur_vma = NULL; - list_for_each_entry(cur_mem, &dev_set->device_list, vdev.dev_set_list) { - if (!down_write_trylock(&cur_mem->memory_lock)) { - ret = -EBUSY; - goto err_undo; - } - mutex_unlock(&cur_mem->vma_lock); + if (!list_entry_is_head(vdev, + &dev_set->device_list, vdev.dev_set_list)) { + vdev = list_prev_entry(vdev, vdev.dev_set_list); + goto err_undo; } - cur_mem = NULL; /* * The pci_reset_bus() will reset all the devices in the bus. @@ -2599,25 +2448,22 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set, * cause the PCI config space reset without restoring the original * state (saved locally in 'vdev->pm_save'). */ - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) - vfio_pci_set_power_state(cur, PCI_D0); + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) + vfio_pci_set_power_state(vdev, PCI_D0); ret = pci_reset_bus(pdev); + vdev = list_last_entry(&dev_set->device_list, + struct vfio_pci_core_device, vdev.dev_set_list); + err_undo: - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) { - if (cur == cur_mem) - is_mem = false; - if (cur == cur_vma) - break; - if (is_mem) - up_write(&cur->memory_lock); - else - mutex_unlock(&cur->vma_lock); - } + list_for_each_entry_from_reverse(vdev, &dev_set->device_list, + vdev.dev_set_list) + up_write(&vdev->memory_lock); + + list_for_each_entry(vdev, &dev_set->device_list, vdev.dev_set_list) + pm_runtime_put(&vdev->pdev->dev); - list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) - pm_runtime_put(&cur->pdev->dev); err_unlock: mutex_unlock(&dev_set->lock); return ret; diff --git a/include/linux/vfio_pci_core.h b/include/linux/vfio_pci_core.h index a2c8b8bba711..f87067438ed4 100644 --- a/include/linux/vfio_pci_core.h +++ b/include/linux/vfio_pci_core.h @@ -93,8 +93,6 @@ struct vfio_pci_core_device { struct list_head sriov_pfs_item; struct vfio_pci_core_device *sriov_pf_core_dev; struct notifier_block nb; - struct mutex vma_lock; - struct list_head vma_list; struct rw_semaphore memory_lock; }; -- cgit v1.2.3 From 055afc34fd219c8e2290d736ad186edd58870a9e Mon Sep 17 00:00:00 2001 From: Unnathi Chalicheemala Date: Fri, 31 May 2024 09:45:25 -0700 Subject: soc: qcom: llcc: Add regmap for Broadcast_AND region Until SM8450, there was only one broadcast region (Broadcast_OR) used to broadcast write and check for status bit 0. >From SM8450 onwards another broadcast region (Broadcast_AND) has been added which checks for status bit 1. This hasn't been updated and Broadcast_OR region was wrongly being used to check for status bit 1 all along. Hence define new regmap structure for Broadcast_AND region and initialize this regmap when HW block version is greater than 4.1, otherwise initialize as a NULL pointer for backwards compatibility. Switch from broadcast_OR to broadcast_AND region (when defined in DT) for checking status bit 1 as Broadcast_OR region checks only for bit 0. Signed-off-by: Unnathi Chalicheemala Reviewed-by: Bjorn Andersson Link: https://lore.kernel.org/r/9cf19928a67eaa577ae0f02de5bf86276be34ea2.1717014052.git.quic_uchalich@quicinc.com Signed-off-by: Bjorn Andersson --- drivers/soc/qcom/llcc-qcom.c | 16 +++++++++++++++- include/linux/soc/qcom/llcc-qcom.h | 4 +++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c index 4379a5f8ddf3..110b1f89aa59 100644 --- a/drivers/soc/qcom/llcc-qcom.c +++ b/drivers/soc/qcom/llcc-qcom.c @@ -855,6 +855,7 @@ EXPORT_SYMBOL_GPL(llcc_slice_putd); static int llcc_update_act_ctrl(u32 sid, u32 act_ctrl_reg_val, u32 status) { + struct regmap *regmap; u32 act_ctrl_reg; u32 act_clear_reg; u32 status_reg; @@ -883,7 +884,8 @@ static int llcc_update_act_ctrl(u32 sid, return ret; if (drv_data->version >= LLCC_VERSION_4_1_0_0) { - ret = regmap_read_poll_timeout(drv_data->bcast_regmap, status_reg, + regmap = drv_data->bcast_and_regmap ?: drv_data->bcast_regmap; + ret = regmap_read_poll_timeout(regmap, status_reg, slice_status, (slice_status & ACT_COMPLETE), 0, LLCC_STATUS_READ_DELAY); if (ret) @@ -1318,6 +1320,18 @@ static int qcom_llcc_probe(struct platform_device *pdev) drv_data->version = version; + /* Applicable only when drv_data->version >= 4.1 */ + if (drv_data->version >= LLCC_VERSION_4_1_0_0) { + drv_data->bcast_and_regmap = qcom_llcc_init_mmio(pdev, i + 1, "llcc_broadcast_and_base"); + if (IS_ERR(drv_data->bcast_and_regmap)) { + ret = PTR_ERR(drv_data->bcast_and_regmap); + if (ret == -EINVAL) + drv_data->bcast_and_regmap = NULL; + else + goto err; + } + } + llcc_cfg = cfg->sct_data; sz = cfg->size; diff --git a/include/linux/soc/qcom/llcc-qcom.h b/include/linux/soc/qcom/llcc-qcom.h index 1a886666bbb6..9e9f528b1370 100644 --- a/include/linux/soc/qcom/llcc-qcom.h +++ b/include/linux/soc/qcom/llcc-qcom.h @@ -115,7 +115,8 @@ struct llcc_edac_reg_offset { /** * struct llcc_drv_data - Data associated with the llcc driver * @regmaps: regmaps associated with the llcc device - * @bcast_regmap: regmap associated with llcc broadcast offset + * @bcast_regmap: regmap associated with llcc broadcast OR offset + * @bcast_and_regmap: regmap associated with llcc broadcast AND offset * @cfg: pointer to the data structure for slice configuration * @edac_reg_offset: Offset of the LLCC EDAC registers * @lock: mutex associated with each slice @@ -129,6 +130,7 @@ struct llcc_edac_reg_offset { struct llcc_drv_data { struct regmap **regmaps; struct regmap *bcast_regmap; + struct regmap *bcast_and_regmap; const struct llcc_slice_config *cfg; const struct llcc_edac_reg_offset *edac_reg_offset; struct mutex lock; -- cgit v1.2.3 From 69c8b998717c03adeada070882512ebeae11d71f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 29 May 2024 09:29:22 -0700 Subject: net: qstat: extend kdoc about get_base_stats mlx5 has a dedicated queue for PTP packets. Clarify that this sort of queues can also be accounted towards the base. Reviewed-by: Joe Damato Link: https://lore.kernel.org/r/20240529162922.3690698-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/net/netdev_queues.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index a8a7e48dfa6c..5ca019d294ca 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -62,6 +62,8 @@ struct netdev_queue_stats_tx { * statistics will not generally add up to the total number of events for * the device. The @get_base_stats callback allows filling in the delta * between events for currently live queues and overall device history. + * @get_base_stats can also be used to report any miscellaneous packets + * transferred outside of the main set of queues used by the networking stack. * When the statistics for the entire device are queried, first @get_base_stats * is issued to collect the delta, and then a series of per-queue callbacks. * Only statistics which are set in @get_base_stats will be reported -- cgit v1.2.3 From 964a504b1261b641797d93cdbf79b68ff7d85720 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 15:47:00 +0200 Subject: power: supply: leds: Add power_supply_[un]register_led_trigger() Add power_supply_[un]register_led_trigger() helper functions. The primary goal of this is as a preparation patch for adding an activate callback to the power-supply LED triggers to ensure that power-supply LEDs get the correct initial value when the LED gets registered after the power_supply has been registered (this will use the psy back pointer). There also is quite a lot of code duplication in the existing LED trigger registration in the form of the kasprintf() for the name-template for each trigger + related error handling. This duplication is removed by these new helpers. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531134702.166145-2-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_leds.c | 153 +++++++++++++++++-------------- include/linux/power_supply.h | 6 -- 2 files changed, 84 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply_leds.c b/drivers/power/supply/power_supply_leds.c index 73935de844d9..e3f65ff2f064 100644 --- a/drivers/power/supply/power_supply_leds.c +++ b/drivers/power/supply/power_supply_leds.c @@ -19,6 +19,65 @@ /* Battery specific LEDs triggers. */ +struct power_supply_led_trigger { + struct led_trigger trig; + struct power_supply *psy; +}; + +#define trigger_to_psy_trigger(trigger) \ + container_of(trigger, struct power_supply_led_trigger, trig) + +static int power_supply_register_led_trigger(struct power_supply *psy, + const char *name_template, + struct led_trigger **tp, int *err) +{ + struct power_supply_led_trigger *psy_trig; + int ret = -ENOMEM; + + /* Bail on previous errors */ + if (err && *err) + return *err; + + psy_trig = kzalloc(sizeof(*psy_trig), GFP_KERNEL); + if (!psy_trig) + goto err_free_trigger; + + psy_trig->trig.name = kasprintf(GFP_KERNEL, name_template, psy->desc->name); + if (!psy_trig->trig.name) + goto err_free_trigger; + + psy_trig->psy = psy; + + ret = led_trigger_register(&psy_trig->trig); + if (ret) + goto err_free_name; + + *tp = &psy_trig->trig; + return 0; + +err_free_name: + kfree(psy_trig->trig.name); +err_free_trigger: + kfree(psy_trig); + if (err) + *err = ret; + + return ret; +} + +static void power_supply_unregister_led_trigger(struct led_trigger *trig) +{ + struct power_supply_led_trigger *psy_trig; + + if (!trig) + return; + + psy_trig = trigger_to_psy_trigger(trig); + led_trigger_unregister(&psy_trig->trig); + kfree(psy_trig->trig.name); + kfree(psy_trig); +} + static void power_supply_update_bat_leds(struct power_supply *psy) { union power_supply_propval status; @@ -65,69 +124,33 @@ static void power_supply_update_bat_leds(struct power_supply *psy) } } -static int power_supply_create_bat_triggers(struct power_supply *psy) +static void power_supply_remove_bat_triggers(struct power_supply *psy) { - psy->charging_full_trig_name = kasprintf(GFP_KERNEL, - "%s-charging-or-full", psy->desc->name); - if (!psy->charging_full_trig_name) - goto charging_full_failed; - - psy->charging_trig_name = kasprintf(GFP_KERNEL, - "%s-charging", psy->desc->name); - if (!psy->charging_trig_name) - goto charging_failed; - - psy->full_trig_name = kasprintf(GFP_KERNEL, "%s-full", psy->desc->name); - if (!psy->full_trig_name) - goto full_failed; - - psy->charging_blink_full_solid_trig_name = kasprintf(GFP_KERNEL, - "%s-charging-blink-full-solid", psy->desc->name); - if (!psy->charging_blink_full_solid_trig_name) - goto charging_blink_full_solid_failed; - - psy->charging_orange_full_green_trig_name = kasprintf(GFP_KERNEL, - "%s-charging-orange-full-green", psy->desc->name); - if (!psy->charging_orange_full_green_trig_name) - goto charging_red_full_green_failed; - - led_trigger_register_simple(psy->charging_full_trig_name, - &psy->charging_full_trig); - led_trigger_register_simple(psy->charging_trig_name, - &psy->charging_trig); - led_trigger_register_simple(psy->full_trig_name, - &psy->full_trig); - led_trigger_register_simple(psy->charging_blink_full_solid_trig_name, - &psy->charging_blink_full_solid_trig); - led_trigger_register_simple(psy->charging_orange_full_green_trig_name, - &psy->charging_orange_full_green_trig); - - return 0; - -charging_red_full_green_failed: - kfree(psy->charging_blink_full_solid_trig_name); -charging_blink_full_solid_failed: - kfree(psy->full_trig_name); -full_failed: - kfree(psy->charging_trig_name); -charging_failed: - kfree(psy->charging_full_trig_name); -charging_full_failed: - return -ENOMEM; + power_supply_unregister_led_trigger(psy->charging_full_trig); + power_supply_unregister_led_trigger(psy->charging_trig); + power_supply_unregister_led_trigger(psy->full_trig); + power_supply_unregister_led_trigger(psy->charging_blink_full_solid_trig); + power_supply_unregister_led_trigger(psy->charging_orange_full_green_trig); } -static void power_supply_remove_bat_triggers(struct power_supply *psy) +static int power_supply_create_bat_triggers(struct power_supply *psy) { - led_trigger_unregister_simple(psy->charging_full_trig); - led_trigger_unregister_simple(psy->charging_trig); - led_trigger_unregister_simple(psy->full_trig); - led_trigger_unregister_simple(psy->charging_blink_full_solid_trig); - led_trigger_unregister_simple(psy->charging_orange_full_green_trig); - kfree(psy->charging_blink_full_solid_trig_name); - kfree(psy->full_trig_name); - kfree(psy->charging_trig_name); - kfree(psy->charging_full_trig_name); - kfree(psy->charging_orange_full_green_trig_name); + int err = 0; + + power_supply_register_led_trigger(psy, "%s-charging-or-full", + &psy->charging_full_trig, &err); + power_supply_register_led_trigger(psy, "%s-charging", + &psy->charging_trig, &err); + power_supply_register_led_trigger(psy, "%s-full", + &psy->full_trig, &err); + power_supply_register_led_trigger(psy, "%s-charging-blink-full-solid", + &psy->charging_blink_full_solid_trig, &err); + power_supply_register_led_trigger(psy, "%s-charging-orange-full-green", + &psy->charging_orange_full_green_trig, &err); + if (err) + power_supply_remove_bat_triggers(psy); + + return err; } /* Generated power specific LEDs triggers. */ @@ -149,20 +172,12 @@ static void power_supply_update_gen_leds(struct power_supply *psy) static int power_supply_create_gen_triggers(struct power_supply *psy) { - psy->online_trig_name = kasprintf(GFP_KERNEL, "%s-online", - psy->desc->name); - if (!psy->online_trig_name) - return -ENOMEM; - - led_trigger_register_simple(psy->online_trig_name, &psy->online_trig); - - return 0; + return power_supply_register_led_trigger(psy, "%s-online", &psy->online_trig, NULL); } static void power_supply_remove_gen_triggers(struct power_supply *psy) { - led_trigger_unregister_simple(psy->online_trig); - kfree(psy->online_trig_name); + power_supply_unregister_led_trigger(psy->online_trig); } /* Choice what triggers to create&update. */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index c852cc882501..e218a8ab72ee 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -310,17 +310,11 @@ struct power_supply { #ifdef CONFIG_LEDS_TRIGGERS struct led_trigger *charging_full_trig; - char *charging_full_trig_name; struct led_trigger *charging_trig; - char *charging_trig_name; struct led_trigger *full_trig; - char *full_trig_name; struct led_trigger *online_trig; - char *online_trig_name; struct led_trigger *charging_blink_full_solid_trig; - char *charging_blink_full_solid_trig_name; struct led_trigger *charging_orange_full_green_trig; - char *charging_orange_full_green_trig_name; #endif }; -- cgit v1.2.3 From 6c951a84dab9c0e051aec54d7497f25e910a4953 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 31 May 2024 15:47:01 +0200 Subject: power: supply: leds: Share trig pointer for online and charging_full Either 5 different LED triggers are registered for battery power-supply devices or a single online LED trigger is used for non battery power-supply devices. These 5 / 1 LED trigger(s) are never used at the same time. So there is no need for a separate LED trigger pointer for the online trigger. Rename the first battery trigger from charging_full_trig to just trig and use this for the online trigger too. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531134702.166145-3-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_leds.c | 18 +++++++++--------- include/linux/power_supply.h | 3 +-- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply_leds.c b/drivers/power/supply/power_supply_leds.c index e3f65ff2f064..f2d6b8d33707 100644 --- a/drivers/power/supply/power_supply_leds.c +++ b/drivers/power/supply/power_supply_leds.c @@ -91,7 +91,7 @@ static void power_supply_update_bat_leds(struct power_supply *psy) switch (status.intval) { case POWER_SUPPLY_STATUS_FULL: - led_trigger_event(psy->charging_full_trig, LED_FULL); + led_trigger_event(psy->trig, LED_FULL); led_trigger_event(psy->charging_trig, LED_OFF); led_trigger_event(psy->full_trig, LED_FULL); /* Going from blink to LED on requires a LED_OFF event to stop blink */ @@ -103,7 +103,7 @@ static void power_supply_update_bat_leds(struct power_supply *psy) LED_FULL); break; case POWER_SUPPLY_STATUS_CHARGING: - led_trigger_event(psy->charging_full_trig, LED_FULL); + led_trigger_event(psy->trig, LED_FULL); led_trigger_event(psy->charging_trig, LED_FULL); led_trigger_event(psy->full_trig, LED_OFF); led_trigger_blink(psy->charging_blink_full_solid_trig, 0, 0); @@ -113,7 +113,7 @@ static void power_supply_update_bat_leds(struct power_supply *psy) LED_FULL); break; default: - led_trigger_event(psy->charging_full_trig, LED_OFF); + led_trigger_event(psy->trig, LED_OFF); led_trigger_event(psy->charging_trig, LED_OFF); led_trigger_event(psy->full_trig, LED_OFF); led_trigger_event(psy->charging_blink_full_solid_trig, @@ -126,7 +126,7 @@ static void power_supply_update_bat_leds(struct power_supply *psy) static void power_supply_remove_bat_triggers(struct power_supply *psy) { - power_supply_unregister_led_trigger(psy->charging_full_trig); + power_supply_unregister_led_trigger(psy->trig); power_supply_unregister_led_trigger(psy->charging_trig); power_supply_unregister_led_trigger(psy->full_trig); power_supply_unregister_led_trigger(psy->charging_blink_full_solid_trig); @@ -138,7 +138,7 @@ static int power_supply_create_bat_triggers(struct power_supply *psy) int err = 0; power_supply_register_led_trigger(psy, "%s-charging-or-full", - &psy->charging_full_trig, &err); + &psy->trig, &err); power_supply_register_led_trigger(psy, "%s-charging", &psy->charging_trig, &err); power_supply_register_led_trigger(psy, "%s-full", @@ -165,19 +165,19 @@ static void power_supply_update_gen_leds(struct power_supply *psy) dev_dbg(&psy->dev, "%s %d\n", __func__, online.intval); if (online.intval) - led_trigger_event(psy->online_trig, LED_FULL); + led_trigger_event(psy->trig, LED_FULL); else - led_trigger_event(psy->online_trig, LED_OFF); + led_trigger_event(psy->trig, LED_OFF); } static int power_supply_create_gen_triggers(struct power_supply *psy) { - return power_supply_register_led_trigger(psy, "%s-online", &psy->online_trig, NULL); + return power_supply_register_led_trigger(psy, "%s-online", &psy->trig, NULL); } static void power_supply_remove_gen_triggers(struct power_supply *psy) { - power_supply_unregister_led_trigger(psy->online_trig); + power_supply_unregister_led_trigger(psy->trig); } /* Choice what triggers to create&update. */ diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index e218a8ab72ee..65082ef75692 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -309,10 +309,9 @@ struct power_supply { #endif #ifdef CONFIG_LEDS_TRIGGERS - struct led_trigger *charging_full_trig; + struct led_trigger *trig; struct led_trigger *charging_trig; struct led_trigger *full_trig; - struct led_trigger *online_trig; struct led_trigger *charging_blink_full_solid_trig; struct led_trigger *charging_orange_full_green_trig; #endif -- cgit v1.2.3 From a14a569a9918a0c7e340257a17dbc088bb27db72 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 29 May 2024 08:27:11 +0200 Subject: platform/chrome: cros_ec_proto: Introduce cros_ec_cmd_readmem() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To read from the EC memory different mechanism are possible. ECs connected via LPC expose their memory via a ->cmd_readmem operation. Other protocols require the usage of EC_CMD_READ_MEMMAP, which on the other hand is not implemented by LPC ECs. Provide a helper that automatically selects the correct mechanism. Signed-off-by: Thomas Weißschuh Reviewed-by: Guenter Roeck Link: https://lore.kernel.org/r/20240529-cros_ec-hwmon-v4-1-5cdf0c5db50a@weissschuh.net Signed-off-by: Tzung-Bi Shih --- drivers/platform/chrome/cros_ec_proto.c | 27 +++++++++++++++++++++++++++ include/linux/platform_data/cros_ec_proto.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index dcfc18fe1cdf..2ad1b8221ec0 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -1033,3 +1033,30 @@ error: return ret; } EXPORT_SYMBOL_GPL(cros_ec_cmd); + +/** + * cros_ec_cmd_readmem - Read from EC memory. + * + * @ec_dev: EC device + * @offset: Is within EC_LPC_ADDR_MEMMAP region. + * @size: Number of bytes to read. + * @dest: EC command output data + * + * Return: >= 0 on success, negative error number on failure. + */ +int cros_ec_cmd_readmem(struct cros_ec_device *ec_dev, u8 offset, u8 size, void *dest) +{ + struct ec_params_read_memmap params = {}; + + if (!size) + return -EINVAL; + + if (ec_dev->cmd_readmem) + return ec_dev->cmd_readmem(ec_dev, offset, size, dest); + + params.offset = offset; + params.size = size; + return cros_ec_cmd(ec_dev, 0, EC_CMD_READ_MEMMAP, + ¶ms, sizeof(params), dest, size); +} +EXPORT_SYMBOL_GPL(cros_ec_cmd_readmem); diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 8865e350c12a..1ddc52603f9a 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -261,6 +261,8 @@ int cros_ec_get_sensor_count(struct cros_ec_dev *ec); int cros_ec_cmd(struct cros_ec_device *ec_dev, unsigned int version, int command, const void *outdata, size_t outsize, void *indata, size_t insize); +int cros_ec_cmd_readmem(struct cros_ec_device *ec_dev, u8 offset, u8 size, void *dest); + /** * cros_ec_get_time_ns() - Return time in ns. * -- cgit v1.2.3 From 6b2e29977518ec13ef3022f234ff8f3014c243da Mon Sep 17 00:00:00 2001 From: Lakshmi Sowjanya D Date: Mon, 13 May 2024 16:08:02 +0530 Subject: timekeeping: Provide infrastructure for converting to/from a base clock Hardware time stamps like provided by PTP clock implementations are based on a clock which feeds both the PCIe device and the system clock. For further processing the underlying hardwarre clock timestamp must be converted to the system clock. Right now this requires drivers to invoke an architecture specific conversion function, e.g. to convert the ART (Always Running Timer) timestamp to a TSC timestamp. As the system clock is aware of the underlying base clock, this can be moved to the core code by providing a base clock property for the system clock which contains the conversion factors and assigning a clocksource ID to the base clock. Add the required data structures and the conversion infrastructure in the core code to prepare for converting X86 and the related PTP drivers over. [ tglx: Added a missing READ_ONCE(). Massaged change log ] Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Christopher S. Hall Signed-off-by: Christopher S. Hall Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240513103813.5666-2-lakshmi.sowjanya.d@intel.com --- include/linux/clocksource.h | 27 +++++++++++++++++++++++++++ include/linux/timekeeping.h | 2 ++ kernel/time/timekeeping.c | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 70 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 0ad8b550bb4b..d35b677b08fe 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -21,6 +21,7 @@ #include #include +struct clocksource_base; struct clocksource; struct module; @@ -50,6 +51,7 @@ struct module; * multiplication * @name: Pointer to clocksource name * @list: List head for registration (internal) + * @freq_khz: Clocksource frequency in khz. * @rating: Rating value for selection (higher is better) * To avoid rating inflation the following * list should give you a guide as to how @@ -70,6 +72,8 @@ struct module; * validate the clocksource from which the snapshot was * taken. * @flags: Flags describing special properties + * @base: Hardware abstraction for clock on which a clocksource + * is based * @enable: Optional function to enable the clocksource * @disable: Optional function to disable the clocksource * @suspend: Optional suspend function for the clocksource @@ -107,10 +111,12 @@ struct clocksource { u64 max_cycles; const char *name; struct list_head list; + u32 freq_khz; int rating; enum clocksource_ids id; enum vdso_clock_mode vdso_clock_mode; unsigned long flags; + struct clocksource_base *base; int (*enable)(struct clocksource *cs); void (*disable)(struct clocksource *cs); @@ -306,4 +312,25 @@ static inline unsigned int clocksource_get_max_watchdog_retry(void) void clocksource_verify_percpu(struct clocksource *cs); +/** + * struct clocksource_base - hardware abstraction for clock on which a clocksource + * is based + * @id: Defaults to CSID_GENERIC. The id value is used for conversion + * functions which require that the current clocksource is based + * on a clocksource_base with a particular ID in certain snapshot + * functions to allow callers to validate the clocksource from + * which the snapshot was taken. + * @freq_khz: Nominal frequency of the base clock in kHz + * @offset: Offset between the base clock and the clocksource + * @numerator: Numerator of the clock ratio between base clock and the clocksource + * @denominator: Denominator of the clock ratio between base clock and the clocksource + */ +struct clocksource_base { + enum clocksource_ids id; + u32 freq_khz; + u64 offset; + u32 numerator; + u32 denominator; +}; + #endif /* _LINUX_CLOCKSOURCE_H */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 0ea7823b7f31..b2ee182d891e 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -310,10 +310,12 @@ struct system_device_crosststamp { * timekeeping code to verify comparability of two cycle values. * The default ID, CSID_GENERIC, does not identify a specific * clocksource. + * @use_nsecs: @cycles is in nanoseconds. */ struct system_counterval_t { u64 cycles; enum clocksource_ids cs_id; + bool use_nsecs; }; /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 4e18db1819f8..3096e10e0a1d 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1195,6 +1195,46 @@ static bool timestamp_in_interval(u64 start, u64 end, u64 ts) return false; } +static bool convert_clock(u64 *val, u32 numerator, u32 denominator) +{ + u64 rem, res; + + if (!numerator || !denominator) + return false; + + res = div64_u64_rem(*val, denominator, &rem) * numerator; + *val = res + div_u64(rem * numerator, denominator); + return true; +} + +static bool convert_base_to_cs(struct system_counterval_t *scv) +{ + struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; + struct clocksource_base *base; + u32 num, den; + + /* The timestamp was taken from the time keeper clock source */ + if (cs->id == scv->cs_id) + return true; + + /* + * Check whether cs_id matches the base clock. Prevent the compiler from + * re-evaluating @base as the clocksource might change concurrently. + */ + base = READ_ONCE(cs->base); + if (!base || base->id != scv->cs_id) + return false; + + num = scv->use_nsecs ? cs->freq_khz : base->numerator; + den = scv->use_nsecs ? USEC_PER_SEC : base->denominator; + + if (!convert_clock(&scv->cycles, num, den)) + return false; + + scv->cycles += base->offset; + return true; +} + /** * get_device_system_crosststamp - Synchronously capture system/device timestamp * @get_time_fn: Callback to get simultaneous device time and @@ -1241,7 +1281,7 @@ int get_device_system_crosststamp(int (*get_time_fn) * installed timekeeper clocksource */ if (system_counterval.cs_id == CSID_GENERIC || - tk->tkr_mono.clock->id != system_counterval.cs_id) + !convert_base_to_cs(&system_counterval)) return -ENODEV; cycles = system_counterval.cycles; -- cgit v1.2.3 From 3a52886c8f972c3a5b70bfec330c71817cd7fc63 Mon Sep 17 00:00:00 2001 From: Lakshmi Sowjanya D Date: Mon, 13 May 2024 16:08:03 +0530 Subject: x86/tsc: Provide ART base clock information for TSC The core code provides a new mechanism to allow conversion between ART and TSC. This allows to replace the x86 specific ART/TSC conversion functions. Prepare for removal by filling in the base clock conversion information for ART and associating the base clock to the TSC clocksource. The existing conversion functions will be removed once the usage sites are converted over to the new model. [ tglx: Massaged change log ] Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Christopher S. Hall Signed-off-by: Christopher S. Hall Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240513103813.5666-3-lakshmi.sowjanya.d@intel.com --- arch/x86/kernel/tsc.c | 42 +++++++++++++++++++++++------------------ include/linux/clocksource_ids.h | 1 + 2 files changed, 25 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 06b170759e5b..d1888db5db91 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -50,9 +50,9 @@ int tsc_clocksource_reliable; static int __read_mostly tsc_force_recalibrate; -static u32 art_to_tsc_numerator; -static u32 art_to_tsc_denominator; -static u64 art_to_tsc_offset; +static struct clocksource_base art_base_clk = { + .id = CSID_X86_ART, +}; static bool have_art; struct cyc2ns { @@ -1074,7 +1074,7 @@ core_initcall(cpufreq_register_tsc_scaling); */ static void __init detect_art(void) { - unsigned int unused[2]; + unsigned int unused; if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) return; @@ -1089,13 +1089,14 @@ static void __init detect_art(void) tsc_async_resets) return; - cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, - &art_to_tsc_numerator, unused, unused+1); + cpuid(ART_CPUID_LEAF, &art_base_clk.denominator, + &art_base_clk.numerator, &art_base_clk.freq_khz, &unused); - if (art_to_tsc_denominator < ART_MIN_DENOMINATOR) + art_base_clk.freq_khz /= KHZ; + if (art_base_clk.denominator < ART_MIN_DENOMINATOR) return; - rdmsrl(MSR_IA32_TSC_ADJUST, art_to_tsc_offset); + rdmsrl(MSR_IA32_TSC_ADJUST, art_base_clk.offset); /* Make this sticky over multiple CPU init calls */ setup_force_cpu_cap(X86_FEATURE_ART); @@ -1303,13 +1304,13 @@ struct system_counterval_t convert_art_to_tsc(u64 art) { u64 tmp, res, rem; - rem = do_div(art, art_to_tsc_denominator); + rem = do_div(art, art_base_clk.denominator); - res = art * art_to_tsc_numerator; - tmp = rem * art_to_tsc_numerator; + res = art * art_base_clk.numerator; + tmp = rem * art_base_clk.numerator; - do_div(tmp, art_to_tsc_denominator); - res += tmp + art_to_tsc_offset; + do_div(tmp, art_base_clk.denominator); + res += tmp + art_base_clk.offset; return (struct system_counterval_t) { .cs_id = have_art ? CSID_X86_TSC : CSID_GENERIC, @@ -1356,7 +1357,6 @@ struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns) } EXPORT_SYMBOL(convert_art_ns_to_tsc); - static void tsc_refine_calibration_work(struct work_struct *work); static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work); /** @@ -1458,8 +1458,10 @@ out: if (tsc_unstable) goto unreg; - if (boot_cpu_has(X86_FEATURE_ART)) + if (boot_cpu_has(X86_FEATURE_ART)) { have_art = true; + clocksource_tsc.base = &art_base_clk; + } clocksource_register_khz(&clocksource_tsc, tsc_khz); unreg: clocksource_unregister(&clocksource_tsc_early); @@ -1484,8 +1486,10 @@ static int __init init_tsc_clocksource(void) * the refined calibration and directly register it as a clocksource. */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { - if (boot_cpu_has(X86_FEATURE_ART)) + if (boot_cpu_has(X86_FEATURE_ART)) { have_art = true; + clocksource_tsc.base = &art_base_clk; + } clocksource_register_khz(&clocksource_tsc, tsc_khz); clocksource_unregister(&clocksource_tsc_early); @@ -1509,10 +1513,12 @@ static bool __init determine_cpu_tsc_frequencies(bool early) if (early) { cpu_khz = x86_platform.calibrate_cpu(); - if (tsc_early_khz) + if (tsc_early_khz) { tsc_khz = tsc_early_khz; - else + } else { tsc_khz = x86_platform.calibrate_tsc(); + clocksource_tsc.freq_khz = tsc_khz; + } } else { /* We should not be here with non-native cpu calibration */ WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu); diff --git a/include/linux/clocksource_ids.h b/include/linux/clocksource_ids.h index a4fa3436940c..2bb4d8c2f1b0 100644 --- a/include/linux/clocksource_ids.h +++ b/include/linux/clocksource_ids.h @@ -9,6 +9,7 @@ enum clocksource_ids { CSID_X86_TSC_EARLY, CSID_X86_TSC, CSID_X86_KVM_CLK, + CSID_X86_ART, CSID_MAX, }; -- cgit v1.2.3 From 02ecee07ca30f76f2a0f1381661a688b8e501ab0 Mon Sep 17 00:00:00 2001 From: Lakshmi Sowjanya D Date: Mon, 13 May 2024 16:08:10 +0530 Subject: timekeeping: Add function to convert realtime to base clock PPS (Pulse Per Second) generates a hardware pulse every second based on CLOCK_REALTIME. This works fine when the pulse is generated in software from a hrtimer callback function. For hardware which generates the pulse by programming a timer it is required to convert CLOCK_REALTIME to the underlying hardware clock. The X86 Timed IO device is based on the Always Running Timer (ART), which is the base clock of the TSC, which is usually the system clocksource on X86. The core code already has functionality to convert base clock timestamps to system clocksource timestamps, but there is no support for converting the other way around. Provide the required functionality to support such devices in a generic way to avoid code duplication in drivers: 1) ktime_real_to_base_clock() to convert a CLOCK_REALTIME timestamp to a base clock timestamp 2) timekeeping_clocksource_has_base() to allow drivers to validate that the system clocksource is based on a particular clocksource ID. [ tglx: Simplify timekeeping_clocksource_has_base() and add missing READ_ONCE() ] Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Co-developed-by: Christopher S. Hall Signed-off-by: Christopher S. Hall Signed-off-by: Lakshmi Sowjanya D Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240513103813.5666-10-lakshmi.sowjanya.d@intel.com --- include/linux/timekeeping.h | 4 +++ kernel/time/timekeeping.c | 86 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) (limited to 'include') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b2ee182d891e..fc12a9ba2c88 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -318,6 +318,10 @@ struct system_counterval_t { bool use_nsecs; }; +extern bool ktime_real_to_base_clock(ktime_t treal, + enum clocksource_ids base_id, u64 *cycles); +extern bool timekeeping_clocksource_has_base(enum clocksource_ids id); + /* * Get cross timestamp between system clock and device clock */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3096e10e0a1d..da984a368a95 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1235,6 +1235,68 @@ static bool convert_base_to_cs(struct system_counterval_t *scv) return true; } +static bool convert_cs_to_base(u64 *cycles, enum clocksource_ids base_id) +{ + struct clocksource *cs = tk_core.timekeeper.tkr_mono.clock; + struct clocksource_base *base; + + /* + * Check whether base_id matches the base clock. Prevent the compiler from + * re-evaluating @base as the clocksource might change concurrently. + */ + base = READ_ONCE(cs->base); + if (!base || base->id != base_id) + return false; + + *cycles -= base->offset; + if (!convert_clock(cycles, base->denominator, base->numerator)) + return false; + return true; +} + +static bool convert_ns_to_cs(u64 *delta) +{ + struct tk_read_base *tkr = &tk_core.timekeeper.tkr_mono; + + if (BITS_TO_BYTES(fls64(*delta) + tkr->shift) >= sizeof(*delta)) + return false; + + *delta = div_u64((*delta << tkr->shift) - tkr->xtime_nsec, tkr->mult); + return true; +} + +/** + * ktime_real_to_base_clock() - Convert CLOCK_REALTIME timestamp to a base clock timestamp + * @treal: CLOCK_REALTIME timestamp to convert + * @base_id: base clocksource id + * @cycles: pointer to store the converted base clock timestamp + * + * Converts a supplied, future realtime clock value to the corresponding base clock value. + * + * Return: true if the conversion is successful, false otherwise. + */ +bool ktime_real_to_base_clock(ktime_t treal, enum clocksource_ids base_id, u64 *cycles) +{ + struct timekeeper *tk = &tk_core.timekeeper; + unsigned int seq; + u64 delta; + + do { + seq = read_seqcount_begin(&tk_core.seq); + if ((u64)treal < tk->tkr_mono.base_real) + return false; + delta = (u64)treal - tk->tkr_mono.base_real; + if (!convert_ns_to_cs(&delta)) + return false; + *cycles = tk->tkr_mono.cycle_last + delta; + if (!convert_cs_to_base(cycles, base_id)) + return false; + } while (read_seqcount_retry(&tk_core.seq, seq)); + + return true; +} +EXPORT_SYMBOL_GPL(ktime_real_to_base_clock); + /** * get_device_system_crosststamp - Synchronously capture system/device timestamp * @get_time_fn: Callback to get simultaneous device time and @@ -1346,6 +1408,30 @@ int get_device_system_crosststamp(int (*get_time_fn) } EXPORT_SYMBOL_GPL(get_device_system_crosststamp); +/** + * timekeeping_clocksource_has_base - Check whether the current clocksource + * is based on given a base clock + * @id: base clocksource ID + * + * Note: The return value is a snapshot which can become invalid right + * after the function returns. + * + * Return: true if the timekeeper clocksource has a base clock with @id, + * false otherwise + */ +bool timekeeping_clocksource_has_base(enum clocksource_ids id) +{ + /* + * This is a snapshot, so no point in using the sequence + * count. Just prevent the compiler from re-evaluating @base as the + * clocksource might change concurrently. + */ + struct clocksource_base *base = READ_ONCE(tk_core.timekeeper.tkr_mono.clock->base); + + return base ? base->id == id : false; +} +EXPORT_SYMBOL_GPL(timekeeping_clocksource_has_base); + /** * do_settimeofday64 - Sets the time of day. * @ts: pointer to the timespec64 variable containing the new time -- cgit v1.2.3 From 56f45266df67aa0f5b2a6881c8c4d16dbfff6b7d Mon Sep 17 00:00:00 2001 From: Ken Sloat Date: Thu, 15 Dec 2022 16:07:15 +0000 Subject: pwm: xilinx: Fix u32 overflow issue in 32-bit width PWM mode. This timer HW supports 8, 16 and 32-bit timer widths. This driver currently uses a u32 to store the max possible value of the timer. However, statements perform addition of 2 in xilinx_pwm_apply() when calculating the period_cycles and duty_cycles values. Since priv->max is a u32, this will result in an overflow to 1 which will not only be incorrect but fail on range comparison. This results in making it impossible to set the PWM in this timer mode. There are two obvious solutions to the current problem: 1. Cast each instance where overflow occurs to u64. 2. Change priv->max from a u32 to a u64. Solution #1 requires more code modifications, and leaves opportunity to introduce similar overflows if other math statements are added in the future. These may also go undetected if running in non 32-bit timer modes. Solution #2 is the much smaller and cleaner approach and thus the chosen method in this patch. This was tested on a Zynq UltraScale+ with multiple instances of the PWM IP. Signed-off-by: Ken Sloat Reviewed-by: Michal Simek Reviewed-by: Sean Anderson Link: https://lore.kernel.org/r/SJ0P222MB0107490C5371B848EF04351CA1E19@SJ0P222MB0107.NAMP222.PROD.OUTLOOK.COM Signed-off-by: Michal Simek --- include/clocksource/timer-xilinx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/clocksource/timer-xilinx.h b/include/clocksource/timer-xilinx.h index c0f56fe6d22a..d116f18de899 100644 --- a/include/clocksource/timer-xilinx.h +++ b/include/clocksource/timer-xilinx.h @@ -41,7 +41,7 @@ struct regmap; struct xilinx_timer_priv { struct regmap *map; struct clk *clk; - u32 max; + u64 max; }; /** -- cgit v1.2.3 From fcf544ac64397776a18246eba5a8ca72a47c4405 Mon Sep 17 00:00:00 2001 From: Jay Buddhabhatti Date: Wed, 24 Apr 2024 05:49:00 -0700 Subject: soc: xilinx: Add cb event for subsystem restart Add support to register subsystem restart events from firmware for Versal and Versal NET platforms. This event is received when firmware requests for subsystem restart. After receiving this event, the kernel needs to be restarted. Signed-off-by: Jay Buddhabhatti Link: https://lore.kernel.org/r/20240424124900.29287-1-jay.buddhabhatti@amd.com Signed-off-by: Michal Simek --- drivers/soc/xilinx/zynqmp_power.c | 151 ++++++++++++++++++++++++---- include/linux/firmware/xlnx-event-manager.h | 10 ++ 2 files changed, 141 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c index 965b1143936a..fced6bedca43 100644 --- a/drivers/soc/xilinx/zynqmp_power.c +++ b/drivers/soc/xilinx/zynqmp_power.c @@ -30,9 +30,27 @@ struct zynqmp_pm_work_struct { u32 args[CB_ARG_CNT]; }; -static struct zynqmp_pm_work_struct *zynqmp_pm_init_suspend_work; +/** + * struct zynqmp_pm_event_info - event related information + * @cb_fun: Function pointer to store the callback function. + * @cb_type: Type of callback from pm_api_cb_id, + * PM_NOTIFY_CB - for Error Events, + * PM_INIT_SUSPEND_CB - for suspend callback. + * @node_id: Node-Id related to event. + * @event: Event Mask for the Error Event. + * @wake: Flag specifying whether the subsystem should be woken upon + * event notification. + */ +struct zynqmp_pm_event_info { + event_cb_func_t cb_fun; + enum pm_api_cb_id cb_type; + u32 node_id; + u32 event; + bool wake; +}; + +static struct zynqmp_pm_work_struct *zynqmp_pm_init_suspend_work, *zynqmp_pm_init_restart_work; static struct mbox_chan *rx_chan; -static bool event_registered; enum pm_suspend_mode { PM_SUSPEND_MODE_FIRST = 0, @@ -54,6 +72,19 @@ static void zynqmp_pm_get_callback_data(u32 *buf) zynqmp_pm_invoke_fn(GET_CALLBACK_DATA, buf, 0); } +static void subsystem_restart_event_callback(const u32 *payload, void *data) +{ + /* First element is callback API ID, others are callback arguments */ + if (work_pending(&zynqmp_pm_init_restart_work->callback_work)) + return; + + /* Copy callback arguments into work's structure */ + memcpy(zynqmp_pm_init_restart_work->args, &payload[0], + sizeof(zynqmp_pm_init_restart_work->args)); + + queue_work(system_unbound_wq, &zynqmp_pm_init_restart_work->callback_work); +} + static void suspend_event_callback(const u32 *payload, void *data) { /* First element is callback API ID, others are callback arguments */ @@ -119,6 +150,37 @@ static void ipi_receive_callback(struct mbox_client *cl, void *data) } } +/** + * zynqmp_pm_subsystem_restart_work_fn - Initiate Subsystem restart + * @work: Pointer to work_struct + * + * Bottom-half of PM callback IRQ handler. + */ +static void zynqmp_pm_subsystem_restart_work_fn(struct work_struct *work) +{ + int ret; + struct zynqmp_pm_work_struct *pm_work = container_of(work, struct zynqmp_pm_work_struct, + callback_work); + + /* First element is callback API ID, others are callback arguments */ + if (pm_work->args[0] == PM_NOTIFY_CB) { + if (pm_work->args[2] == EVENT_SUBSYSTEM_RESTART) { + ret = zynqmp_pm_system_shutdown(ZYNQMP_PM_SHUTDOWN_TYPE_SETSCOPE_ONLY, + ZYNQMP_PM_SHUTDOWN_SUBTYPE_SUBSYSTEM); + if (ret) { + pr_err("unable to set shutdown scope\n"); + return; + } + + kernel_restart(NULL); + } else { + pr_err("%s Unsupported Event - %d\n", __func__, pm_work->args[2]); + } + } else { + pr_err("%s() Unsupported Callback %d\n", __func__, pm_work->args[0]); + } +} + /** * zynqmp_pm_init_suspend_work_fn - Initialize suspend * @work: Pointer to work_struct @@ -184,10 +246,46 @@ static ssize_t suspend_mode_store(struct device *dev, static DEVICE_ATTR_RW(suspend_mode); +static void unregister_event(struct device *dev, void *res) +{ + struct zynqmp_pm_event_info *event_info = res; + + xlnx_unregister_event(event_info->cb_type, event_info->node_id, + event_info->event, event_info->cb_fun, NULL); +} + +static int register_event(struct device *dev, const enum pm_api_cb_id cb_type, const u32 node_id, + const u32 event, const bool wake, event_cb_func_t cb_fun) +{ + int ret; + struct zynqmp_pm_event_info *event_info; + + event_info = devres_alloc(unregister_event, sizeof(struct zynqmp_pm_event_info), + GFP_KERNEL); + if (!event_info) + return -ENOMEM; + + event_info->cb_type = cb_type; + event_info->node_id = node_id; + event_info->event = event; + event_info->wake = wake; + event_info->cb_fun = cb_fun; + + ret = xlnx_register_event(event_info->cb_type, event_info->node_id, + event_info->event, event_info->wake, event_info->cb_fun, NULL); + if (ret) { + devres_free(event_info); + return ret; + } + + devres_add(dev, event_info); + return 0; +} + static int zynqmp_pm_probe(struct platform_device *pdev) { int ret, irq; - u32 pm_api_version; + u32 pm_api_version, pm_family_code, pm_sub_family_code, node_id; struct mbox_client *client; zynqmp_pm_get_api_version(&pm_api_version); @@ -203,21 +301,43 @@ static int zynqmp_pm_probe(struct platform_device *pdev) * is not available to use) or -ENODEV(Xilinx Event Manager not compiled), * then use ipi-mailbox or interrupt method. */ - ret = xlnx_register_event(PM_INIT_SUSPEND_CB, 0, 0, false, - suspend_event_callback, NULL); + ret = register_event(&pdev->dev, PM_INIT_SUSPEND_CB, 0, 0, false, + suspend_event_callback); if (!ret) { zynqmp_pm_init_suspend_work = devm_kzalloc(&pdev->dev, sizeof(struct zynqmp_pm_work_struct), GFP_KERNEL); - if (!zynqmp_pm_init_suspend_work) { - xlnx_unregister_event(PM_INIT_SUSPEND_CB, 0, 0, - suspend_event_callback, NULL); + if (!zynqmp_pm_init_suspend_work) return -ENOMEM; - } - event_registered = true; INIT_WORK(&zynqmp_pm_init_suspend_work->callback_work, zynqmp_pm_init_suspend_work_fn); + + ret = zynqmp_pm_get_family_info(&pm_family_code, &pm_sub_family_code); + if (ret < 0) + return ret; + + if (pm_sub_family_code == VERSALNET_SUB_FAMILY_CODE) + node_id = PM_DEV_ACPU_0_0; + else + node_id = PM_DEV_ACPU_0; + + ret = register_event(&pdev->dev, PM_NOTIFY_CB, node_id, EVENT_SUBSYSTEM_RESTART, + false, subsystem_restart_event_callback); + if (ret) { + dev_err(&pdev->dev, "Failed to Register with Xilinx Event manager %d\n", + ret); + return ret; + } + + zynqmp_pm_init_restart_work = devm_kzalloc(&pdev->dev, + sizeof(struct zynqmp_pm_work_struct), + GFP_KERNEL); + if (!zynqmp_pm_init_restart_work) + return -ENOMEM; + + INIT_WORK(&zynqmp_pm_init_restart_work->callback_work, + zynqmp_pm_subsystem_restart_work_fn); } else if (ret != -EACCES && ret != -ENODEV) { dev_err(&pdev->dev, "Failed to Register with Xilinx Event manager %d\n", ret); return ret; @@ -264,15 +384,8 @@ static int zynqmp_pm_probe(struct platform_device *pdev) } ret = sysfs_create_file(&pdev->dev.kobj, &dev_attr_suspend_mode.attr); - if (ret) { - if (event_registered) { - xlnx_unregister_event(PM_INIT_SUSPEND_CB, 0, 0, suspend_event_callback, - NULL); - event_registered = false; - } - dev_err(&pdev->dev, "unable to create sysfs interface\n"); + if (ret) return ret; - } return 0; } @@ -280,8 +393,6 @@ static int zynqmp_pm_probe(struct platform_device *pdev) static void zynqmp_pm_remove(struct platform_device *pdev) { sysfs_remove_file(&pdev->dev.kobj, &dev_attr_suspend_mode.attr); - if (event_registered) - xlnx_unregister_event(PM_INIT_SUSPEND_CB, 0, 0, suspend_event_callback, NULL); if (!rx_chan) mbox_free_channel(rx_chan); diff --git a/include/linux/firmware/xlnx-event-manager.h b/include/linux/firmware/xlnx-event-manager.h index 82e8254b0f80..645dd34155e6 100644 --- a/include/linux/firmware/xlnx-event-manager.h +++ b/include/linux/firmware/xlnx-event-manager.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Xilinx Event Management Driver + * + * Copyright (C) 2024, Advanced Micro Devices, Inc. + */ #ifndef _FIRMWARE_XLNX_EVENT_MANAGER_H_ #define _FIRMWARE_XLNX_EVENT_MANAGER_H_ @@ -7,6 +12,11 @@ #define CB_MAX_PAYLOAD_SIZE (4U) /*In payload maximum 32bytes */ +#define EVENT_SUBSYSTEM_RESTART (4U) + +#define PM_DEV_ACPU_0_0 (0x1810c0afU) +#define PM_DEV_ACPU_0 (0x1810c003U) + /************************** Exported Function *****************************/ typedef void (*event_cb_func_t)(const u32 *payload, void *data); -- cgit v1.2.3 From 494c55a1ec0ab40198cf43f5a41c7c5e0b70e7fc Mon Sep 17 00:00:00 2001 From: Ronak Jain Date: Thu, 25 Apr 2024 02:59:13 -0700 Subject: firmware: xilinx: Move FIRMWARE_VERSION_MASK to xlnx-zynqmp.h Move FIRMWARE_VERSION_MASK macro to xlnx-zynqmp.h so that other drivers can use it for verifying the supported firmware version. Signed-off-by: Ronak Jain Signed-off-by: Anand Ashok Dumbre Signed-off-by: Radhey Shyam Pandey Link: https://lore.kernel.org/r/20240425095913.919390-1-ronak.jain@amd.com Signed-off-by: Michal Simek --- drivers/firmware/xilinx/zynqmp.c | 3 --- drivers/soc/xilinx/xlnx_event_manager.c | 1 - include/linux/firmware/xlnx-zynqmp.h | 3 +++ 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c index 9bc45357e1a8..add8acf66a9c 100644 --- a/drivers/firmware/xilinx/zynqmp.c +++ b/drivers/firmware/xilinx/zynqmp.c @@ -41,9 +41,6 @@ /* IOCTL/QUERY feature payload size */ #define FEATURE_PAYLOAD_SIZE 2 -/* Firmware feature check version mask */ -#define FIRMWARE_VERSION_MASK GENMASK(15, 0) - static bool feature_check_enabled; static DEFINE_HASHTABLE(pm_api_features_map, PM_API_FEATURE_CHECK_MAX_ORDER); static u32 ioctl_features[FEATURE_PAYLOAD_SIZE]; diff --git a/drivers/soc/xilinx/xlnx_event_manager.c b/drivers/soc/xilinx/xlnx_event_manager.c index 366018f6a0ee..f529e1346247 100644 --- a/drivers/soc/xilinx/xlnx_event_manager.c +++ b/drivers/soc/xilinx/xlnx_event_manager.c @@ -36,7 +36,6 @@ static int event_manager_availability = -EACCES; #define MAX_BITS (32U) /* Number of bits available for error mask */ -#define FIRMWARE_VERSION_MASK (0xFFFFU) #define REGISTER_NOTIFIER_FIRMWARE_VERSION (2U) static DEFINE_HASHTABLE(reg_driver_map, REGISTERED_DRIVER_MAX_ORDER); diff --git a/include/linux/firmware/xlnx-zynqmp.h b/include/linux/firmware/xlnx-zynqmp.h index 1a069a56c961..d7d07afc0532 100644 --- a/include/linux/firmware/xlnx-zynqmp.h +++ b/include/linux/firmware/xlnx-zynqmp.h @@ -52,6 +52,9 @@ #define API_ID_MASK GENMASK(7, 0) #define MODULE_ID_MASK GENMASK(11, 8) +/* Firmware feature check version mask */ +#define FIRMWARE_VERSION_MASK 0xFFFFU + /* ATF only commands */ #define TF_A_PM_REGISTER_SGI 0xa04 #define PM_GET_TRUSTZONE_VERSION 0xa03 -- cgit v1.2.3 From a5b7365f28c191df6b93f60942d2b9a9fe71746c Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 3 Jun 2024 14:58:41 +0800 Subject: soundwire: bus: add stream refcount The notion of stream is by construction based on a multi-bus capability, to allow for aggregation of Peripheral devices or functions located on different segments. We currently count how many master_rt contexts are used by a stream, but we don't have the dual refcount of how many streams are allocated on a given bus. This refcount will be useful to check if BTP/BRA streams can be allocated. Note that the stream_refcount is modified in sdw_master_rt_alloc() and sdw_master_rt_free() which are both called with the bus_lock mutex held, so there's no need for refcount_ primitives for additional protection. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Rander Wang Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20240603065841.4860-2-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/stream.c | 5 +++++ include/linux/soundwire/sdw.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index 4e9e7d2a942d..7aa4900dcf31 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -1181,6 +1181,8 @@ static struct sdw_master_runtime m_rt->bus = bus; m_rt->stream = stream; + bus->stream_refcount++; + return m_rt; } @@ -1217,6 +1219,7 @@ static void sdw_master_rt_free(struct sdw_master_runtime *m_rt, struct sdw_stream_runtime *stream) { struct sdw_slave_runtime *s_rt, *_s_rt; + struct sdw_bus *bus = m_rt->bus; list_for_each_entry_safe(s_rt, _s_rt, &m_rt->slave_rt_list, m_rt_node) { sdw_slave_port_free(s_rt->slave, stream); @@ -1226,6 +1229,8 @@ static void sdw_master_rt_free(struct sdw_master_runtime *m_rt, list_del(&m_rt->stream_node); list_del(&m_rt->bus_node); kfree(m_rt); + + bus->stream_refcount--; } /** diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 13e96d8b7423..94fc1b57c57b 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -903,6 +903,7 @@ struct sdw_master_ops { * meaningful if multi_link is set. If set to 1, hardware-based * synchronization will be used even if a stream only uses a single * SoundWire segment. + * @stream_refcount: number of streams currently using this bus */ struct sdw_bus { struct device *dev; @@ -933,6 +934,7 @@ struct sdw_bus { u32 bank_switch_timeout; bool multi_link; int hw_sync_min_links; + int stream_refcount; }; int sdw_bus_master_add(struct sdw_bus *bus, struct device *parent, -- cgit v1.2.3 From 9b5fd115e7d5a98b82054cff5c96f6768ee06845 Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Mon, 3 Jun 2024 15:02:40 +0800 Subject: soundwire: intel_ace2.x: add AC timing extensions for PantherLake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACE3 IP used in PantherLake exposes new bitfields in the ACTMCTL register to better control clocks/delays. These bitfields were reserved/zero in the ACE2.x IP, to simplify the integration the new bifields are added unconditionally. The behavior will only be impacted when the firmware exposes DSD properties to set non-zero values. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://lore.kernel.org/r/20240603070240.5165-1-yung-chuan.liao@linux.intel.com Signed-off-by: Vinod Koul --- drivers/soundwire/intel.h | 5 +++++ drivers/soundwire/intel_ace2x.c | 15 +++++++++++++++ drivers/soundwire/intel_auxdevice.c | 20 ++++++++++++++++++++ include/linux/soundwire/sdw_intel.h | 5 +++++ 4 files changed, 45 insertions(+) (limited to 'include') diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h index b68e74c294e7..68838e843b54 100644 --- a/drivers/soundwire/intel.h +++ b/drivers/soundwire/intel.h @@ -59,6 +59,11 @@ struct sdw_intel { }; struct sdw_intel_prop { + u16 clde; + u16 doaise2; + u16 dodse2; + u16 clds; + u16 clss; u16 doaise; u16 doais; u16 dodse; diff --git a/drivers/soundwire/intel_ace2x.c b/drivers/soundwire/intel_ace2x.c index 8b1b6ad420cf..0dadf46faca6 100644 --- a/drivers/soundwire/intel_ace2x.c +++ b/drivers/soundwire/intel_ace2x.c @@ -27,6 +27,11 @@ static void intel_shim_vs_init(struct sdw_intel *sdw) void __iomem *shim_vs = sdw->link_res->shim_vs; struct sdw_bus *bus = &sdw->cdns.bus; struct sdw_intel_prop *intel_prop; + u16 clde; + u16 doaise2; + u16 dodse2; + u16 clds; + u16 clss; u16 doaise; u16 doais; u16 dodse; @@ -34,12 +39,22 @@ static void intel_shim_vs_init(struct sdw_intel *sdw) u16 act; intel_prop = bus->vendor_specific_prop; + clde = intel_prop->clde; + doaise2 = intel_prop->doaise2; + dodse2 = intel_prop->dodse2; + clds = intel_prop->clds; + clss = intel_prop->clss; doaise = intel_prop->doaise; doais = intel_prop->doais; dodse = intel_prop->dodse; dods = intel_prop->dods; act = intel_readw(shim_vs, SDW_SHIM2_INTEL_VS_ACTMCTL); + u16p_replace_bits(&act, clde, SDW_SHIM3_INTEL_VS_ACTMCTL_CLDE); + u16p_replace_bits(&act, doaise2, SDW_SHIM3_INTEL_VS_ACTMCTL_DOAISE2); + u16p_replace_bits(&act, dodse2, SDW_SHIM3_INTEL_VS_ACTMCTL_DODSE2); + u16p_replace_bits(&act, clds, SDW_SHIM3_INTEL_VS_ACTMCTL_CLDS); + u16p_replace_bits(&act, clss, SDW_SHIM3_INTEL_VS_ACTMCTL_CLSS); u16p_replace_bits(&act, doaise, SDW_SHIM2_INTEL_VS_ACTMCTL_DOAISE); u16p_replace_bits(&act, doais, SDW_SHIM2_INTEL_VS_ACTMCTL_DOAIS); u16p_replace_bits(&act, dodse, SDW_SHIM2_INTEL_VS_ACTMCTL_DODSE); diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c index 17cf27e6ea73..54cb455ed870 100644 --- a/drivers/soundwire/intel_auxdevice.c +++ b/drivers/soundwire/intel_auxdevice.c @@ -159,11 +159,31 @@ static int sdw_master_read_intel_prop(struct sdw_bus *bus) return -ENOMEM; /* initialize with hardware defaults, in case the properties are not found */ + intel_prop->clde = 0x0; + intel_prop->doaise2 = 0x0; + intel_prop->dodse2 = 0x0; + intel_prop->clds = 0x0; + intel_prop->clss = 0x0; intel_prop->doaise = 0x1; intel_prop->doais = 0x3; intel_prop->dodse = 0x0; intel_prop->dods = 0x1; + fwnode_property_read_u16(link, + "intel-sdw-clde", + &intel_prop->clde); + fwnode_property_read_u16(link, + "intel-sdw-doaise2", + &intel_prop->doaise2); + fwnode_property_read_u16(link, + "intel-sdw-dodse2", + &intel_prop->dodse2); + fwnode_property_read_u16(link, + "intel-sdw-clds", + &intel_prop->clds); + fwnode_property_read_u16(link, + "intel-sdw-clss", + &intel_prop->clss); fwnode_property_read_u16(link, "intel-sdw-doaise", &intel_prop->doaise); diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h index 8e78417156e3..d537587b4499 100644 --- a/include/linux/soundwire/sdw_intel.h +++ b/include/linux/soundwire/sdw_intel.h @@ -182,6 +182,11 @@ #define SDW_SHIM2_INTEL_VS_ACTMCTL_DODSE BIT(2) #define SDW_SHIM2_INTEL_VS_ACTMCTL_DOAIS GENMASK(4, 3) #define SDW_SHIM2_INTEL_VS_ACTMCTL_DOAISE BIT(5) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLSS BIT(6) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDS GENMASK(11, 7) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_DODSE2 GENMASK(13, 12) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_DOAISE2 BIT(14) +#define SDW_SHIM3_INTEL_VS_ACTMCTL_CLDE BIT(15) /** * struct sdw_intel_stream_params_data: configuration passed during -- cgit v1.2.3 From 32fe91524e1651b9a0e11ddfbc63de9aa485cf48 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 13 May 2024 11:25:18 +0200 Subject: sysctl: constify ctl_table arguments of utility function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In a future commit the proc_handlers themselves will change to "const struct ctl_table". As a preparation for that adapt the internal helper. Signed-off-by: Thomas Weißschuh Reviewed-by: Kees Cook Signed-off-by: Joel Granados --- include/linux/sysctl.h | 2 +- kernel/sysctl.c | 21 +++++++++++---------- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 09db2f2e6488..54fbec062772 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -237,7 +237,7 @@ extern struct ctl_table_header *register_sysctl_mount_point(const char *path); void do_sysctl_args(void); bool sysctl_is_alias(char *param); -int do_proc_douintvec(struct ctl_table *table, int write, +int do_proc_douintvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, unsigned int *valp, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c0a1164eaf59..e4421594fc25 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -205,7 +205,7 @@ static int _proc_do_string(char *data, int maxlen, int write, return 0; } -static void warn_sysctl_write(struct ctl_table *table) +static void warn_sysctl_write(const struct ctl_table *table) { pr_warn_once("%s wrote to %s when file position was not 0!\n" "This will not be supported in the future. To silence this\n" @@ -223,7 +223,7 @@ static void warn_sysctl_write(struct ctl_table *table) * handlers can ignore the return value. */ static bool proc_first_pos_non_zero_ignore(loff_t *ppos, - struct ctl_table *table) + const struct ctl_table *table) { if (!*ppos) return false; @@ -468,7 +468,7 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; -static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, +static int __do_proc_dointvec(void *tbl_data, const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(bool *negp, unsigned long *lvalp, int *valp, @@ -541,7 +541,7 @@ out: return err; } -static int do_proc_dointvec(struct ctl_table *table, int write, +static int do_proc_dointvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(bool *negp, unsigned long *lvalp, int *valp, int write, void *data), @@ -552,7 +552,7 @@ static int do_proc_dointvec(struct ctl_table *table, int write, } static int do_proc_douintvec_w(unsigned int *tbl_data, - struct ctl_table *table, + const struct ctl_table *table, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, @@ -639,7 +639,7 @@ out: return err; } -static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table, +static int __do_proc_douintvec(void *tbl_data, const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, @@ -675,7 +675,7 @@ static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table, return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data); } -int do_proc_douintvec(struct ctl_table *table, int write, +int do_proc_douintvec(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, unsigned int *valp, @@ -1017,8 +1017,9 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, } #endif -static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, - int write, void *buffer, size_t *lenp, loff_t *ppos, +static int __do_proc_doulongvec_minmax(void *data, + const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) { unsigned long *i, *min, *max; @@ -1090,7 +1091,7 @@ out: return err; } -static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, +static int do_proc_doulongvec_minmax(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, unsigned long convdiv) { -- cgit v1.2.3 From 45919c28134519080a85a5fb66d0f65955ef7572 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 28 May 2024 05:05:14 +0000 Subject: ASoC: simple-card-utils: remove both playback/capture_only check soc-pcm.c :: soc_get_playback_capture() will indicate error if both playback_only / capture_only were true. Thus, graph_util_parse_link_direction() which setup playback_only / capture_only don't need to check it. And, its return value is not used on existing driver. Let's remove it. Signed-off-by: Kuninori Morimoto Link: https://msgid.link/r/87a5kah6gm.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/simple_card_utils.h | 2 +- sound/soc/generic/simple-card-utils.c | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/sound/simple_card_utils.h b/include/sound/simple_card_utils.h index 2c2279d082ec..0a6435ac5c5f 100644 --- a/include/sound/simple_card_utils.h +++ b/include/sound/simple_card_utils.h @@ -197,7 +197,7 @@ int graph_util_is_ports0(struct device_node *port); int graph_util_parse_dai(struct device *dev, struct device_node *ep, struct snd_soc_dai_link_component *dlc, int *is_single_link); -int graph_util_parse_link_direction(struct device_node *np, +void graph_util_parse_link_direction(struct device_node *np, bool *is_playback_only, bool *is_capture_only); #ifdef DEBUG diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index 852989611362..bc4d518c35cb 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -1140,7 +1140,7 @@ parse_dai_end: } EXPORT_SYMBOL_GPL(graph_util_parse_dai); -int graph_util_parse_link_direction(struct device_node *np, +void graph_util_parse_link_direction(struct device_node *np, bool *playback_only, bool *capture_only) { bool is_playback_only = false; @@ -1149,13 +1149,8 @@ int graph_util_parse_link_direction(struct device_node *np, is_playback_only = of_property_read_bool(np, "playback-only"); is_capture_only = of_property_read_bool(np, "capture-only"); - if (is_playback_only && is_capture_only) - return -EINVAL; - *playback_only = is_playback_only; *capture_only = is_capture_only; - - return 0; } EXPORT_SYMBOL_GPL(graph_util_parse_link_direction); -- cgit v1.2.3 From 3d9a0a25336433d48ddca1e1f4fea25fd4a3b1d8 Mon Sep 17 00:00:00 2001 From: Sreenath Vijayan Date: Thu, 30 May 2024 13:15:47 +0530 Subject: printk: Rename console_replay_all() and update context Rename console_replay_all() to console_try_replay_all() to make clear that the implementation is best effort. Also, the function should not be called in NMI context as it takes locks, so update the comment in code. Fixes: 693f75b91a91 ("printk: Add function to replay kernel log on consoles") Fixes: 1b743485e27f ("tty/sysrq: Replay kernel log messages on consoles via sysrq") Suggested-by: Petr Mladek Signed-off-by: Shimoyashiki Taichi Signed-off-by: Sreenath Vijayan Link: https://lore.kernel.org/r/Zlguq/wU21Z8MqI4@sreenath.vijayan@sony.com Reviewed-by: Petr Mladek Signed-off-by: Petr Mladek --- drivers/tty/sysrq.c | 2 +- include/linux/printk.h | 4 ++-- kernel/printk/printk.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index e5974b8239c9..53f8c2329c30 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -452,7 +452,7 @@ static const struct sysrq_key_op sysrq_unrt_op = { static void sysrq_handle_replay_logs(u8 key) { - console_replay_all(); + console_try_replay_all(); } static struct sysrq_key_op sysrq_replay_logs_op = { .handler = sysrq_handle_replay_logs, diff --git a/include/linux/printk.h b/include/linux/printk.h index 40afab23881a..ca4c9271daf6 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -195,7 +195,7 @@ void show_regs_print_info(const char *log_lvl); extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold; extern asmlinkage void dump_stack(void) __cold; void printk_trigger_flush(void); -void console_replay_all(void); +void console_try_replay_all(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -275,7 +275,7 @@ static inline void dump_stack(void) static inline void printk_trigger_flush(void) { } -static inline void console_replay_all(void) +static inline void console_try_replay_all(void) { } #endif diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 420fd310129d..ed003c06c335 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -4314,15 +4314,15 @@ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) EXPORT_SYMBOL_GPL(kmsg_dump_rewind); /** - * console_replay_all - replay kernel log on consoles + * console_try_replay_all - try to replay kernel log on consoles * * Try to obtain lock on console subsystem and replay all * available records in printk buffer on the consoles. * Does nothing if lock is not obtained. * - * Context: Any context. + * Context: Any, except for NMI. */ -void console_replay_all(void) +void console_try_replay_all(void) { if (console_trylock()) { __console_rewind_all(); -- cgit v1.2.3 From 96a02b9fa95108269cd0cd012f091f86bd6f41a4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 15 May 2024 17:08:04 +0200 Subject: KVM: Unexport kvm_debugfs_dir After faf01aef0570 ("KVM: PPC: Merge powerpc's debugfs entry content into generic entry") kvm_debugfs_dir is not used anywhere else outside of kvm_main.c Unexport it and make it static. Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240515150804.9354-1-bp@kernel.org Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 2 -- virt/kvm/kvm_main.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 692c01e41a18..c80fe03a1fa4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1955,8 +1955,6 @@ struct _kvm_stats_desc { HALT_POLL_HIST_COUNT), \ STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking) -extern struct dentry *kvm_debugfs_dir; - ssize_t kvm_stats_read(char *id, const struct kvm_stats_header *header, const struct _kvm_stats_desc *desc, void *stats, size_t size_stats, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 14841acb8b95..fabc3e287e56 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -110,8 +110,7 @@ static struct kmem_cache *kvm_vcpu_cache; static __read_mostly struct preempt_ops kvm_preempt_ops; static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_running_vcpu); -struct dentry *kvm_debugfs_dir; -EXPORT_SYMBOL_GPL(kvm_debugfs_dir); +static struct dentry *kvm_debugfs_dir; static const struct file_operations stat_fops_per_vm; -- cgit v1.2.3 From 61df7b82820494368bd46071ca97e43a3dfc3b11 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 2 May 2024 17:57:51 -0400 Subject: lsm: fixup the inode xattr capability handling The current security_inode_setxattr() and security_inode_removexattr() hooks rely on individual LSMs to either call into the associated capability hooks (cap_inode_setxattr() or cap_inode_removexattr()), or return a magic value of 1 to indicate that the LSM layer itself should perform the capability checks. Unfortunately, with the default return value for these LSM hooks being 0, an individual LSM hook returning a 1 will cause the LSM hook processing to exit early, potentially skipping a LSM. Thankfully, with the exception of the BPF LSM, none of the LSMs which currently register inode xattr hooks should end up returning a value of 1, and in the BPF LSM case, with the BPF LSM hooks executing last there should be no real harm in stopping processing of the LSM hooks. However, the reliance on the individual LSMs to either call the capability hooks themselves, or signal the LSM with a return value of 1, is fragile and relies on a specific set of LSMs being enabled. This patch is an effort to resolve, or minimize, these issues. Before we discuss the solution, there are a few observations and considerations that we need to take into account: * BPF LSM registers an implementation for every LSM hook, and that implementation simply returns the hook's default return value, a 0 in this case. We want to ensure that the default BPF LSM behavior results in the capability checks being called. * SELinux and Smack do not expect the traditional capability checks to be applied to the xattrs that they "own". * SELinux and Smack are currently written in such a way that the xattr capability checks happen before any additional LSM specific access control checks. SELinux does apply SELinux specific access controls to all xattrs, even those not "owned" by SELinux. * IMA and EVM also register xattr hooks but assume that the LSM layer and specific LSMs have already authorized the basic xattr operation. In order to ensure we perform the capability based access controls before the individual LSM access controls, perform only one capability access control check for each operation, and clarify the logic around applying the capability controls, we need a mechanism to determine if any of the enabled LSMs "own" a particular xattr and want to take responsibility for controlling access to that xattr. The solution in this patch is to create a new LSM hook, 'inode_xattr_skipcap', that is not exported to the rest of the kernel via a security_XXX() function, but is used by the LSM layer to determine if a LSM wants to control access to a given xattr and avoid the traditional capability controls. Registering an inode_xattr_skipcap hook is optional, if a LSM declines to register an implementation, or uses an implementation that simply returns the default value (0), there is no effect as the LSM continues to enforce the capability based controls (unless another LSM takes ownership of the xattr). If none of the LSMs signal that the capability checks should be skipped, the capability check is performed and if access is granted the individual LSM xattr access control hooks are executed, keeping with the DAC-before-LSM convention. Cc: stable@vger.kernel.org Acked-by: Casey Schaufler Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 1 + security/security.c | 70 +++++++++++++++++++++++++++++-------------- security/selinux/hooks.c | 28 ++++++++++++----- security/smack/smack_lsm.c | 31 +++++++++++++++++-- 4 files changed, 98 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f804b76cde44..8fd87f823d3a 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -144,6 +144,7 @@ LSM_HOOK(int, 0, inode_setattr, struct mnt_idmap *idmap, struct dentry *dentry, LSM_HOOK(void, LSM_RET_VOID, inode_post_setattr, struct mnt_idmap *idmap, struct dentry *dentry, int ia_valid) LSM_HOOK(int, 0, inode_getattr, const struct path *path) +LSM_HOOK(int, 0, inode_xattr_skipcap, const char *name) LSM_HOOK(int, 0, inode_setxattr, struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) diff --git a/security/security.c b/security/security.c index e5da848c50b9..b52e81ac5526 100644 --- a/security/security.c +++ b/security/security.c @@ -2278,7 +2278,20 @@ int security_inode_getattr(const struct path *path) * @size: size of xattr value * @flags: flags * - * Check permission before setting the extended attributes. + * This hook performs the desired permission checks before setting the extended + * attributes (xattrs) on @dentry. It is important to note that we have some + * additional logic before the main LSM implementation calls to detect if we + * need to perform an additional capability check at the LSM layer. + * + * Normally we enforce a capability check prior to executing the various LSM + * hook implementations, but if a LSM wants to avoid this capability check, + * it can register a 'inode_xattr_skipcap' hook and return a value of 1 for + * xattrs that it wants to avoid the capability check, leaving the LSM fully + * responsible for enforcing the access control for the specific xattr. If all + * of the enabled LSMs refrain from registering a 'inode_xattr_skipcap' hook, + * or return a 0 (the default return value), the capability check is still + * performed. If no 'inode_xattr_skipcap' hooks are registered the capability + * check is performed. * * Return: Returns 0 if permission is granted. */ @@ -2286,20 +2299,20 @@ int security_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - int ret; + int rc; if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - /* - * SELinux and Smack integrate the cap call, - * so assume that all LSMs supplying this call do so. - */ - ret = call_int_hook(inode_setxattr, idmap, dentry, name, value, size, - flags); - if (ret == 1) - ret = cap_inode_setxattr(dentry, name, value, size, flags); - return ret; + /* enforce the capability checks at the lsm layer, if needed */ + if (!call_int_hook(inode_xattr_skipcap, name)) { + rc = cap_inode_setxattr(dentry, name, value, size, flags); + if (rc) + return rc; + } + + return call_int_hook(inode_setxattr, idmap, dentry, name, value, size, + flags); } /** @@ -2452,26 +2465,39 @@ int security_inode_listxattr(struct dentry *dentry) * @dentry: file * @name: xattr name * - * Check permission before removing the extended attribute identified by @name - * for @dentry. + * This hook performs the desired permission checks before setting the extended + * attributes (xattrs) on @dentry. It is important to note that we have some + * additional logic before the main LSM implementation calls to detect if we + * need to perform an additional capability check at the LSM layer. + * + * Normally we enforce a capability check prior to executing the various LSM + * hook implementations, but if a LSM wants to avoid this capability check, + * it can register a 'inode_xattr_skipcap' hook and return a value of 1 for + * xattrs that it wants to avoid the capability check, leaving the LSM fully + * responsible for enforcing the access control for the specific xattr. If all + * of the enabled LSMs refrain from registering a 'inode_xattr_skipcap' hook, + * or return a 0 (the default return value), the capability check is still + * performed. If no 'inode_xattr_skipcap' hooks are registered the capability + * check is performed. * * Return: Returns 0 if permission is granted. */ int security_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name) { - int ret; + int rc; if (unlikely(IS_PRIVATE(d_backing_inode(dentry)))) return 0; - /* - * SELinux and Smack integrate the cap call, - * so assume that all LSMs supplying this call do so. - */ - ret = call_int_hook(inode_removexattr, idmap, dentry, name); - if (ret == 1) - ret = cap_inode_removexattr(idmap, dentry, name); - return ret; + + /* enforce the capability checks at the lsm layer, if needed */ + if (!call_int_hook(inode_xattr_skipcap, name)) { + rc = cap_inode_removexattr(idmap, dentry, name); + if (rc) + return rc; + } + + return call_int_hook(inode_removexattr, idmap, dentry, name); } /** diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7eed331e90f0..17e6640bbf5b 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3177,6 +3177,23 @@ static bool has_cap_mac_admin(bool audit) return true; } +/** + * selinux_inode_xattr_skipcap - Skip the xattr capability checks? + * @name: name of the xattr + * + * Returns 1 to indicate that SELinux "owns" the access control rights to xattrs + * named @name; the LSM layer should avoid enforcing any traditional + * capability based access controls on this xattr. Returns 0 to indicate that + * SELinux does not "own" the access control rights to xattrs named @name and is + * deferring to the LSM layer for further access controls, including capability + * based controls. + */ +static int selinux_inode_xattr_skipcap(const char *name) +{ + /* require capability check if not a selinux xattr */ + return !strcmp(name, XATTR_NAME_SELINUX); +} + static int selinux_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, const void *value, size_t size, int flags) @@ -3188,15 +3205,9 @@ static int selinux_inode_setxattr(struct mnt_idmap *idmap, u32 newsid, sid = current_sid(); int rc = 0; - if (strcmp(name, XATTR_NAME_SELINUX)) { - rc = cap_inode_setxattr(dentry, name, value, size, flags); - if (rc) - return rc; - - /* Not an attribute we recognize, so just check the - ordinary setattr permission. */ + /* if not a selinux xattr, only check the ordinary setattr perm */ + if (strcmp(name, XATTR_NAME_SELINUX)) return dentry_has_perm(current_cred(), dentry, FILE__SETATTR); - } if (!selinux_initialized()) return (inode_owner_or_capable(idmap, inode) ? 0 : -EPERM); @@ -7175,6 +7186,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_permission, selinux_inode_permission), LSM_HOOK_INIT(inode_setattr, selinux_inode_setattr), LSM_HOOK_INIT(inode_getattr, selinux_inode_getattr), + LSM_HOOK_INIT(inode_xattr_skipcap, selinux_inode_xattr_skipcap), LSM_HOOK_INIT(inode_setxattr, selinux_inode_setxattr), LSM_HOOK_INIT(inode_post_setxattr, selinux_inode_post_setxattr), LSM_HOOK_INIT(inode_getxattr, selinux_inode_getxattr), diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 70ba2841e181..1833bd50d97a 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1282,6 +1282,33 @@ static int smack_inode_getattr(const struct path *path) return rc; } +/** + * smack_inode_xattr_skipcap - Skip the xattr capability checks? + * @name: name of the xattr + * + * Returns 1 to indicate that Smack "owns" the access control rights to xattrs + * named @name; the LSM layer should avoid enforcing any traditional + * capability based access controls on this xattr. Returns 0 to indicate that + * Smack does not "own" the access control rights to xattrs named @name and is + * deferring to the LSM layer for further access controls, including capability + * based controls. + */ +static int smack_inode_xattr_skipcap(const char *name) +{ + if (strncmp(name, XATTR_SMACK_SUFFIX, strlen(XATTR_SMACK_SUFFIX))) + return 0; + + if (strcmp(name, XATTR_NAME_SMACK) == 0 || + strcmp(name, XATTR_NAME_SMACKIPIN) == 0 || + strcmp(name, XATTR_NAME_SMACKIPOUT) == 0 || + strcmp(name, XATTR_NAME_SMACKEXEC) == 0 || + strcmp(name, XATTR_NAME_SMACKMMAP) == 0 || + strcmp(name, XATTR_NAME_SMACKTRANSMUTE) == 0) + return 1; + + return 0; +} + /** * smack_inode_setxattr - Smack check for setting xattrs * @idmap: idmap of the mount @@ -1325,8 +1352,7 @@ static int smack_inode_setxattr(struct mnt_idmap *idmap, size != TRANS_TRUE_SIZE || strncmp(value, TRANS_TRUE, TRANS_TRUE_SIZE) != 0) rc = -EINVAL; - } else - rc = cap_inode_setxattr(dentry, name, value, size, flags); + } if (check_priv && !smack_privileged(CAP_MAC_ADMIN)) rc = -EPERM; @@ -5051,6 +5077,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_permission, smack_inode_permission), LSM_HOOK_INIT(inode_setattr, smack_inode_setattr), LSM_HOOK_INIT(inode_getattr, smack_inode_getattr), + LSM_HOOK_INIT(inode_xattr_skipcap, smack_inode_xattr_skipcap), LSM_HOOK_INIT(inode_setxattr, smack_inode_setxattr), LSM_HOOK_INIT(inode_post_setxattr, smack_inode_post_setxattr), LSM_HOOK_INIT(inode_getxattr, smack_inode_getxattr), -- cgit v1.2.3 From 32d99593bdc91442fe6183d97b060210cc9c8193 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 16 Mar 2024 00:10:15 -0700 Subject: rcu: Add lockdep_assert_in_rcu_read_lock() and friends There is no direct RCU counterpart to lockdep_assert_irqs_disabled() and friends. Although it is possible to construct them, it would be more convenient to have the following lockdep assertions: lockdep_assert_in_rcu_read_lock() lockdep_assert_in_rcu_read_lock_bh() lockdep_assert_in_rcu_read_lock_sched() lockdep_assert_in_rcu_reader() This commit therefore creates them. Reported-by: Jens Axboe Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dfd2399f2cde..8470a85f6563 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -421,11 +421,71 @@ static inline void rcu_preempt_sleep_check(void) { } "Illegal context switch in RCU-sched read-side critical section"); \ } while (0) +// See RCU_LOCKDEP_WARN() for an explanation of the double call to +// debug_lockdep_rcu_enabled(). +static inline bool lockdep_assert_rcu_helper(bool c) +{ + return debug_lockdep_rcu_enabled() && + (c || !rcu_is_watching() || !rcu_lockdep_current_cpu_online()) && + debug_lockdep_rcu_enabled(); +} + +/** + * lockdep_assert_in_rcu_read_lock - WARN if not protected by rcu_read_lock() + * + * Splats if lockdep is enabled and there is no rcu_read_lock() in effect. + */ +#define lockdep_assert_in_rcu_read_lock() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map))) + +/** + * lockdep_assert_in_rcu_read_lock_bh - WARN if not protected by rcu_read_lock_bh() + * + * Splats if lockdep is enabled and there is no rcu_read_lock_bh() in effect. + * Note that local_bh_disable() and friends do not suffice here, instead an + * actual rcu_read_lock_bh() is required. + */ +#define lockdep_assert_in_rcu_read_lock_bh() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_bh_lock_map))) + +/** + * lockdep_assert_in_rcu_read_lock_sched - WARN if not protected by rcu_read_lock_sched() + * + * Splats if lockdep is enabled and there is no rcu_read_lock_sched() + * in effect. Note that preempt_disable() and friends do not suffice here, + * instead an actual rcu_read_lock_sched() is required. + */ +#define lockdep_assert_in_rcu_read_lock_sched() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_sched_lock_map))) + +/** + * lockdep_assert_in_rcu_reader - WARN if not within some type of RCU reader + * + * Splats if lockdep is enabled and there is no RCU reader of any + * type in effect. Note that regions of code protected by things like + * preempt_disable, local_bh_disable(), and local_irq_disable() all qualify + * as RCU readers. + * + * Note that this will never trigger in PREEMPT_NONE or PREEMPT_VOLUNTARY + * kernels that are not also built with PREEMPT_COUNT. But if you have + * lockdep enabled, you might as well also enable PREEMPT_COUNT. + */ +#define lockdep_assert_in_rcu_reader() \ + WARN_ON_ONCE(lockdep_assert_rcu_helper(!lock_is_held(&rcu_lock_map) && \ + !lock_is_held(&rcu_bh_lock_map) && \ + !lock_is_held(&rcu_sched_lock_map) && \ + preemptible())) + #else /* #ifdef CONFIG_PROVE_RCU */ #define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c)) #define rcu_sleep_check() do { } while (0) +#define lockdep_assert_in_rcu_read_lock() do { } while (0) +#define lockdep_assert_in_rcu_read_lock_bh() do { } while (0) +#define lockdep_assert_in_rcu_read_lock_sched() do { } while (0) +#define lockdep_assert_in_rcu_reader() do { } while (0) + #endif /* #else #ifdef CONFIG_PROVE_RCU */ /* -- cgit v1.2.3 From ce418966a83320837f59f04a646fa4716e132dc8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Apr 2024 16:18:32 +0200 Subject: rcu/nocb: Fix segcblist state machine comments about bypass The parts explaining the bypass lifecycle in (de-)offloading are out of date and/or wrong. Bypass is simply enabled whenever SEGCBLIST_RCU_CORE flag is off. Fix the comments accordingly. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 659d13a7ddaa..1e5b4ef167ca 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -141,7 +141,7 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | - * | handles callbacks concurrently. Bypass enqueue is enabled. | + * | handles callbacks concurrently. Bypass enqueue is disabled. | * | Invoke RCU core so we make sure not to preempt it in the middle with | * | leaving some urgent work unattended within a jiffy. | * ---------------------------------------------------------------------------- @@ -154,8 +154,7 @@ struct rcu_cblist { * | SEGCBLIST_KTHREAD_GP | * | | * | CB/GP kthreads and local rcu_core() handle callbacks concurrently | - * | holding nocb_lock. Wake up CB and GP kthreads if necessary. Disable | - * | bypass enqueue. | + * | holding nocb_lock. Wake up CB and GP kthreads if necessary. | * ---------------------------------------------------------------------------- * | * v @@ -185,7 +184,7 @@ struct rcu_cblist { * | | * | Callbacks processed by rcu_core() from softirqs or local | * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | - * | Flush pending nocb_timer. Flush nocb bypass callbacks. | + * | Flush pending nocb_timer. | * ---------------------------------------------------------------------------- * | * v -- cgit v1.2.3 From aa97b9a56906f5965a7c5752790d174cadc8b820 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Apr 2024 16:18:33 +0200 Subject: rcu/nocb: Fix segcblist state machine stale comments about timers The (de-)offloading process used to take care about the NOCB timer when it depended on the per-rdp NOCB locking. However this isn't the case anymore for a long while. It can now safely be armed and run during the (de-)offloading process, which doesn't care about it anymore. Update the comments accordingly. Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 1e5b4ef167ca..8018045989af 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -80,8 +80,7 @@ struct rcu_cblist { * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | * | | * | Callbacks processed by rcu_core() from softirqs or local | - * | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads, | - * | allowing nocb_timer to be armed. | + * | rcuc kthread, while holding nocb_lock. Waking up CB and GP kthreads. | * ---------------------------------------------------------------------------- * | * v @@ -183,8 +182,7 @@ struct rcu_cblist { * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | * | | * | Callbacks processed by rcu_core() from softirqs or local | - * | rcuc kthread, while holding nocb_lock. Forbid nocb_timer to be armed. | - * | Flush pending nocb_timer. | + * | rcuc kthread, while holding nocb_lock. | * ---------------------------------------------------------------------------- * | * v -- cgit v1.2.3 From 483d5bf23125a9127ffcb3f7a3b3539b34df67d4 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Apr 2024 16:18:34 +0200 Subject: rcu/nocb: Use kthread parking instead of ad-hoc implementation Upon NOCB deoffloading, the rcuo kthread must be forced to sleep until the corresponding rdp is ever offloaded again. The deoffloader clears the SEGCBLIST_OFFLOADED flag, wakes up the rcuo kthread which then notices that change and clears in turn its SEGCBLIST_KTHREAD_CB flag before going to sleep, until it ever sees the SEGCBLIST_OFFLOADED flag again, should a re-offloading happen. Upon NOCB offloading, the rcuo kthread must be forced to wake up and handle callbacks until the corresponding rdp is ever deoffloaded again. The offloader sets the SEGCBLIST_OFFLOADED flag, wakes up the rcuo kthread which then notices that change and sets in turn its SEGCBLIST_KTHREAD_CB flag before going to check callbacks, until it ever sees the SEGCBLIST_OFFLOADED flag cleared again, should a de-offloading happen again. This is all a crude ad-hoc and error-prone kthread (un-)parking re-implementation. Consolidate the behaviour with the appropriate API instead. [ paulmck: Apply Qiang Zhang feedback provided in Link: below. ] Link: https://lore.kernel.org/all/20240509074046.15629-1-qiang.zhang1211@gmail.com/ Signed-off-by: Frederic Weisbecker Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 81 +++++++++++++---------------- kernel/rcu/tree_nocb.h | 115 +++++++++++++----------------------------- kernel/rcu/tree_plugin.h | 4 +- 3 files changed, 72 insertions(+), 128 deletions(-) (limited to 'include') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 8018045989af..ba95c06675e1 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -84,31 +84,31 @@ struct rcu_cblist { * ---------------------------------------------------------------------------- * | * v - * ----------------------------------- - * | | - * v v - * --------------------------------------- ----------------------------------| - * | SEGCBLIST_RCU_CORE | | | SEGCBLIST_RCU_CORE | | - * | SEGCBLIST_LOCKING | | | SEGCBLIST_LOCKING | | - * | SEGCBLIST_OFFLOADED | | | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | SEGCBLIST_KTHREAD_GP | - * | | | | - * | | | | - * | CB kthread woke up and | | GP kthread woke up and | - * | acknowledged SEGCBLIST_OFFLOADED. | | acknowledged SEGCBLIST_OFFLOADED| - * | Processes callbacks concurrently | | | - * | with rcu_core(), holding | | | - * | nocb_lock. | | | - * --------------------------------------- ----------------------------------- - * | | - * ----------------------------------- + * ---------------------------------------------------------------------------- + * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED | + * | + unparked CB kthread | + * | | + * | CB kthread got unparked and processes callbacks concurrently with | + * | rcu_core(), holding nocb_lock. | + * --------------------------------------------------------------------------- + * | + * v + * ---------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | + * | | + * | GP kthread woke up and acknowledged nocb_lock. | + * ---------------------------------------- ----------------------------------- * | * v * |--------------------------------------------------------------------------| - * | SEGCBLIST_LOCKING | | - * | SEGCBLIST_OFFLOADED | | + * | SEGCBLIST_LOCKING | | + * | SEGCBLIST_OFFLOADED | | * | SEGCBLIST_KTHREAD_GP | | - * | SEGCBLIST_KTHREAD_CB | + * | + unparked CB kthread | * | | * | Kthreads handle callbacks holding nocb_lock, local rcu_core() stops | * | handling callbacks. Enable bypass queueing. | @@ -124,8 +124,8 @@ struct rcu_cblist { * |--------------------------------------------------------------------------| * | SEGCBLIST_LOCKING | | * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | * | ignores callbacks. Bypass enqueue is enabled. | @@ -136,8 +136,8 @@ struct rcu_cblist { * | SEGCBLIST_RCU_CORE | | * | SEGCBLIST_LOCKING | | * | SEGCBLIST_OFFLOADED | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core() | * | handles callbacks concurrently. Bypass enqueue is disabled. | @@ -149,40 +149,31 @@ struct rcu_cblist { * |--------------------------------------------------------------------------| * | SEGCBLIST_RCU_CORE | | * | SEGCBLIST_LOCKING | | - * | SEGCBLIST_KTHREAD_CB | | * | SEGCBLIST_KTHREAD_GP | + * | + unparked CB kthread | * | | * | CB/GP kthreads and local rcu_core() handle callbacks concurrently | - * | holding nocb_lock. Wake up CB and GP kthreads if necessary. | + * | holding nocb_lock. Wake up GP kthread if necessary. | * ---------------------------------------------------------------------------- * | * v - * ----------------------------------- - * | | - * v v - * ---------------------------------------------------------------------------| - * | | | - * | SEGCBLIST_RCU_CORE | | SEGCBLIST_RCU_CORE | | - * | SEGCBLIST_LOCKING | | SEGCBLIST_LOCKING | | - * | SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP | - * | | | - * | GP kthread woke up and | CB kthread woke up and | - * | acknowledged the fact that | acknowledged the fact that | - * | SEGCBLIST_OFFLOADED got cleared. | SEGCBLIST_OFFLOADED got cleared. | - * | | The CB kthread goes to sleep | - * | The callbacks from the target CPU | until it ever gets re-offloaded. | - * | will be ignored from the GP kthread | | - * | loop. | | + * |--------------------------------------------------------------------------| + * | SEGCBLIST_RCU_CORE | | + * | SEGCBLIST_LOCKING | | + * | + unparked CB kthread | + * | | + * | GP kthread woke up and acknowledged the fact that SEGCBLIST_OFFLOADED | + * | got cleared. The callbacks from the target CPU will be ignored from the| + * | GP kthread loop. | * ---------------------------------------------------------------------------- - * | | - * ----------------------------------- * | * v * ---------------------------------------------------------------------------- * | SEGCBLIST_RCU_CORE | SEGCBLIST_LOCKING | + * | + parked CB kthread | * | | - * | Callbacks processed by rcu_core() from softirqs or local | - * | rcuc kthread, while holding nocb_lock. | + * | CB kthread is parked. Callbacks processed by rcu_core() from softirqs or | + * | local rcuc kthread, while holding nocb_lock. | * ---------------------------------------------------------------------------- * | * v diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 3f85577bddd4..808c9a19fe1d 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -635,8 +635,7 @@ static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, } } -static int nocb_gp_toggle_rdp(struct rcu_data *rdp, - bool *wake_state) +static int nocb_gp_toggle_rdp(struct rcu_data *rdp) { struct rcu_segcblist *cblist = &rdp->cblist; unsigned long flags; @@ -650,8 +649,6 @@ static int nocb_gp_toggle_rdp(struct rcu_data *rdp, * We will handle this rdp until it ever gets de-offloaded. */ rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_GP); - if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) - *wake_state = true; ret = 1; } else if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED) && rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) { @@ -660,8 +657,6 @@ static int nocb_gp_toggle_rdp(struct rcu_data *rdp, * We will ignore this rdp until it ever gets re-offloaded. */ rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_GP); - if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) - *wake_state = true; ret = 0; } else { WARN_ON_ONCE(1); @@ -877,16 +872,15 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) } if (rdp_toggling) { - bool wake_state = false; int ret; - ret = nocb_gp_toggle_rdp(rdp_toggling, &wake_state); + ret = nocb_gp_toggle_rdp(rdp_toggling); if (ret == 1) list_add_tail(&rdp_toggling->nocb_entry_rdp, &my_rdp->nocb_head_rdp); else if (ret == 0) list_del(&rdp_toggling->nocb_entry_rdp); - if (wake_state) - swake_up_one(&rdp_toggling->nocb_state_wq); + + swake_up_one(&rdp_toggling->nocb_state_wq); } my_rdp->nocb_gp_seq = -1; @@ -913,16 +907,9 @@ static int rcu_nocb_gp_kthread(void *arg) return 0; } -static inline bool nocb_cb_can_run(struct rcu_data *rdp) -{ - u8 flags = SEGCBLIST_OFFLOADED | SEGCBLIST_KTHREAD_CB; - - return rcu_segcblist_test_flags(&rdp->cblist, flags); -} - static inline bool nocb_cb_wait_cond(struct rcu_data *rdp) { - return nocb_cb_can_run(rdp) && !READ_ONCE(rdp->nocb_cb_sleep); + return !READ_ONCE(rdp->nocb_cb_sleep) || kthread_should_park(); } /* @@ -934,21 +921,19 @@ static void nocb_cb_wait(struct rcu_data *rdp) struct rcu_segcblist *cblist = &rdp->cblist; unsigned long cur_gp_seq; unsigned long flags; - bool needwake_state = false; bool needwake_gp = false; - bool can_sleep = true; struct rcu_node *rnp = rdp->mynode; - do { - swait_event_interruptible_exclusive(rdp->nocb_cb_wq, - nocb_cb_wait_cond(rdp)); - - if (READ_ONCE(rdp->nocb_cb_sleep)) { - WARN_ON(signal_pending(current)); - trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty")); - } - } while (!nocb_cb_can_run(rdp)); + swait_event_interruptible_exclusive(rdp->nocb_cb_wq, + nocb_cb_wait_cond(rdp)); + if (kthread_should_park()) { + kthread_parkme(); + } else if (READ_ONCE(rdp->nocb_cb_sleep)) { + WARN_ON(signal_pending(current)); + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty")); + } + WARN_ON_ONCE(!rcu_rdp_is_offloaded(rdp)); local_irq_save(flags); rcu_momentary_dyntick_idle(); @@ -971,37 +956,16 @@ static void nocb_cb_wait(struct rcu_data *rdp) raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ } - if (rcu_segcblist_test_flags(cblist, SEGCBLIST_OFFLOADED)) { - if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)) { - rcu_segcblist_set_flags(cblist, SEGCBLIST_KTHREAD_CB); - if (rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) - needwake_state = true; - } - if (rcu_segcblist_ready_cbs(cblist)) - can_sleep = false; + if (!rcu_segcblist_ready_cbs(cblist)) { + WRITE_ONCE(rdp->nocb_cb_sleep, true); + trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep")); } else { - /* - * De-offloading. Clear our flag and notify the de-offload worker. - * We won't touch the callbacks and keep sleeping until we ever - * get re-offloaded. - */ - WARN_ON_ONCE(!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB)); - rcu_segcblist_clear_flags(cblist, SEGCBLIST_KTHREAD_CB); - if (!rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)) - needwake_state = true; + WRITE_ONCE(rdp->nocb_cb_sleep, false); } - WRITE_ONCE(rdp->nocb_cb_sleep, can_sleep); - - if (rdp->nocb_cb_sleep) - trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("CBSleep")); - rcu_nocb_unlock_irqrestore(rdp, flags); if (needwake_gp) rcu_gp_kthread_wake(); - - if (needwake_state) - swake_up_one(&rdp->nocb_state_wq); } /* @@ -1094,17 +1058,8 @@ static int rdp_offload_toggle(struct rcu_data *rdp, bool wake_gp = false; rcu_segcblist_offload(cblist, offload); - - if (rdp->nocb_cb_sleep) - rdp->nocb_cb_sleep = false; rcu_nocb_unlock_irqrestore(rdp, flags); - /* - * Ignore former value of nocb_cb_sleep and force wake up as it could - * have been spuriously set to false already. - */ - swake_up_one(&rdp->nocb_cb_wq); - raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); // Queue this rdp for add/del to/from the list to iterate on rcuog WRITE_ONCE(rdp_gp->nocb_toggling_rdp, rdp); @@ -1161,19 +1116,11 @@ static long rcu_nocb_rdp_deoffload(void *arg) if (wake_gp) wake_up_process(rdp_gp->nocb_gp_kthread); - /* - * If rcuo[p] kthread spawn failed, directly remove SEGCBLIST_KTHREAD_CB. - * Just wait SEGCBLIST_KTHREAD_GP to be cleared by rcuog. - */ - if (!rdp->nocb_cb_kthread) { - rcu_nocb_lock_irqsave(rdp, flags); - rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB); - rcu_nocb_unlock_irqrestore(rdp, flags); - } - swait_event_exclusive(rdp->nocb_state_wq, - !rcu_segcblist_test_flags(cblist, - SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP)); + !rcu_segcblist_test_flags(cblist, + SEGCBLIST_KTHREAD_GP)); + if (rdp->nocb_cb_kthread) + kthread_park(rdp->nocb_cb_kthread); } else { /* * No kthread to clear the flags for us or remove the rdp from the nocb list @@ -1181,8 +1128,7 @@ static long rcu_nocb_rdp_deoffload(void *arg) * but we stick to paranoia in this rare path. */ rcu_nocb_lock_irqsave(rdp, flags); - rcu_segcblist_clear_flags(&rdp->cblist, - SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP); + rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP); rcu_nocb_unlock_irqrestore(rdp, flags); list_del(&rdp->nocb_entry_rdp); @@ -1282,8 +1228,10 @@ static long rcu_nocb_rdp_offload(void *arg) wake_gp = rdp_offload_toggle(rdp, true, flags); if (wake_gp) wake_up_process(rdp_gp->nocb_gp_kthread); + + kthread_unpark(rdp->nocb_cb_kthread); + swait_event_exclusive(rdp->nocb_state_wq, - rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB) && rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_GP)); /* @@ -1468,7 +1416,7 @@ void __init rcu_init_nohz(void) if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); rcu_segcblist_offload(&rdp->cblist, true); - rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_CB | SEGCBLIST_KTHREAD_GP); + rcu_segcblist_set_flags(&rdp->cblist, SEGCBLIST_KTHREAD_GP); rcu_segcblist_clear_flags(&rdp->cblist, SEGCBLIST_RCU_CORE); } rcu_organize_nocb_kthreads(); @@ -1526,11 +1474,16 @@ static void rcu_spawn_cpu_nocb_kthread(int cpu) mutex_unlock(&rdp_gp->nocb_gp_kthread_mutex); /* Spawn the kthread for this CPU. */ - t = kthread_run(rcu_nocb_cb_kthread, rdp, - "rcuo%c/%d", rcu_state.abbr, cpu); + t = kthread_create(rcu_nocb_cb_kthread, rdp, + "rcuo%c/%d", rcu_state.abbr, cpu); if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo CB kthread, OOM is now expected behavior\n", __func__)) goto end; + if (rcu_rdp_is_offloaded(rdp)) + wake_up_process(t); + else + kthread_park(t); + if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_CB_BOOST) && kthread_prio) sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 340bbefe5f65..1facbb365dc2 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -28,8 +28,8 @@ static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) !(lockdep_is_held(&rcu_state.barrier_mutex) || (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) || rcu_lockdep_is_held_nocb(rdp) || - (rdp == this_cpu_ptr(&rcu_data) && - !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) || + (!(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible()) && + rdp == this_cpu_ptr(&rcu_data)) || rcu_current_is_nocb_kthread(rdp)), "Unsafe read of RCU_NOCB offloaded state" ); -- cgit v1.2.3 From 9855c37edf0009cc276cecfee09f7e76e2380212 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 25 Apr 2024 16:24:04 +0200 Subject: Revert "rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes()" This reverts commit 28319d6dc5e2ffefa452c2377dd0f71621b5bff0. The race it fixed was subject to conditions that don't exist anymore since: 1612160b9127 ("rcu-tasks: Eliminate deadlocks involving do_exit() and RCU tasks") This latter commit removes the use of SRCU that used to cover the RCU-tasks blind spot on exit between the tasklist's removal and the final preemption disabling. The task is now placed instead into a temporary list inside which voluntary sleeps are accounted as RCU-tasks quiescent states. This would disarm the deadlock initially reported against PID namespace exit. Signed-off-by: Frederic Weisbecker Reviewed-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 -- kernel/pid_namespace.c | 17 ----------------- kernel/rcu/tasks.h | 16 +++------------- 3 files changed, 3 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index dfd2399f2cde..61cb3de236af 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -209,7 +209,6 @@ void synchronize_rcu_tasks_rude(void); #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false) void exit_tasks_rcu_start(void); -void exit_tasks_rcu_stop(void); void exit_tasks_rcu_finish(void); #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ #define rcu_tasks_classic_qs(t, preempt) do { } while (0) @@ -218,7 +217,6 @@ void exit_tasks_rcu_finish(void); #define call_rcu_tasks call_rcu #define synchronize_rcu_tasks synchronize_rcu static inline void exit_tasks_rcu_start(void) { } -static inline void exit_tasks_rcu_stop(void) { } static inline void exit_tasks_rcu_finish(void) { } #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index dc48fecfa1dc..b566ae827cfc 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -248,24 +248,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) set_current_state(TASK_INTERRUPTIBLE); if (pid_ns->pid_allocated == init_pids) break; - /* - * Release tasks_rcu_exit_srcu to avoid following deadlock: - * - * 1) TASK A unshare(CLONE_NEWPID) - * 2) TASK A fork() twice -> TASK B (child reaper for new ns) - * and TASK C - * 3) TASK B exits, kills TASK C, waits for TASK A to reap it - * 4) TASK A calls synchronize_rcu_tasks() - * -> synchronize_srcu(tasks_rcu_exit_srcu) - * 5) *DEADLOCK* - * - * It is considered safe to release tasks_rcu_exit_srcu here - * because we assume the current task can not be concurrently - * reaped at this point. - */ - exit_tasks_rcu_stop(); schedule(); - exit_tasks_rcu_start(); } __set_current_state(TASK_RUNNING); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index e1bf33018e6d..4dc56b6e27c0 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -858,7 +858,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) // not know to synchronize with this RCU Tasks grace period) have // completed exiting. The synchronize_rcu() in rcu_tasks_postgp() // will take care of any tasks stuck in the non-preemptible region -// of do_exit() following its call to exit_tasks_rcu_stop(). +// of do_exit() following its call to exit_tasks_rcu_finish(). // check_all_holdout_tasks(), repeatedly until holdout list is empty: // Scans the holdout list, attempting to identify a quiescent state // for each task on the list. If there is a quiescent state, the @@ -1220,7 +1220,7 @@ void exit_tasks_rcu_start(void) * Remove the task from the "yet another list" because do_exit() is now * non-preemptible, allowing synchronize_rcu() to wait beyond this point. */ -void exit_tasks_rcu_stop(void) +void exit_tasks_rcu_finish(void) { unsigned long flags; struct rcu_tasks_percpu *rtpcp; @@ -1231,22 +1231,12 @@ void exit_tasks_rcu_stop(void) raw_spin_lock_irqsave_rcu_node(rtpcp, flags); list_del_init(&t->rcu_tasks_exit_list); raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); -} -/* - * Contribute to protect against tasklist scan blind spot while the - * task is exiting and may be removed from the tasklist. See - * corresponding synchronize_srcu() for further details. - */ -void exit_tasks_rcu_finish(void) -{ - exit_tasks_rcu_stop(); - exit_tasks_rcu_finish_trace(current); + exit_tasks_rcu_finish_trace(t); } #else /* #ifdef CONFIG_TASKS_RCU */ void exit_tasks_rcu_start(void) { } -void exit_tasks_rcu_stop(void) { } void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); } #endif /* #else #ifdef CONFIG_TASKS_RCU */ -- cgit v1.2.3 From 4ce6e8a859f0503d97aac6869bc3b1a24b15601d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 28 May 2024 13:33:29 -0700 Subject: hwmon: Add PEC attribute support to hardware monitoring core Several hardware monitoring chips optionally support Packet Error Checking (PEC). For some chips, PEC support can be enabled simply by setting I2C_CLIENT_PEC in the i2c client data structure. Others require chip specific code to enable or disable PEC support. Introduce hwmon_chip_pec and HWMON_C_PEC to simplify adding configurable PEC support for hardware monitoring drivers. A driver can set HWMON_C_PEC in its chip information data to indicate PEC support. If a chip requires chip specific code to enable or disable PEC support, the driver only needs to implement support for the hwmon_chip_pec attribute to its write function. Packet Error Checking is only supported for SMBus devices. HWMON_C_PEC must therefore only be set by a driver if the parent device is an I2C device. Attempts to set HWMON_C_PEC on any other device type is not supported and rejected. The code calls i2c_check_functionality() to check if PEC is supported by the I2C/SMBus controller. This function is only available if CONFIG_I2C is enabled and reachable. For this reason, the added code needs to depend on reachability of CONFIG_I2C. Cc: Radu Sabau Acked-by: Nuno Sa Signed-off-by: Guenter Roeck --- drivers/hwmon/hwmon.c | 147 ++++++++++++++++++++++++++++++++++++++++++++------ include/linux/hwmon.h | 2 + 2 files changed, 133 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 3b259c425ab7..8aee253b5829 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -309,6 +310,114 @@ static int hwmon_attr_base(enum hwmon_sensor_types type) return 1; } +#if IS_REACHABLE(CONFIG_I2C) + +/* + * PEC support + * + * The 'pec' attribute is attached to I2C client devices. It is only provided + * if the i2c controller supports PEC. + * + * The mutex ensures that PEC configuration between i2c device and the hardware + * is consistent. Use a single mutex because attribute writes are supposed to be + * rare, and maintaining a separate mutex for each hardware monitoring device + * would add substantial complexity to the driver for little if any gain. + * + * The hardware monitoring device is identified as child of the i2c client + * device. This assumes that only a single hardware monitoring device is + * attached to an i2c client device. + */ + +static DEFINE_MUTEX(hwmon_pec_mutex); + +static int hwmon_match_device(struct device *dev, void *data) +{ + return dev->class == &hwmon_class; +} + +static ssize_t pec_show(struct device *dev, struct device_attribute *dummy, + char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + + return sysfs_emit(buf, "%d\n", !!(client->flags & I2C_CLIENT_PEC)); +} + +static ssize_t pec_store(struct device *dev, struct device_attribute *devattr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct hwmon_device *hwdev; + struct device *hdev; + bool val; + int err; + + err = kstrtobool(buf, &val); + if (err < 0) + return err; + + hdev = device_find_child(dev, NULL, hwmon_match_device); + if (!hdev) + return -ENODEV; + + mutex_lock(&hwmon_pec_mutex); + + /* + * If there is no write function, we assume that chip specific + * handling is not required. + */ + hwdev = to_hwmon_device(hdev); + if (hwdev->chip->ops->write) { + err = hwdev->chip->ops->write(hdev, hwmon_chip, hwmon_chip_pec, 0, val); + if (err && err != -EOPNOTSUPP) + goto unlock; + } + + if (!val) + client->flags &= ~I2C_CLIENT_PEC; + else + client->flags |= I2C_CLIENT_PEC; + + err = count; +unlock: + mutex_unlock(&hwmon_pec_mutex); + put_device(hdev); + + return err; +} + +static DEVICE_ATTR_RW(pec); + +static void hwmon_remove_pec(void *dev) +{ + device_remove_file(dev, &dev_attr_pec); +} + +static int hwmon_pec_register(struct device *hdev) +{ + struct i2c_client *client = i2c_verify_client(hdev->parent); + int err; + + if (!client) + return -EINVAL; + + if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_PEC)) + return 0; + + err = device_create_file(&client->dev, &dev_attr_pec); + if (err) + return err; + + return devm_add_action_or_reset(hdev, hwmon_remove_pec, &client->dev); +} + +#else /* CONFIG_I2C */ +static int hwmon_pec_register(struct device *hdev) +{ + return -EINVAL; +} +#endif /* CONFIG_I2C */ + /* sysfs attribute management */ static ssize_t hwmon_attr_show(struct device *dev, @@ -397,10 +506,6 @@ static struct attribute *hwmon_genattr(const void *drvdata, const char *name; bool is_string = is_string_attr(type, attr); - /* The attribute is invisible if there is no template string */ - if (!template) - return ERR_PTR(-ENOENT); - mode = ops->is_visible(drvdata, type, attr, index); if (!mode) return ERR_PTR(-ENOENT); @@ -712,8 +817,8 @@ static int hwmon_genattrs(const void *drvdata, attr = __ffs(attr_mask); attr_mask &= ~BIT(attr); - if (attr >= template_size) - return -EINVAL; + if (attr >= template_size || !templates[attr]) + continue; /* attribute is invisible */ a = hwmon_genattr(drvdata, info->type, attr, i, templates[attr], ops); if (IS_ERR(a)) { @@ -849,16 +954,26 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, INIT_LIST_HEAD(&hwdev->tzdata); if (hdev->of_node && chip && chip->ops->read && - chip->info[0]->type == hwmon_chip && - (chip->info[0]->config[0] & HWMON_C_REGISTER_TZ)) { - err = hwmon_thermal_register_sensors(hdev); - if (err) { - device_unregister(hdev); - /* - * Don't worry about hwdev; hwmon_dev_release(), called - * from device_unregister(), will free it. - */ - goto ida_remove; + chip->info[0]->type == hwmon_chip) { + u32 config = chip->info[0]->config[0]; + + if (config & HWMON_C_REGISTER_TZ) { + err = hwmon_thermal_register_sensors(hdev); + if (err) { + device_unregister(hdev); + /* + * Don't worry about hwdev; hwmon_dev_release(), + * called from device_unregister(), will free it. + */ + goto ida_remove; + } + } + if (config & HWMON_C_PEC) { + err = hwmon_pec_register(hdev); + if (err) { + device_unregister(hdev); + goto ida_remove; + } } } diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index edf96f249eb5..e94314760aab 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -45,6 +45,7 @@ enum hwmon_chip_attributes { hwmon_chip_power_samples, hwmon_chip_temp_samples, hwmon_chip_beep_enable, + hwmon_chip_pec, }; #define HWMON_C_TEMP_RESET_HISTORY BIT(hwmon_chip_temp_reset_history) @@ -60,6 +61,7 @@ enum hwmon_chip_attributes { #define HWMON_C_POWER_SAMPLES BIT(hwmon_chip_power_samples) #define HWMON_C_TEMP_SAMPLES BIT(hwmon_chip_temp_samples) #define HWMON_C_BEEP_ENABLE BIT(hwmon_chip_beep_enable) +#define HWMON_C_PEC BIT(hwmon_chip_pec) enum hwmon_temp_attributes { hwmon_temp_enable, -- cgit v1.2.3 From d0edc54455398248154062664f7d1b35e6ec6e01 Mon Sep 17 00:00:00 2001 From: Bingbu Cao Date: Fri, 10 May 2024 14:27:47 +0800 Subject: media: ipu-bridge: add mod_devicetable.h header inclusion ACPI_ID_LEN is defined in mod_devicetable.h, so the header should be guaranteed to included in ipu-bridge.h instead of the source files which include ipu-bridge.h. Signed-off-by: Bingbu Cao Reviewed-by: Andy Shevchenko Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/ipu-bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/media/ipu-bridge.h b/include/media/ipu-bridge.h index 783bda6d5cc3..16fac765456e 100644 --- a/include/media/ipu-bridge.h +++ b/include/media/ipu-bridge.h @@ -3,6 +3,7 @@ #ifndef __IPU_BRIDGE_H #define __IPU_BRIDGE_H +#include #include #include #include -- cgit v1.2.3 From 1d7804281df3f09f0a109d00406e859a00bae7ae Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 24 Apr 2024 18:39:07 +0300 Subject: media: subdev: Fix use of sd->enabled_streams in call_s_stream() call_s_stream() uses sd->enabled_streams to track whether streaming has already been enabled. However, v4l2_subdev_enable/disable_streams_fallback(), which was the original user of this field, already uses it, and v4l2_subdev_enable/disable_streams_fallback() will call call_s_stream(). This leads to a conflict as both functions set the field. Afaics, both functions set the field to the same value, so it won't cause a runtime bug, but it's still wrong and if we, e.g., change how v4l2_subdev_enable/disable_streams_fallback() operates we might easily cause bugs. Fix this by adding a new field, 's_stream_enabled', for call_s_stream(). Reviewed-by: Umang Jain Reviewed-by: Laurent Pinchart Tested-by: Umang Jain Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/v4l2-core/v4l2-subdev.c | 8 ++------ include/media/v4l2-subdev.h | 3 +++ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index 09b4122d9ab7..8675ad0688af 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -451,12 +451,8 @@ static int call_s_stream(struct v4l2_subdev *sd, int enable) * The .s_stream() operation must never be called to start or stop an * already started or stopped subdev. Catch offenders but don't return * an error yet to avoid regressions. - * - * As .s_stream() is mutually exclusive with the .enable_streams() and - * .disable_streams() operation, we can use the enabled_streams field - * to store the subdev streaming state. */ - if (WARN_ON(!!sd->enabled_streams == !!enable)) + if (WARN_ON(sd->s_stream_enabled == !!enable)) return 0; ret = sd->ops->video->s_stream(sd, enable); @@ -467,7 +463,7 @@ static int call_s_stream(struct v4l2_subdev *sd, int enable) } if (!ret) { - sd->enabled_streams = enable ? BIT(0) : 0; + sd->s_stream_enabled = enable; if (enable) v4l2_subdev_enable_privacy_led(sd); diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index e30c463d90e5..d3f15882927b 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1045,6 +1045,8 @@ struct v4l2_subdev_platform_data { * v4l2_subdev_enable_streams() and * v4l2_subdev_disable_streams() helper functions for fallback * cases. + * @s_stream_enabled: Tracks whether streaming has been enabled with s_stream. + * This is only for call_s_stream() internal use. * * Each instance of a subdev driver should create this struct, either * stand-alone or embedded in a larger struct. @@ -1093,6 +1095,7 @@ struct v4l2_subdev { */ struct v4l2_subdev_state *active_state; u64 enabled_streams; + bool s_stream_enabled; }; -- cgit v1.2.3 From 61d6c8c896c1ccde350c281817847a32b0c6b83b Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 24 Apr 2024 18:39:08 +0300 Subject: media: subdev: Improve v4l2_subdev_enable/disable_streams_fallback v4l2_subdev_enable/disable_streams_fallback() supports falling back to .s_stream() for subdevs with a single source pad. It also tracks the enabled streams for that one pad in the sd->enabled_streams field. Tracking the enabled streams with sd->enabled_streams does not make sense, as with .s_stream() there can only be a single stream per pad. Thus, as the v4l2_subdev_enable/disable_streams_fallback() only supports a single source pad, all we really need is a boolean which tells whether streaming has been enabled on this pad or not. However, as we only need a true/false state for a pad (instead of tracking which streams have been enabled for a pad), we can easily extend the fallback mechanism to support multiple source pads as we only need to keep track of which pads have been enabled. Change the sd->enabled_streams field to sd->enabled_pads, which is a 64-bit bitmask tracking the enabled source pads. With this change we can remove the restriction that v4l2_subdev_enable/disable_streams_fallback() only supports a single source pad. Reviewed-by: Laurent Pinchart Tested-by: Umang Jain Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/v4l2-core/v4l2-subdev.c | 68 ++++++++++++++++++++--------------- include/media/v4l2-subdev.h | 9 +++-- 2 files changed, 44 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index 8675ad0688af..0231acf7168e 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -2159,37 +2159,43 @@ static int v4l2_subdev_enable_streams_fallback(struct v4l2_subdev *sd, u32 pad, u64 streams_mask) { struct device *dev = sd->entity.graph_obj.mdev->dev; - unsigned int i; int ret; /* * The subdev doesn't implement pad-based stream enable, fall back - * on the .s_stream() operation. This can only be done for subdevs that - * have a single source pad, as sd->enabled_streams is global to the - * subdev. + * to the .s_stream() operation. */ if (!(sd->entity.pads[pad].flags & MEDIA_PAD_FL_SOURCE)) return -EOPNOTSUPP; - for (i = 0; i < sd->entity.num_pads; ++i) { - if (i != pad && sd->entity.pads[i].flags & MEDIA_PAD_FL_SOURCE) - return -EOPNOTSUPP; - } + /* + * .s_stream() means there is no streams support, so the only allowed + * stream is the implicit stream 0. + */ + if (streams_mask != BIT_ULL(0)) + return -EOPNOTSUPP; + + /* + * We use a 64-bit bitmask for tracking enabled pads, so only subdevices + * with 64 pads or less can be supported. + */ + if (pad >= sizeof(sd->enabled_pads) * BITS_PER_BYTE) + return -EOPNOTSUPP; - if (sd->enabled_streams & streams_mask) { - dev_dbg(dev, "set of streams %#llx already enabled on %s:%u\n", - streams_mask, sd->entity.name, pad); + if (sd->enabled_pads & BIT_ULL(pad)) { + dev_dbg(dev, "pad %u already enabled on %s\n", + pad, sd->entity.name); return -EALREADY; } - /* Start streaming when the first streams are enabled. */ - if (!sd->enabled_streams) { + /* Start streaming when the first pad is enabled. */ + if (!sd->enabled_pads) { ret = v4l2_subdev_call(sd, video, s_stream, 1); if (ret) return ret; } - sd->enabled_streams |= streams_mask; + sd->enabled_pads |= BIT_ULL(pad); return 0; } @@ -2276,37 +2282,43 @@ static int v4l2_subdev_disable_streams_fallback(struct v4l2_subdev *sd, u32 pad, u64 streams_mask) { struct device *dev = sd->entity.graph_obj.mdev->dev; - unsigned int i; int ret; /* - * If the subdev doesn't implement pad-based stream enable, fall back - * on the .s_stream() operation. This can only be done for subdevs that - * have a single source pad, as sd->enabled_streams is global to the - * subdev. + * If the subdev doesn't implement pad-based stream enable, fall back + * to the .s_stream() operation. */ if (!(sd->entity.pads[pad].flags & MEDIA_PAD_FL_SOURCE)) return -EOPNOTSUPP; - for (i = 0; i < sd->entity.num_pads; ++i) { - if (i != pad && sd->entity.pads[i].flags & MEDIA_PAD_FL_SOURCE) - return -EOPNOTSUPP; - } + /* + * .s_stream() means there is no streams support, so the only allowed + * stream is the implicit stream 0. + */ + if (streams_mask != BIT_ULL(0)) + return -EOPNOTSUPP; + + /* + * We use a 64-bit bitmask for tracking enabled pads, so only subdevices + * with 64 pads or less can be supported. + */ + if (pad >= sizeof(sd->enabled_pads) * BITS_PER_BYTE) + return -EOPNOTSUPP; - if ((sd->enabled_streams & streams_mask) != streams_mask) { - dev_dbg(dev, "set of streams %#llx already disabled on %s:%u\n", - streams_mask, sd->entity.name, pad); + if (!(sd->enabled_pads & BIT_ULL(pad))) { + dev_dbg(dev, "pad %u already disabled on %s\n", + pad, sd->entity.name); return -EALREADY; } /* Stop streaming when the last streams are disabled. */ - if (!(sd->enabled_streams & ~streams_mask)) { + if (!(sd->enabled_pads & ~BIT_ULL(pad))) { ret = v4l2_subdev_call(sd, video, s_stream, 0); if (ret) return ret; } - sd->enabled_streams &= ~streams_mask; + sd->enabled_pads &= ~BIT_ULL(pad); return 0; } diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index d3f15882927b..f191cf00c840 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1041,10 +1041,9 @@ struct v4l2_subdev_platform_data { * @active_state: Active state for the subdev (NULL for subdevs tracking the * state internally). Initialized by calling * v4l2_subdev_init_finalize(). - * @enabled_streams: Bitmask of enabled streams used by - * v4l2_subdev_enable_streams() and - * v4l2_subdev_disable_streams() helper functions for fallback - * cases. + * @enabled_pads: Bitmask of enabled pads used by v4l2_subdev_enable_streams() + * and v4l2_subdev_disable_streams() helper functions for + * fallback cases. * @s_stream_enabled: Tracks whether streaming has been enabled with s_stream. * This is only for call_s_stream() internal use. * @@ -1094,7 +1093,7 @@ struct v4l2_subdev { * doesn't support it. */ struct v4l2_subdev_state *active_state; - u64 enabled_streams; + u64 enabled_pads; bool s_stream_enabled; }; -- cgit v1.2.3 From 5f3ce14fae742d1d23061c3122d93edb879ebf53 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 24 Apr 2024 18:39:09 +0300 Subject: media: subdev: Add v4l2_subdev_is_streaming() Add a helper function which returns whether the subdevice is streaming, i.e. if .s_stream or .enable_streams has been called successfully. Reviewed-by: Umang Jain Reviewed-by: Laurent Pinchart Tested-by: Umang Jain Signed-off-by: Tomi Valkeinen Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- drivers/media/v4l2-core/v4l2-subdev.c | 25 +++++++++++++++++++++++++ include/media/v4l2-subdev.h | 13 +++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/drivers/media/v4l2-core/v4l2-subdev.c b/drivers/media/v4l2-core/v4l2-subdev.c index 0231acf7168e..f256d65cf90f 100644 --- a/drivers/media/v4l2-core/v4l2-subdev.c +++ b/drivers/media/v4l2-core/v4l2-subdev.c @@ -2474,6 +2474,31 @@ void v4l2_subdev_notify_event(struct v4l2_subdev *sd, } EXPORT_SYMBOL_GPL(v4l2_subdev_notify_event); +bool v4l2_subdev_is_streaming(struct v4l2_subdev *sd) +{ + struct v4l2_subdev_state *state; + + if (!v4l2_subdev_has_op(sd, pad, enable_streams)) + return sd->s_stream_enabled; + + if (!(sd->flags & V4L2_SUBDEV_FL_STREAMS)) + return !!sd->enabled_pads; + + state = v4l2_subdev_get_locked_active_state(sd); + + for (unsigned int i = 0; i < state->stream_configs.num_configs; ++i) { + const struct v4l2_subdev_stream_config *cfg; + + cfg = &state->stream_configs.configs[i]; + + if (cfg->enabled) + return true; + } + + return false; +} +EXPORT_SYMBOL_GPL(v4l2_subdev_is_streaming); + int v4l2_subdev_get_privacy_led(struct v4l2_subdev *sd) { #if IS_REACHABLE(CONFIG_LEDS_CLASS) diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index f191cf00c840..23c970d09870 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1956,4 +1956,17 @@ extern const struct v4l2_subdev_ops v4l2_subdev_call_wrappers; void v4l2_subdev_notify_event(struct v4l2_subdev *sd, const struct v4l2_event *ev); +/** + * v4l2_subdev_is_streaming() - Returns if the subdevice is streaming + * @sd: The subdevice + * + * v4l2_subdev_is_streaming() tells if the subdevice is currently streaming. + * "Streaming" here means whether .s_stream() or .enable_streams() has been + * successfully called, and the streaming has not yet been disabled. + * + * If the subdevice implements .enable_streams() this function must be called + * while holding the active state lock. + */ +bool v4l2_subdev_is_streaming(struct v4l2_subdev *sd); + #endif /* _V4L2_SUBDEV_H */ -- cgit v1.2.3 From f8e9662e4da6d4e394a3d9cf0d335863650d712d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 24 Apr 2024 18:39:14 +0300 Subject: media: subdev: Improve s_stream documentation Now that enable/disable_streams operations are available for single-stream subdevices too, there's no reason to use the old s_stream operation on new drivers. Extend the documentation reflecting this. Signed-off-by: Tomi Valkeinen Reviewed-by: Umang Jain Reviewed-by: Laurent Pinchart Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index 23c970d09870..df66365576dd 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -450,6 +450,15 @@ enum v4l2_subdev_pre_streamon_flags { * already started or stopped subdev. Also see call_s_stream wrapper in * v4l2-subdev.c. * + * New drivers should instead implement &v4l2_subdev_pad_ops.enable_streams + * and &v4l2_subdev_pad_ops.disable_streams operations, and use + * v4l2_subdev_s_stream_helper for the &v4l2_subdev_video_ops.s_stream + * operation to support legacy users. + * + * Drivers should also not call the .s_stream() subdev operation directly, + * but use the v4l2_subdev_enable_streams() and + * v4l2_subdev_disable_streams() helpers. + * * @g_pixelaspect: callback to return the pixelaspect ratio. * * @s_rx_buffer: set a host allocated memory buffer for the subdev. The subdev -- cgit v1.2.3 From e73412fdeb541e777b3fd50b665781c1856422b5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 9 May 2024 00:40:43 +0300 Subject: media: v4l2-subdev: Fix v4l2_subdev_state_get_format() documentation The documentation of the v4l2_subdev_state_get_format() macro incorrectly references __v4l2_subdev_state_get_format() instead of __v4l2_subdev_state_gen_call(). Fix it, and also update the list of similar macros to add the missing v4l2_subdev_state_get_interval(). Suggested-by: Sakari Ailus Signed-off-by: Laurent Pinchart Reviewed-by: Sakari Ailus Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index df66365576dd..ed339f0116bf 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1351,12 +1351,12 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd); */ /* * Wrap v4l2_subdev_state_get_format(), allowing the function to be called with - * two or three arguments. The purpose of the __v4l2_subdev_state_get_format() + * two or three arguments. The purpose of the __v4l2_subdev_state_gen_call() * macro below is to come up with the name of the function or macro to call, * using the last two arguments (_stream and _pad). The selected function or * macro is then called using the arguments specified by the caller. A similar - * arrangement is used for v4l2_subdev_state_crop() and - * v4l2_subdev_state_compose() below. + * arrangement is used for v4l2_subdev_state_crop(), v4l2_subdev_state_compose() + * and v4l2_subdev_state_get_interval() below. */ #define v4l2_subdev_state_get_format(state, pad, ...) \ __v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad) \ -- cgit v1.2.3 From 85af84852f115d3c9d4b45b0500315e630baa82c Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Thu, 9 May 2024 00:40:43 +0300 Subject: media: v4l2-subdev: Provide const-aware subdev state accessors It would be useful to mark instances of v4l2_subdev_state structures as const when code needs to access them read-only. This isn't currently possible, as the v4l2_subdev_state_get_*() accessor functions take a non-const pointer to the state. Use _Generic() to provide two different versions of the accessors, for const and non-const states respectively. The former returns a const pointer to the requested format, rectangle or interval, implementing const-correctness. The latter returns a non-const pointer, preserving the current behaviour for drivers. Signed-off-by: Laurent Pinchart Reviewed-by: Sakari Ailus Reviewed-by: Tomi Valkeinen [Sakari Ailus: Drop the word "below" from the text.] Signed-off-by: Sakari Ailus Signed-off-by: Hans Verkuil --- include/media/v4l2-subdev.h | 54 ++++++++++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/media/v4l2-subdev.h b/include/media/v4l2-subdev.h index ed339f0116bf..1b74e037e440 100644 --- a/include/media/v4l2-subdev.h +++ b/include/media/v4l2-subdev.h @@ -1337,6 +1337,16 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd); #define __v4l2_subdev_state_gen_call(NAME, _1, ARG, ...) \ __v4l2_subdev_state_get_ ## NAME ## ARG +/* + * A macro to constify the return value of the state accessors when the state + * parameter is const. + */ +#define __v4l2_subdev_state_constify_ret(state, value) \ + _Generic(state, \ + const struct v4l2_subdev_state *: (const typeof(*(value)) *)(value), \ + struct v4l2_subdev_state *: (value) \ + ) + /** * v4l2_subdev_state_get_format() - Get pointer to a stream format * @state: subdevice state @@ -1352,15 +1362,20 @@ void v4l2_subdev_cleanup(struct v4l2_subdev *sd); /* * Wrap v4l2_subdev_state_get_format(), allowing the function to be called with * two or three arguments. The purpose of the __v4l2_subdev_state_gen_call() - * macro below is to come up with the name of the function or macro to call, - * using the last two arguments (_stream and _pad). The selected function or - * macro is then called using the arguments specified by the caller. A similar - * arrangement is used for v4l2_subdev_state_crop(), v4l2_subdev_state_compose() - * and v4l2_subdev_state_get_interval() below. - */ -#define v4l2_subdev_state_get_format(state, pad, ...) \ - __v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad) \ - (state, pad, ##__VA_ARGS__) + * macro is to come up with the name of the function or macro to call, using + * the last two arguments (_stream and _pad). The selected function or macro is + * then called using the arguments specified by the caller. The + * __v4l2_subdev_state_constify_ret() macro constifies the returned pointer + * when the state is const, allowing the state accessors to guarantee + * const-correctness in all cases. + * + * A similar arrangement is used for v4l2_subdev_state_crop(), + * v4l2_subdev_state_compose() and v4l2_subdev_state_get_interval() below. + */ +#define v4l2_subdev_state_get_format(state, pad, ...) \ + __v4l2_subdev_state_constify_ret(state, \ + __v4l2_subdev_state_gen_call(format, ##__VA_ARGS__, , _pad) \ + ((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__)) #define __v4l2_subdev_state_get_format_pad(state, pad) \ __v4l2_subdev_state_get_format(state, pad, 0) struct v4l2_mbus_framefmt * @@ -1379,9 +1394,10 @@ __v4l2_subdev_state_get_format(struct v4l2_subdev_state *state, * For stream-unaware drivers the crop rectangle for the corresponding pad is * returned. If the pad does not exist, NULL is returned. */ -#define v4l2_subdev_state_get_crop(state, pad, ...) \ - __v4l2_subdev_state_gen_call(crop, ##__VA_ARGS__, , _pad) \ - (state, pad, ##__VA_ARGS__) +#define v4l2_subdev_state_get_crop(state, pad, ...) \ + __v4l2_subdev_state_constify_ret(state, \ + __v4l2_subdev_state_gen_call(crop, ##__VA_ARGS__, , _pad) \ + ((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__)) #define __v4l2_subdev_state_get_crop_pad(state, pad) \ __v4l2_subdev_state_get_crop(state, pad, 0) struct v4l2_rect * @@ -1400,9 +1416,10 @@ __v4l2_subdev_state_get_crop(struct v4l2_subdev_state *state, unsigned int pad, * For stream-unaware drivers the compose rectangle for the corresponding pad is * returned. If the pad does not exist, NULL is returned. */ -#define v4l2_subdev_state_get_compose(state, pad, ...) \ - __v4l2_subdev_state_gen_call(compose, ##__VA_ARGS__, , _pad) \ - (state, pad, ##__VA_ARGS__) +#define v4l2_subdev_state_get_compose(state, pad, ...) \ + __v4l2_subdev_state_constify_ret(state, \ + __v4l2_subdev_state_gen_call(compose, ##__VA_ARGS__, , _pad) \ + ((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__)) #define __v4l2_subdev_state_get_compose_pad(state, pad) \ __v4l2_subdev_state_get_compose(state, pad, 0) struct v4l2_rect * @@ -1421,9 +1438,10 @@ __v4l2_subdev_state_get_compose(struct v4l2_subdev_state *state, * For stream-unaware drivers the frame interval for the corresponding pad is * returned. If the pad does not exist, NULL is returned. */ -#define v4l2_subdev_state_get_interval(state, pad, ...) \ - __v4l2_subdev_state_gen_call(interval, ##__VA_ARGS__, , _pad) \ - (state, pad, ##__VA_ARGS__) +#define v4l2_subdev_state_get_interval(state, pad, ...) \ + __v4l2_subdev_state_constify_ret(state, \ + __v4l2_subdev_state_gen_call(interval, ##__VA_ARGS__, , _pad) \ + ((struct v4l2_subdev_state *)state, pad, ##__VA_ARGS__)) #define __v4l2_subdev_state_get_interval_pad(state, pad) \ __v4l2_subdev_state_get_interval(state, pad, 0) struct v4l2_fract * -- cgit v1.2.3 From 1d8491d3e726984343dd8c3cdbe2f2b47cfdd928 Mon Sep 17 00:00:00 2001 From: Paolo Pisati Date: Sat, 1 Jun 2024 17:32:54 +0200 Subject: m68k: amiga: Turn off Warp1260 interrupts during boot On an Amiga 1200 equipped with a Warp1260 accelerator, an interrupt storm coming from the accelerator board causes the machine to crash in local_irq_enable() or auto_irq_enable(). Disabling interrupts for the Warp1260 in amiga_parse_bootinfo() fixes the problem. Link: https://lore.kernel.org/r/ZkjwzVwYeQtyAPrL@amaterasu.local Cc: stable Signed-off-by: Paolo Pisati Reviewed-by: Michael Schmitz Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20240601153254.186225-1-p.pisati@gmail.com Signed-off-by: Geert Uytterhoeven --- arch/m68k/amiga/config.c | 9 +++++++++ include/uapi/linux/zorro_ids.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'include') diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index d4b170c861bf..0147130dc34e 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -180,6 +180,15 @@ int __init amiga_parse_bootinfo(const struct bi_record *record) dev->slotsize = be16_to_cpu(cd->cd_SlotSize); dev->boardaddr = be32_to_cpu(cd->cd_BoardAddr); dev->boardsize = be32_to_cpu(cd->cd_BoardSize); + + /* CS-LAB Warp 1260 workaround */ + if (be16_to_cpu(dev->rom.er_Manufacturer) == ZORRO_MANUF(ZORRO_PROD_CSLAB_WARP_1260) && + dev->rom.er_Product == ZORRO_PROD(ZORRO_PROD_CSLAB_WARP_1260)) { + + /* turn off all interrupts */ + pr_info("Warp 1260 card detected: applying interrupt storm workaround\n"); + *(uint32_t *)(dev->boardaddr + 0x1000) = 0xfff; + } } else pr_warn("amiga_parse_bootinfo: too many AutoConfig devices\n"); #endif /* CONFIG_ZORRO */ diff --git a/include/uapi/linux/zorro_ids.h b/include/uapi/linux/zorro_ids.h index 6e574d7b7d79..393f2ee9c042 100644 --- a/include/uapi/linux/zorro_ids.h +++ b/include/uapi/linux/zorro_ids.h @@ -449,6 +449,9 @@ #define ZORRO_PROD_VMC_ISDN_BLASTER_Z2 ZORRO_ID(VMC, 0x01, 0) #define ZORRO_PROD_VMC_HYPERCOM_4 ZORRO_ID(VMC, 0x02, 0) +#define ZORRO_MANUF_CSLAB 0x1400 +#define ZORRO_PROD_CSLAB_WARP_1260 ZORRO_ID(CSLAB, 0x65, 0) + #define ZORRO_MANUF_INFORMATION 0x157C #define ZORRO_PROD_INFORMATION_ISDN_ENGINE_I ZORRO_ID(INFORMATION, 0x64, 0) -- cgit v1.2.3 From 0e6be855a96dd44effce97b6b8d0cf0d0d0b7303 Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 22 May 2024 16:27:23 +0800 Subject: dt-bindings: clock: add Amlogic C3 PLL clock controller Add the PLL clock controller dt-bindings for Amlogic C3 SoC family. Reviewed-by: Krzysztof Kozlowski Co-developed-by: Chuan Liu Signed-off-by: Chuan Liu Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20240522082727.3029656-2-xianwei.zhao@amlogic.com Signed-off-by: Jerome Brunet --- .../bindings/clock/amlogic,c3-pll-clkc.yaml | 59 ++++++++++++++++++++++ include/dt-bindings/clock/amlogic,c3-pll-clkc.h | 40 +++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml create mode 100644 include/dt-bindings/clock/amlogic,c3-pll-clkc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml new file mode 100644 index 000000000000..43de3c6fc1cf --- /dev/null +++ b/Documentation/devicetree/bindings/clock/amlogic,c3-pll-clkc.yaml @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2022-2023 Amlogic, Inc. All rights reserved +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/amlogic,c3-pll-clkc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic C3 series PLL Clock Controller + +maintainers: + - Neil Armstrong + - Jerome Brunet + - Chuan Liu + - Xianwei Zhao + +properties: + compatible: + const: amlogic,c3-pll-clkc + + reg: + maxItems: 1 + + clocks: + items: + - description: input top pll + - description: input mclk pll + + clock-names: + items: + - const: top + - const: mclk + + "#clock-cells": + const: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - "#clock-cells" + +additionalProperties: false + +examples: + - | + apb { + #address-cells = <2>; + #size-cells = <2>; + + clock-controller@8000 { + compatible = "amlogic,c3-pll-clkc"; + reg = <0x0 0x8000 0x0 0x1a4>; + clocks = <&scmi_clk 2>, + <&scmi_clk 5>; + clock-names = "top", "mclk"; + #clock-cells = <1>; + }; + }; diff --git a/include/dt-bindings/clock/amlogic,c3-pll-clkc.h b/include/dt-bindings/clock/amlogic,c3-pll-clkc.h new file mode 100644 index 000000000000..fcdc558715e8 --- /dev/null +++ b/include/dt-bindings/clock/amlogic,c3-pll-clkc.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (c) 2023 Amlogic, Inc. All rights reserved. + * Author: Chuan Liu + */ + +#ifndef _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H +#define _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H + +#define CLKID_FCLK_50M_EN 0 +#define CLKID_FCLK_50M 1 +#define CLKID_FCLK_DIV2_DIV 2 +#define CLKID_FCLK_DIV2 3 +#define CLKID_FCLK_DIV2P5_DIV 4 +#define CLKID_FCLK_DIV2P5 5 +#define CLKID_FCLK_DIV3_DIV 6 +#define CLKID_FCLK_DIV3 7 +#define CLKID_FCLK_DIV4_DIV 8 +#define CLKID_FCLK_DIV4 9 +#define CLKID_FCLK_DIV5_DIV 10 +#define CLKID_FCLK_DIV5 11 +#define CLKID_FCLK_DIV7_DIV 12 +#define CLKID_FCLK_DIV7 13 +#define CLKID_GP0_PLL_DCO 14 +#define CLKID_GP0_PLL 15 +#define CLKID_HIFI_PLL_DCO 16 +#define CLKID_HIFI_PLL 17 +#define CLKID_MCLK_PLL_DCO 18 +#define CLKID_MCLK_PLL_OD 19 +#define CLKID_MCLK_PLL 20 +#define CLKID_MCLK0_SEL 21 +#define CLKID_MCLK0_SEL_EN 22 +#define CLKID_MCLK0_DIV 23 +#define CLKID_MCLK0 24 +#define CLKID_MCLK1_SEL 25 +#define CLKID_MCLK1_SEL_EN 26 +#define CLKID_MCLK1_DIV 27 +#define CLKID_MCLK1 28 + +#endif /* _DT_BINDINGS_CLOCK_AMLOGIC_C3_PLL_CLKC_H */ -- cgit v1.2.3 From d309989a0a0aff3b8ad5259aa11f119b15336c1a Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 22 May 2024 16:27:24 +0800 Subject: dt-bindings: clock: add Amlogic C3 SCMI clock controller support Add the SCMI clock controller dt-bindings for Amlogic C3 SoC family Acked-by: Rob Herring (Arm) Co-developed-by: Chuan Liu Signed-off-by: Chuan Liu Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20240522082727.3029656-3-xianwei.zhao@amlogic.com Signed-off-by: Jerome Brunet --- include/dt-bindings/clock/amlogic,c3-scmi-clkc.h | 27 ++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 include/dt-bindings/clock/amlogic,c3-scmi-clkc.h (limited to 'include') diff --git a/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h b/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h new file mode 100644 index 000000000000..663c9b349275 --- /dev/null +++ b/include/dt-bindings/clock/amlogic,c3-scmi-clkc.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (c) 2023 Amlogic, Inc. All rights reserved. + * Author: Chuan Liu + */ + +#ifndef __AMLOGIC_C3_SCMI_CLKC_H +#define __AMLOGIC_C3_SCMI_CLKC_H + +#define CLKID_DDR_PLL_OSC 0 +#define CLKID_DDR_PHY 1 +#define CLKID_TOP_PLL_OSC 2 +#define CLKID_USB_PLL_OSC 3 +#define CLKID_MIPIISP_VOUT 4 +#define CLKID_MCLK_PLL_OSC 5 +#define CLKID_USB_CTRL 6 +#define CLKID_ETH_PLL_OSC 7 +#define CLKID_OSC 8 +#define CLKID_SYS_CLK 9 +#define CLKID_AXI_CLK 10 +#define CLKID_CPU_CLK 11 +#define CLKID_FIXED_PLL_OSC 12 +#define CLKID_GP1_PLL_OSC 13 +#define CLKID_SYS_PLL_DIV16 14 +#define CLKID_CPU_CLK_DIV16 15 + +#endif /* __AMLOGIC_C3_SCMI_CLKC_H */ -- cgit v1.2.3 From fc1c7f941c71460a730a449f76764d883e270cba Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 22 May 2024 16:27:25 +0800 Subject: dt-bindings: clock: add Amlogic C3 peripherals clock controller Add the peripherals clock controller dt-bindings for Amlogic C3 SoC family Reviewed-by: Rob Herring (Arm) Co-developed-by: Chuan Liu Signed-off-by: Chuan Liu Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20240522082727.3029656-4-xianwei.zhao@amlogic.com Signed-off-by: Jerome Brunet --- .../clock/amlogic,c3-peripherals-clkc.yaml | 120 ++++++++++++ .../clock/amlogic,c3-peripherals-clkc.h | 212 +++++++++++++++++++++ 2 files changed, 332 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml create mode 100644 include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml new file mode 100644 index 000000000000..98e30b8c0529 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/amlogic,c3-peripherals-clkc.yaml @@ -0,0 +1,120 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +# Copyright (C) 2022-2023 Amlogic, Inc. All rights reserved +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/amlogic,c3-peripherals-clkc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Amlogic C3 series Peripheral Clock Controller + +maintainers: + - Neil Armstrong + - Jerome Brunet + - Xianwei Zhao + - Chuan Liu + +properties: + compatible: + const: amlogic,c3-peripherals-clkc + + reg: + maxItems: 1 + + clocks: + minItems: 16 + items: + - description: input oscillator (usually at 24MHz) + - description: input oscillators multiplexer + - description: input fix pll + - description: input fclk div 2 + - description: input fclk div 2p5 + - description: input fclk div 3 + - description: input fclk div 4 + - description: input fclk div 5 + - description: input fclk div 7 + - description: input gp0 pll + - description: input gp1 pll + - description: input hifi pll + - description: input sys clk + - description: input axi clk + - description: input sys pll div 16 + - description: input cpu clk div 16 + - description: input pad clock for rtc clk (optional) + + clock-names: + minItems: 16 + items: + - const: xtal_24m + - const: oscin + - const: fix + - const: fdiv2 + - const: fdiv2p5 + - const: fdiv3 + - const: fdiv4 + - const: fdiv5 + - const: fdiv7 + - const: gp0 + - const: gp1 + - const: hifi + - const: sysclk + - const: axiclk + - const: sysplldiv16 + - const: cpudiv16 + - const: pad_osc + + "#clock-cells": + const: 1 + +required: + - compatible + - reg + - clocks + - clock-names + - "#clock-cells" + +additionalProperties: false + +examples: + - | + apb { + #address-cells = <2>; + #size-cells = <2>; + + clock-controller@0 { + compatible = "amlogic,c3-peripherals-clkc"; + reg = <0x0 0x0 0x0 0x49c>; + #clock-cells = <1>; + clocks = <&xtal_24m>, + <&scmi_clk 8>, + <&scmi_clk 12>, + <&clkc_pll 3>, + <&clkc_pll 5>, + <&clkc_pll 7>, + <&clkc_pll 9>, + <&clkc_pll 11>, + <&clkc_pll 13>, + <&clkc_pll 15>, + <&scmi_clk 13>, + <&clkc_pll 17>, + <&scmi_clk 9>, + <&scmi_clk 10>, + <&scmi_clk 14>, + <&scmi_clk 15>; + clock-names = "xtal_24m", + "oscin", + "fix", + "fdiv2", + "fdiv2p5", + "fdiv3", + "fdiv4", + "fdiv5", + "fdiv7", + "gp0", + "gp1", + "hifi", + "sysclk", + "axiclk", + "sysplldiv16", + "cpudiv16"; + }; + }; diff --git a/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h b/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h new file mode 100644 index 000000000000..d115c741c255 --- /dev/null +++ b/include/dt-bindings/clock/amlogic,c3-peripherals-clkc.h @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (c) 2023 Amlogic, Inc. All rights reserved. + * Author: Chuan Liu + */ + +#ifndef _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H +#define _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H + +#define CLKID_RTC_XTAL_CLKIN 0 +#define CLKID_RTC_32K_DIV 1 +#define CLKID_RTC_32K_MUX 2 +#define CLKID_RTC_32K 3 +#define CLKID_RTC_CLK 4 +#define CLKID_SYS_RESET_CTRL 5 +#define CLKID_SYS_PWR_CTRL 6 +#define CLKID_SYS_PAD_CTRL 7 +#define CLKID_SYS_CTRL 8 +#define CLKID_SYS_TS_PLL 9 +#define CLKID_SYS_DEV_ARB 10 +#define CLKID_SYS_MMC_PCLK 11 +#define CLKID_SYS_CPU_CTRL 12 +#define CLKID_SYS_JTAG_CTRL 13 +#define CLKID_SYS_IR_CTRL 14 +#define CLKID_SYS_IRQ_CTRL 15 +#define CLKID_SYS_MSR_CLK 16 +#define CLKID_SYS_ROM 17 +#define CLKID_SYS_UART_F 18 +#define CLKID_SYS_CPU_ARB 19 +#define CLKID_SYS_RSA 20 +#define CLKID_SYS_SAR_ADC 21 +#define CLKID_SYS_STARTUP 22 +#define CLKID_SYS_SECURE 23 +#define CLKID_SYS_SPIFC 24 +#define CLKID_SYS_NNA 25 +#define CLKID_SYS_ETH_MAC 26 +#define CLKID_SYS_GIC 27 +#define CLKID_SYS_RAMA 28 +#define CLKID_SYS_BIG_NIC 29 +#define CLKID_SYS_RAMB 30 +#define CLKID_SYS_AUDIO_PCLK 31 +#define CLKID_SYS_PWM_KL 32 +#define CLKID_SYS_PWM_IJ 33 +#define CLKID_SYS_USB 34 +#define CLKID_SYS_SD_EMMC_A 35 +#define CLKID_SYS_SD_EMMC_C 36 +#define CLKID_SYS_PWM_AB 37 +#define CLKID_SYS_PWM_CD 38 +#define CLKID_SYS_PWM_EF 39 +#define CLKID_SYS_PWM_GH 40 +#define CLKID_SYS_SPICC_1 41 +#define CLKID_SYS_SPICC_0 42 +#define CLKID_SYS_UART_A 43 +#define CLKID_SYS_UART_B 44 +#define CLKID_SYS_UART_C 45 +#define CLKID_SYS_UART_D 46 +#define CLKID_SYS_UART_E 47 +#define CLKID_SYS_I2C_M_A 48 +#define CLKID_SYS_I2C_M_B 49 +#define CLKID_SYS_I2C_M_C 50 +#define CLKID_SYS_I2C_M_D 51 +#define CLKID_SYS_I2S_S_A 52 +#define CLKID_SYS_RTC 53 +#define CLKID_SYS_GE2D 54 +#define CLKID_SYS_ISP 55 +#define CLKID_SYS_GPV_ISP_NIC 56 +#define CLKID_SYS_GPV_CVE_NIC 57 +#define CLKID_SYS_MIPI_DSI_HOST 58 +#define CLKID_SYS_MIPI_DSI_PHY 59 +#define CLKID_SYS_ETH_PHY 60 +#define CLKID_SYS_ACODEC 61 +#define CLKID_SYS_DWAP 62 +#define CLKID_SYS_DOS 63 +#define CLKID_SYS_CVE 64 +#define CLKID_SYS_VOUT 65 +#define CLKID_SYS_VC9000E 66 +#define CLKID_SYS_PWM_MN 67 +#define CLKID_SYS_SD_EMMC_B 68 +#define CLKID_AXI_SYS_NIC 69 +#define CLKID_AXI_ISP_NIC 70 +#define CLKID_AXI_CVE_NIC 71 +#define CLKID_AXI_RAMB 72 +#define CLKID_AXI_RAMA 73 +#define CLKID_AXI_CPU_DMC 74 +#define CLKID_AXI_NIC 75 +#define CLKID_AXI_DMA 76 +#define CLKID_AXI_MUX_NIC 77 +#define CLKID_AXI_CVE 78 +#define CLKID_AXI_DEV1_DMC 79 +#define CLKID_AXI_DEV0_DMC 80 +#define CLKID_AXI_DSP_DMC 81 +#define CLKID_12_24M_IN 82 +#define CLKID_12M_24M 83 +#define CLKID_FCLK_25M_DIV 84 +#define CLKID_FCLK_25M 85 +#define CLKID_GEN_SEL 86 +#define CLKID_GEN_DIV 87 +#define CLKID_GEN 88 +#define CLKID_SARADC_SEL 89 +#define CLKID_SARADC_DIV 90 +#define CLKID_SARADC 91 +#define CLKID_PWM_A_SEL 92 +#define CLKID_PWM_A_DIV 93 +#define CLKID_PWM_A 94 +#define CLKID_PWM_B_SEL 95 +#define CLKID_PWM_B_DIV 96 +#define CLKID_PWM_B 97 +#define CLKID_PWM_C_SEL 98 +#define CLKID_PWM_C_DIV 99 +#define CLKID_PWM_C 100 +#define CLKID_PWM_D_SEL 101 +#define CLKID_PWM_D_DIV 102 +#define CLKID_PWM_D 103 +#define CLKID_PWM_E_SEL 104 +#define CLKID_PWM_E_DIV 105 +#define CLKID_PWM_E 106 +#define CLKID_PWM_F_SEL 107 +#define CLKID_PWM_F_DIV 108 +#define CLKID_PWM_F 109 +#define CLKID_PWM_G_SEL 110 +#define CLKID_PWM_G_DIV 111 +#define CLKID_PWM_G 112 +#define CLKID_PWM_H_SEL 113 +#define CLKID_PWM_H_DIV 114 +#define CLKID_PWM_H 115 +#define CLKID_PWM_I_SEL 116 +#define CLKID_PWM_I_DIV 117 +#define CLKID_PWM_I 118 +#define CLKID_PWM_J_SEL 119 +#define CLKID_PWM_J_DIV 120 +#define CLKID_PWM_J 121 +#define CLKID_PWM_K_SEL 122 +#define CLKID_PWM_K_DIV 123 +#define CLKID_PWM_K 124 +#define CLKID_PWM_L_SEL 125 +#define CLKID_PWM_L_DIV 126 +#define CLKID_PWM_L 127 +#define CLKID_PWM_M_SEL 128 +#define CLKID_PWM_M_DIV 129 +#define CLKID_PWM_M 130 +#define CLKID_PWM_N_SEL 131 +#define CLKID_PWM_N_DIV 132 +#define CLKID_PWM_N 133 +#define CLKID_SPICC_A_SEL 134 +#define CLKID_SPICC_A_DIV 135 +#define CLKID_SPICC_A 136 +#define CLKID_SPICC_B_SEL 137 +#define CLKID_SPICC_B_DIV 138 +#define CLKID_SPICC_B 139 +#define CLKID_SPIFC_SEL 140 +#define CLKID_SPIFC_DIV 141 +#define CLKID_SPIFC 142 +#define CLKID_SD_EMMC_A_SEL 143 +#define CLKID_SD_EMMC_A_DIV 144 +#define CLKID_SD_EMMC_A 145 +#define CLKID_SD_EMMC_B_SEL 146 +#define CLKID_SD_EMMC_B_DIV 147 +#define CLKID_SD_EMMC_B 148 +#define CLKID_SD_EMMC_C_SEL 149 +#define CLKID_SD_EMMC_C_DIV 150 +#define CLKID_SD_EMMC_C 151 +#define CLKID_TS_DIV 152 +#define CLKID_TS 153 +#define CLKID_ETH_125M_DIV 154 +#define CLKID_ETH_125M 155 +#define CLKID_ETH_RMII_DIV 156 +#define CLKID_ETH_RMII 157 +#define CLKID_MIPI_DSI_MEAS_SEL 158 +#define CLKID_MIPI_DSI_MEAS_DIV 159 +#define CLKID_MIPI_DSI_MEAS 160 +#define CLKID_DSI_PHY_SEL 161 +#define CLKID_DSI_PHY_DIV 162 +#define CLKID_DSI_PHY 163 +#define CLKID_VOUT_MCLK_SEL 164 +#define CLKID_VOUT_MCLK_DIV 165 +#define CLKID_VOUT_MCLK 166 +#define CLKID_VOUT_ENC_SEL 167 +#define CLKID_VOUT_ENC_DIV 168 +#define CLKID_VOUT_ENC 169 +#define CLKID_HCODEC_0_SEL 170 +#define CLKID_HCODEC_0_DIV 171 +#define CLKID_HCODEC_0 172 +#define CLKID_HCODEC_1_SEL 173 +#define CLKID_HCODEC_1_DIV 174 +#define CLKID_HCODEC_1 175 +#define CLKID_HCODEC 176 +#define CLKID_VC9000E_ACLK_SEL 177 +#define CLKID_VC9000E_ACLK_DIV 178 +#define CLKID_VC9000E_ACLK 179 +#define CLKID_VC9000E_CORE_SEL 180 +#define CLKID_VC9000E_CORE_DIV 181 +#define CLKID_VC9000E_CORE 182 +#define CLKID_CSI_PHY0_SEL 183 +#define CLKID_CSI_PHY0_DIV 184 +#define CLKID_CSI_PHY0 185 +#define CLKID_DEWARPA_SEL 186 +#define CLKID_DEWARPA_DIV 187 +#define CLKID_DEWARPA 188 +#define CLKID_ISP0_SEL 189 +#define CLKID_ISP0_DIV 190 +#define CLKID_ISP0 191 +#define CLKID_NNA_CORE_SEL 192 +#define CLKID_NNA_CORE_DIV 193 +#define CLKID_NNA_CORE 194 +#define CLKID_GE2D_SEL 195 +#define CLKID_GE2D_DIV 196 +#define CLKID_GE2D 197 +#define CLKID_VAPB_SEL 198 +#define CLKID_VAPB_DIV 199 +#define CLKID_VAPB 200 + +#endif /* _DT_BINDINGS_CLOCK_AMLOGIC_C3_PERIPHERALS_CLKC_H */ -- cgit v1.2.3 From 668b6a2ef832a878494cc1b12a881c8ec0494b25 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:34 +0200 Subject: flow_dissector: add support for tunnel control flags Dissect [no]csum, [no]dontfrag, [no]oam, [no]crit flags from skb metadata. This is a prerequisite for matching these control flags using TC flower. Suggested-by: Ilya Maximets Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/net/flow_dissector.h | 9 +++++++++ include/net/ip_tunnels.h | 12 ++++++++++++ net/core/flow_dissector.c | 16 +++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 9ab376d1a677..99626475c3f4 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -329,6 +329,14 @@ struct flow_dissector_key_cfm { #define FLOW_DIS_CFM_MDL_MASK GENMASK(7, 5) #define FLOW_DIS_CFM_MDL_MAX 7 +/** + * struct flow_dissector_key_enc_flags: tunnel metadata control flags + * @flags: tunnel control flags + */ +struct flow_dissector_key_enc_flags { + u32 flags; +}; + enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_CONTROL, /* struct flow_dissector_key_control */ FLOW_DISSECTOR_KEY_BASIC, /* struct flow_dissector_key_basic */ @@ -363,6 +371,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_L2TPV3, /* struct flow_dissector_key_l2tpv3 */ FLOW_DISSECTOR_KEY_CFM, /* struct flow_dissector_key_cfm */ FLOW_DISSECTOR_KEY_IPSEC, /* struct flow_dissector_key_ipsec */ + FLOW_DISSECTOR_KEY_ENC_FLAGS, /* struct flow_dissector_key_enc_flags */ FLOW_DISSECTOR_KEY_MAX, }; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a08ec7713..5a530d4fb02c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -247,6 +247,18 @@ static inline bool ip_tunnel_is_options_present(const unsigned long *flags) return ip_tunnel_flags_intersect(flags, present); } +static inline void ip_tunnel_set_encflags_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_CSUM_BIT, present); + __set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, present); + __set_bit(IP_TUNNEL_OAM_BIT, present); + __set_bit(IP_TUNNEL_CRIT_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) { IP_TUNNEL_DECLARE_FLAGS(supp) = { }; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f82e9a7d3b37..59fe46077b3c 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -382,7 +382,9 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_IP) && !dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_ENC_OPTS)) + FLOW_DISSECTOR_KEY_ENC_OPTS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_FLAGS)) return; info = skb_tunnel_info(skb); @@ -475,6 +477,18 @@ skb_flow_dissect_tunnel_info(const struct sk_buff *skb, IP_TUNNEL_GENEVE_OPT_BIT); enc_opt->dst_opt_type = val < __IP_TUNNEL_FLAG_NUM ? val : 0; } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_FLAGS)) { + struct flow_dissector_key_enc_flags *enc_flags; + IP_TUNNEL_DECLARE_FLAGS(flags) = {}; + + enc_flags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_FLAGS, + target_container); + ip_tunnel_set_encflags_present(flags); + ip_tunnel_flags_and(flags, flags, info->key.tun_flags); + enc_flags->flags = bitmap_read(flags, IP_TUNNEL_CSUM_BIT, 32); + } } EXPORT_SYMBOL(skb_flow_dissect_tunnel_info); -- cgit v1.2.3 From 1d17568e74dedbcb54d36af0662a15128297d681 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Thu, 30 May 2024 19:08:35 +0200 Subject: net/sched: cls_flower: add support for matching tunnel control flags extend cls_flower to match TUNNEL_FLAGS_PRESENT bits in tunnel metadata. Suggested-by: Ilya Maximets Acked-by: Jamal Hadi Salim Signed-off-by: Davide Caratti Reviewed-by: Simon Horman Signed-off-by: Paolo Abeni --- include/uapi/linux/pkt_cls.h | 3 +++ net/sched/cls_flower.c | 56 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 229fc925ec3a..b6d38f5fd7c0 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -554,6 +554,9 @@ enum { TCA_FLOWER_KEY_SPI, /* be32 */ TCA_FLOWER_KEY_SPI_MASK, /* be32 */ + TCA_FLOWER_KEY_ENC_FLAGS, /* u32 */ + TCA_FLOWER_KEY_ENC_FLAGS_MASK, /* u32 */ + __TCA_FLOWER_MAX, }; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fd9a6f20b60b..eef570c577ac 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -41,6 +41,12 @@ #define TCA_FLOWER_KEY_CT_FLAGS_MASK \ (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) +#define TUNNEL_FLAGS_PRESENT (\ + _BITUL(IP_TUNNEL_CSUM_BIT) | \ + _BITUL(IP_TUNNEL_DONT_FRAGMENT_BIT) | \ + _BITUL(IP_TUNNEL_OAM_BIT) | \ + _BITUL(IP_TUNNEL_CRIT_OPT_BIT)) + struct fl_flow_key { struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; @@ -75,6 +81,7 @@ struct fl_flow_key { struct flow_dissector_key_l2tpv3 l2tpv3; struct flow_dissector_key_ipsec ipsec; struct flow_dissector_key_cfm cfm; + struct flow_dissector_key_enc_flags enc_flags; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -732,6 +739,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_SPI_MASK] = { .type = NLA_U32 }, [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1), [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED }, + [TCA_FLOWER_KEY_ENC_FLAGS] = NLA_POLICY_MASK(NLA_U32, + TUNNEL_FLAGS_PRESENT), + [TCA_FLOWER_KEY_ENC_FLAGS_MASK] = NLA_POLICY_MASK(NLA_U32, + TUNNEL_FLAGS_PRESENT), }; static const struct nla_policy @@ -1825,6 +1836,21 @@ static int fl_set_key_cfm(struct nlattr **tb, return 0; } +static int fl_set_key_enc_flags(struct nlattr **tb, u32 *flags_key, + u32 *flags_mask, struct netlink_ext_ack *extack) +{ + /* mask is mandatory for flags */ + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, TCA_FLOWER_KEY_ENC_FLAGS_MASK)) { + NL_SET_ERR_MSG(extack, "missing enc_flags mask"); + return -EINVAL; + } + + *flags_key = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS]); + *flags_mask = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_FLAGS_MASK]); + + return 0; +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask, struct netlink_ext_ack *extack) @@ -2059,9 +2085,16 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (ret) return ret; - if (tb[TCA_FLOWER_KEY_FLAGS]) + if (tb[TCA_FLOWER_KEY_FLAGS]) { ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags, extack); + if (ret) + return ret; + } + + if (tb[TCA_FLOWER_KEY_ENC_FLAGS]) + ret = fl_set_key_enc_flags(tb, &key->enc_flags.flags, + &mask->enc_flags.flags, extack); return ret; } @@ -2175,6 +2208,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_IPSEC, ipsec); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_CFM, cfm); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_ENC_FLAGS, enc_flags); skb_flow_dissector_init(dissector, keys, cnt); } @@ -3291,6 +3326,22 @@ err_cfm_opts: return err; } +static int fl_dump_key_enc_flags(struct sk_buff *skb, + struct flow_dissector_key_enc_flags *key, + struct flow_dissector_key_enc_flags *mask) +{ + if (!memchr_inv(mask, 0, sizeof(*mask))) + return 0; + + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS, key->flags)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_FLAGS_MASK, mask->flags)) + return -EMSGSIZE; + + return 0; +} + static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type, struct flow_dissector_key_enc_opts *enc_opts) { @@ -3592,6 +3643,9 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm)) goto nla_put_failure; + if (fl_dump_key_enc_flags(skb, &key->enc_flags, &mask->enc_flags)) + goto nla_put_failure; + return 0; nla_put_failure: -- cgit v1.2.3 From 071115301838c6c265065dd5d6bf43a9a987a550 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:14 -0700 Subject: tcp: wrap mptcp and decrypted checks into tcp_skb_can_collapse_rx() tcp_skb_can_collapse() checks for conditions which don't make sense on input. Because of this we ended up sprinkling a few pairs of mptcp_skb_can_collapse() and skb_cmp_decrypted() calls on the input path. Group them in a new helper. This should make it less likely that someone will check mptcp and not decrypted or vice versa when adding new code. This implicitly adds a decrypted check early in tcp_collapse(). AFAIU this will very slightly increase our ability to collapse packets under memory pressure, not a real bug. Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/tcp.h | 7 +++++++ net/ipv4/tcp_input.c | 11 +++-------- net/ipv4/tcp_ipv4.c | 3 +-- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 32815a40dea1..32741856da01 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1071,6 +1071,13 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, skb_pure_zcopy_same(to, from)); } +static inline bool tcp_skb_can_collapse_rx(const struct sk_buff *to, + const struct sk_buff *from) +{ + return likely(mptcp_skb_can_collapse(to, from) && + !skb_cmp_decrypted(to, from)); +} + /* Events passed to congestion control interface */ enum tcp_ca_event { CA_EVENT_TX_START, /* first transmit when no packets in flight */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5aadf64e554d..212b6fd0caf7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4813,10 +4813,7 @@ static bool tcp_try_coalesce(struct sock *sk, if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq) return false; - if (!mptcp_skb_can_collapse(to, from)) - return false; - - if (skb_cmp_decrypted(from, to)) + if (!tcp_skb_can_collapse_rx(to, from)) return false; if (!skb_try_coalesce(to, from, fragstolen, &delta)) @@ -5372,7 +5369,7 @@ restart: break; } - if (n && n != tail && mptcp_skb_can_collapse(skb, n) && + if (n && n != tail && tcp_skb_can_collapse_rx(skb, n) && TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { end_of_skbs = false; break; @@ -5423,11 +5420,9 @@ restart: skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || - !mptcp_skb_can_collapse(nskb, skb) || + !tcp_skb_can_collapse_rx(nskb, skb) || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) goto end; - if (skb_cmp_decrypted(skb, nskb)) - goto end; } } } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59d5b064f233..04044605cadf 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2044,8 +2044,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || - !mptcp_skb_can_collapse(tail, skb) || - skb_cmp_decrypted(tail, skb) || + !tcp_skb_can_collapse_rx(tail, skb) || thtail->doff != th->doff || memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th))) goto no_coalesce; -- cgit v1.2.3 From 1be68a87ab333af37b02ad928a724a722a5a8203 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 May 2024 16:36:15 -0700 Subject: tcp: add a helper for setting EOR on tail skb TLS (and hopefully soon PSP will) use EOR to prevent skbs with different decrypted state from getting merged, without adding new tests to the skb handling. In both cases once the connection switches to an "encrypted" state, all subsequent skbs will be encrypted, so a single "EOR fence" is sufficient to prevent mixing. Add a helper for setting the EOR bit, to make this arrangement more explicit. Signed-off-by: Jakub Kicinski Reviewed-by: Eric Dumazet Reviewed-by: Willem de Bruijn Signed-off-by: Paolo Abeni --- include/net/tcp.h | 9 +++++++++ net/tls/tls_device.c | 11 ++--------- 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 32741856da01..08c3b99501cf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1066,6 +1066,7 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb) static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { + /* skb_cmp_decrypted() not needed, use tcp_write_collapse_fence() */ return likely(tcp_skb_can_collapse_to(to) && mptcp_skb_can_collapse(to, from) && skb_pure_zcopy_same(to, from)); @@ -2102,6 +2103,14 @@ static inline void tcp_rtx_queue_unlink_and_free(struct sk_buff *skb, struct soc tcp_wmem_free_skb(sk, skb); } +static inline void tcp_write_collapse_fence(struct sock *sk) +{ + struct sk_buff *skb = tcp_write_queue_tail(sk); + + if (skb) + TCP_SKB_CB(skb)->eor = 1; +} + static inline void tcp_push_pending_frames(struct sock *sk) { if (tcp_send_head(sk)) { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index ab6e694f7bc2..dc063c2c7950 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -231,14 +231,10 @@ static void tls_device_resync_tx(struct sock *sk, struct tls_context *tls_ctx, u32 seq) { struct net_device *netdev; - struct sk_buff *skb; int err = 0; u8 *rcd_sn; - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; - + tcp_write_collapse_fence(sk); rcd_sn = tls_ctx->tx.rec_seq; trace_tls_device_tx_resync_send(sk, seq, rcd_sn); @@ -1067,7 +1063,6 @@ int tls_set_device_offload(struct sock *sk) struct tls_prot_info *prot; struct net_device *netdev; struct tls_context *ctx; - struct sk_buff *skb; char *iv, *rec_seq; int rc; @@ -1138,9 +1133,7 @@ int tls_set_device_offload(struct sock *sk) * SKBs where only part of the payload needs to be encrypted. * So mark the last skb in the write queue as end of record. */ - skb = tcp_write_queue_tail(sk); - if (skb) - TCP_SKB_CB(skb)->eor = 1; + tcp_write_collapse_fence(sk); /* Avoid offloading if the device is down * We don't want to offload new flows after -- cgit v1.2.3 From f85d39dd7ed89ffdd622bc1de247ffba8d961504 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Mon, 27 May 2024 19:35:38 +0200 Subject: kcov, usb: disable interrupts in kcov_remote_start_usb_softirq After commit 8fea0c8fda30 ("usb: core: hcd: Convert from tasklet to BH workqueue"), usb_giveback_urb_bh() runs in the BH workqueue with interrupts enabled. Thus, the remote coverage collection section in usb_giveback_urb_bh()-> __usb_hcd_giveback_urb() might be interrupted, and the interrupt handler might invoke __usb_hcd_giveback_urb() again. This breaks KCOV, as it does not support nested remote coverage collection sections within the same context (neither in task nor in softirq). Update kcov_remote_start/stop_usb_softirq() to disable interrupts for the duration of the coverage collection section to avoid nested sections in the softirq context (in addition to such in the task context, which are already handled). Reported-by: Tetsuo Handa Closes: https://lore.kernel.org/linux-usb/0f4d1964-7397-485b-bc48-11c01e2fcbca@I-love.SAKURA.ne.jp/ Closes: https://syzkaller.appspot.com/bug?extid=0438378d6f157baae1a2 Suggested-by: Alan Stern Fixes: 8fea0c8fda30 ("usb: core: hcd: Convert from tasklet to BH workqueue") Cc: stable@vger.kernel.org Acked-by: Dmitry Vyukov Signed-off-by: Andrey Konovalov Link: https://lore.kernel.org/r/20240527173538.4989-1-andrey.konovalov@linux.dev Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 12 +++++++----- include/linux/kcov.h | 47 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index e3366f4d82b9..1ff7d901fede 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1623,6 +1623,7 @@ static void __usb_hcd_giveback_urb(struct urb *urb) struct usb_hcd *hcd = bus_to_hcd(urb->dev->bus); struct usb_anchor *anchor = urb->anchor; int status = urb->unlinked; + unsigned long flags; urb->hcpriv = NULL; if (unlikely((urb->transfer_flags & URB_SHORT_NOT_OK) && @@ -1640,13 +1641,14 @@ static void __usb_hcd_giveback_urb(struct urb *urb) /* pass ownership to the completion handler */ urb->status = status; /* - * This function can be called in task context inside another remote - * coverage collection section, but kcov doesn't support that kind of - * recursion yet. Only collect coverage in softirq context for now. + * Only collect coverage in the softirq context and disable interrupts + * to avoid scenarios with nested remote coverage collection sections + * that KCOV does not support. + * See the comment next to kcov_remote_start_usb_softirq() for details. */ - kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); + flags = kcov_remote_start_usb_softirq((u64)urb->dev->bus->busnum); urb->complete(urb); - kcov_remote_stop_softirq(); + kcov_remote_stop_softirq(flags); usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b851ba415e03..1068a7318d89 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -55,21 +55,47 @@ static inline void kcov_remote_start_usb(u64 id) /* * The softirq flavor of kcov_remote_*() functions is introduced as a temporary - * work around for kcov's lack of nested remote coverage sections support in - * task context. Adding support for nested sections is tracked in: - * https://bugzilla.kernel.org/show_bug.cgi?id=210337 + * workaround for KCOV's lack of nested remote coverage sections support. + * + * Adding support is tracked in https://bugzilla.kernel.org/show_bug.cgi?id=210337. + * + * kcov_remote_start_usb_softirq(): + * + * 1. Only collects coverage when called in the softirq context. This allows + * avoiding nested remote coverage collection sections in the task context. + * For example, USB/IP calls usb_hcd_giveback_urb() in the task context + * within an existing remote coverage collection section. Thus, KCOV should + * not attempt to start collecting coverage within the coverage collection + * section in __usb_hcd_giveback_urb() in this case. + * + * 2. Disables interrupts for the duration of the coverage collection section. + * This allows avoiding nested remote coverage collection sections in the + * softirq context (a softirq might occur during the execution of a work in + * the BH workqueue, which runs with in_serving_softirq() > 0). + * For example, usb_giveback_urb_bh() runs in the BH workqueue with + * interrupts enabled, so __usb_hcd_giveback_urb() might be interrupted in + * the middle of its remote coverage collection section, and the interrupt + * handler might invoke __usb_hcd_giveback_urb() again. */ -static inline void kcov_remote_start_usb_softirq(u64 id) +static inline unsigned long kcov_remote_start_usb_softirq(u64 id) { - if (in_serving_softirq()) + unsigned long flags = 0; + + if (in_serving_softirq()) { + local_irq_save(flags); kcov_remote_start_usb(id); + } + + return flags; } -static inline void kcov_remote_stop_softirq(void) +static inline void kcov_remote_stop_softirq(unsigned long flags) { - if (in_serving_softirq()) + if (in_serving_softirq()) { kcov_remote_stop(); + local_irq_restore(flags); + } } #ifdef CONFIG_64BIT @@ -103,8 +129,11 @@ static inline u64 kcov_common_handle(void) } static inline void kcov_remote_start_common(u64 id) {} static inline void kcov_remote_start_usb(u64 id) {} -static inline void kcov_remote_start_usb_softirq(u64 id) {} -static inline void kcov_remote_stop_softirq(void) {} +static inline unsigned long kcov_remote_start_usb_softirq(u64 id) +{ + return 0; +} +static inline void kcov_remote_stop_softirq(unsigned long flags) {} #endif /* CONFIG_KCOV */ #endif /* _LINUX_KCOV_H */ -- cgit v1.2.3 From fe8db0bbe04d31ab17dc60e205c4a3365c92e67c Mon Sep 17 00:00:00 2001 From: Abhishek Pandit-Subedi Date: Fri, 10 May 2024 20:12:42 +0000 Subject: usb: typec: Update sysfs when setting ops When adding altmode ops, update the sysfs group so that visibility is also recalculated. Reviewed-by: Heikki Krogerus Reviewed-by: Benson Leung Signed-off-by: Abhishek Pandit-Subedi Signed-off-by: Jameson Thies Reviewed-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240510201244.2968152-3-jthies@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/altmodes/displayport.c | 2 +- drivers/usb/typec/class.c | 18 +++++++++++++++++- drivers/usb/typec/ucsi/displayport.c | 2 +- include/linux/usb/typec.h | 3 +++ 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c index 596cd4806018..92cc1b136120 100644 --- a/drivers/usb/typec/altmodes/displayport.c +++ b/drivers/usb/typec/altmodes/displayport.c @@ -746,7 +746,7 @@ int dp_altmode_probe(struct typec_altmode *alt) dp->alt = alt; alt->desc = "DisplayPort"; - alt->ops = &dp_altmode_ops; + typec_altmode_set_ops(alt, &dp_altmode_ops); if (plug) { plug->desc = "Displayport"; diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c index 9610e647a8d4..9262fcd4144f 100644 --- a/drivers/usb/typec/class.c +++ b/drivers/usb/typec/class.c @@ -467,6 +467,22 @@ static const struct attribute_group *typec_altmode_groups[] = { NULL }; +/** + * typec_altmode_set_ops - Set ops for altmode + * @adev: Handle to the alternate mode + * @ops: Ops for the alternate mode + * + * After setting ops, attribute visiblity needs to be refreshed if the alternate + * mode can be activated. + */ +void typec_altmode_set_ops(struct typec_altmode *adev, + const struct typec_altmode_ops *ops) +{ + adev->ops = ops; + sysfs_update_group(&adev->dev.kobj, &typec_altmode_group); +} +EXPORT_SYMBOL_GPL(typec_altmode_set_ops); + static int altmode_id_get(struct device *dev) { struct ida *ids; @@ -2317,7 +2333,7 @@ void typec_port_register_altmodes(struct typec_port *port, continue; } - alt->ops = ops; + typec_altmode_set_ops(alt, ops); typec_altmode_set_drvdata(alt, drvdata); altmodes[index] = alt; index++; diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 8be92fc1d12c..420af5139c70 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -333,7 +333,7 @@ struct typec_altmode *ucsi_register_displayport(struct ucsi_connector *con, dp->con = con; dp->alt = alt; - alt->ops = &ucsi_displayport_ops; + typec_altmode_set_ops(alt, &ucsi_displayport_ops); typec_altmode_set_drvdata(alt, dp); return alt; diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h index b35b427561ab..549275f8ac1b 100644 --- a/include/linux/usb/typec.h +++ b/include/linux/usb/typec.h @@ -167,6 +167,9 @@ struct typec_port *typec_altmode2port(struct typec_altmode *alt); void typec_altmode_update_active(struct typec_altmode *alt, bool active); +void typec_altmode_set_ops(struct typec_altmode *alt, + const struct typec_altmode_ops *ops); + enum typec_plug_index { TYPEC_PLUG_SOP_P, TYPEC_PLUG_SOP_PP, -- cgit v1.2.3 From 971187350602d03c4a27c0783ff412502b95720a Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 4 Jul 2023 14:17:19 +0100 Subject: driver core: remove devm_device_add_groups() There is no more in-kernel users of this function, and no driver should ever be using it, so remove it from the kernel. Acked-by: Dmitry Torokhov Acked-by: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20230704131715.44454-8-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 45 --------------------------------------------- include/linux/device.h | 2 -- 2 files changed, 47 deletions(-) (limited to 'include') diff --git a/drivers/base/core.c b/drivers/base/core.c index 131d96c6090b..2e776fcc31b6 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2845,15 +2845,6 @@ static void devm_attr_group_remove(struct device *dev, void *res) sysfs_remove_group(&dev->kobj, group); } -static void devm_attr_groups_remove(struct device *dev, void *res) -{ - union device_attr_group_devres *devres = res; - const struct attribute_group **groups = devres->groups; - - dev_dbg(dev, "%s: removing groups %p\n", __func__, groups); - sysfs_remove_groups(&dev->kobj, groups); -} - /** * devm_device_add_group - given a device, create a managed attribute group * @dev: The device to create the group for @@ -2886,42 +2877,6 @@ int devm_device_add_group(struct device *dev, const struct attribute_group *grp) } EXPORT_SYMBOL_GPL(devm_device_add_group); -/** - * devm_device_add_groups - create a bunch of managed attribute groups - * @dev: The device to create the group for - * @groups: The attribute groups to create, NULL terminated - * - * This function creates a bunch of managed attribute groups. If an error - * occurs when creating a group, all previously created groups will be - * removed, unwinding everything back to the original state when this - * function was called. It will explicitly warn and error if any of the - * attribute files being created already exist. - * - * Returns 0 on success or error code from sysfs_create_group on failure. - */ -int devm_device_add_groups(struct device *dev, - const struct attribute_group **groups) -{ - union device_attr_group_devres *devres; - int error; - - devres = devres_alloc(devm_attr_groups_remove, - sizeof(*devres), GFP_KERNEL); - if (!devres) - return -ENOMEM; - - error = sysfs_create_groups(&dev->kobj, groups); - if (error) { - devres_free(devres); - return error; - } - - devres->groups = groups; - devres_add(dev, devres); - return 0; -} -EXPORT_SYMBOL_GPL(devm_device_add_groups); - static int device_add_attrs(struct device *dev) { const struct class *class = dev->class; diff --git a/include/linux/device.h b/include/linux/device.h index fc3bd7116ab9..ace039151cb8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1220,8 +1220,6 @@ static inline void device_remove_group(struct device *dev, return device_remove_groups(dev, groups); } -int __must_check devm_device_add_groups(struct device *dev, - const struct attribute_group **groups); int __must_check devm_device_add_group(struct device *dev, const struct attribute_group *grp); -- cgit v1.2.3 From 44a45be57f85165761fdabf072f9a97aa026ff61 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 23 May 2024 13:00:00 +0200 Subject: sysfs: Unbreak the build around sysfs_bin_attr_simple_read() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Günter reports build breakage for m68k "m5208evb_defconfig" plus CONFIG_BLK_DEV_INITRD=y caused by commit 66bc1a173328 ("treewide: Use sysfs_bin_attr_simple_read() helper"). The defconfig disables CONFIG_SYSFS, so sysfs_bin_attr_simple_read() is not compiled into the kernel. But init/initramfs.c references that function in the initializer of a struct bin_attribute. Add an empty static inline to avoid the build breakage. Fixes: 66bc1a173328 ("treewide: Use sysfs_bin_attr_simple_read() helper") Reported-by: Guenter Roeck Closes: https://lore.kernel.org/r/e12b0027-b199-4de7-b83d-668171447ccc@roeck-us.net Signed-off-by: Lukas Wunner Tested-by: Guenter Roeck Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/05f4290439a58730738a15b0c99cd8576c4aa0d9.1716461752.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman --- include/linux/sysfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index a7d725fbf739..c4e64dc11206 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -750,6 +750,15 @@ static inline int sysfs_emit_at(char *buf, int at, const char *fmt, ...) { return 0; } + +static inline ssize_t sysfs_bin_attr_simple_read(struct file *file, + struct kobject *kobj, + struct bin_attribute *attr, + char *buf, loff_t off, + size_t count) +{ + return 0; +} #endif /* CONFIG_SYSFS */ static inline int __must_check sysfs_create_file(struct kobject *kobj, -- cgit v1.2.3 From 42675b723b4842bca7bfb0f209aa9a493a10324a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:05 -0400 Subject: function_graph: Convert ret_stack to a series of longs In order to make it possible to have multiple callbacks registered with the function_graph tracer, the retstack needs to be converted from an array of ftrace_ret_stack structures to an array of longs. This will allow to store the list of callbacks on the stack for the return side of the functions. Link: https://lore.kernel.org/linux-trace-kernel/171509092742.162236.4427737821399314856.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190821.073111754@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/sched.h | 2 +- kernel/trace/fgraph.c | 136 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 83 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..352939dab3a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1402,7 +1402,7 @@ struct task_struct { int curr_ret_depth; /* Stack of return addresses for return function tracing: */ - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index a130b2d898f7..c62e6db718a0 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -25,6 +25,30 @@ #define ASSIGN_OPS_HASH(opsname, val) #endif +/* + * FGRAPH_FRAME_SIZE: Size in bytes of the meta data on the shadow stack + * FGRAPH_FRAME_OFFSET: Size in long words of the meta data frame + * SHADOW_STACK_SIZE: The size in bytes of the entire shadow stack + * SHADOW_STACK_OFFSET: The size in long words of the shadow stack + * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added + */ +#define FGRAPH_FRAME_SIZE sizeof(struct ftrace_ret_stack) +#define FGRAPH_FRAME_OFFSET (ALIGN(FGRAPH_FRAME_SIZE, sizeof(long)) / sizeof(long)) +#define SHADOW_STACK_SIZE (PAGE_SIZE) +#define SHADOW_STACK_OFFSET \ + (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long)) +/* Leave on a buffer at the end */ +#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_OFFSET - FGRAPH_FRAME_OFFSET) + +/* + * RET_STACK(): Return the frame from a given @offset from task @t + * RET_STACK_INC(): Reserve one frame size on the stack. + * RET_STACK_DEC(): Remove one frame size from the stack. + */ +#define RET_STACK(t, index) ((struct ftrace_ret_stack *)(&(t)->ret_stack[index])) +#define RET_STACK_INC(c) ({ c += FGRAPH_FRAME_OFFSET; }) +#define RET_STACK_DEC(c) ({ c -= FGRAPH_FRAME_OFFSET; }) + DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; @@ -69,6 +93,7 @@ static int ftrace_push_return_trace(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { + struct ftrace_ret_stack *ret_stack; unsigned long long calltime; int index; @@ -85,23 +110,25 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, smp_rmb(); /* The return trace stack is full */ - if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { + if (current->curr_ret_stack >= SHADOW_STACK_MAX_INDEX) { atomic_inc(¤t->trace_overrun); return -EBUSY; } calltime = trace_clock_local(); - index = ++current->curr_ret_stack; + index = current->curr_ret_stack; + RET_STACK_INC(current->curr_ret_stack); + ret_stack = RET_STACK(current, index); barrier(); - current->ret_stack[index].ret = ret; - current->ret_stack[index].func = func; - current->ret_stack[index].calltime = calltime; + ret_stack->ret = ret; + ret_stack->func = func; + ret_stack->calltime = calltime; #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - current->ret_stack[index].fp = frame_pointer; + ret_stack->fp = frame_pointer; #endif #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - current->ret_stack[index].retp = retp; + ret_stack->retp = retp; #endif return 0; } @@ -137,7 +164,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, return 0; out_ret: - current->curr_ret_stack--; + RET_STACK_DEC(current->curr_ret_stack); out: current->curr_ret_depth--; return -EBUSY; @@ -148,11 +175,13 @@ static void ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, unsigned long frame_pointer) { + struct ftrace_ret_stack *ret_stack; int index; index = current->curr_ret_stack; + RET_STACK_DEC(index); - if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) { + if (unlikely(index < 0 || index > SHADOW_STACK_MAX_INDEX)) { ftrace_graph_stop(); WARN_ON(1); /* Might as well panic, otherwise we have no where to go */ @@ -160,6 +189,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, return; } + ret_stack = RET_STACK(current, index); #ifdef HAVE_FUNCTION_GRAPH_FP_TEST /* * The arch may choose to record the frame pointer used @@ -175,22 +205,22 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, * Note, -mfentry does not use frame pointers, and this test * is not needed if CC_USING_FENTRY is set. */ - if (unlikely(current->ret_stack[index].fp != frame_pointer)) { + if (unlikely(ret_stack->fp != frame_pointer)) { ftrace_graph_stop(); WARN(1, "Bad frame pointer: expected %lx, received %lx\n" " from func %ps return to %lx\n", current->ret_stack[index].fp, frame_pointer, - (void *)current->ret_stack[index].func, - current->ret_stack[index].ret); + (void *)ret_stack->func, + ret_stack->ret); *ret = (unsigned long)panic; return; } #endif - *ret = current->ret_stack[index].ret; - trace->func = current->ret_stack[index].func; - trace->calltime = current->ret_stack[index].calltime; + *ret = ret_stack->ret; + trace->func = ret_stack->func; + trace->calltime = ret_stack->calltime; trace->overrun = atomic_read(¤t->trace_overrun); trace->depth = current->curr_ret_depth--; /* @@ -251,7 +281,7 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs * curr_ret_stack is after that. */ barrier(); - current->curr_ret_stack--; + RET_STACK_DEC(current->curr_ret_stack); if (unlikely(!ret)) { ftrace_graph_stop(); @@ -294,12 +324,13 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int idx) { - idx = task->curr_ret_stack - idx; + int index = task->curr_ret_stack; - if (idx >= 0 && idx <= task->curr_ret_stack) - return &task->ret_stack[idx]; + index -= FGRAPH_FRAME_OFFSET * (idx + 1); + if (index < 0) + return NULL; - return NULL; + return RET_STACK(task, index); } /** @@ -321,18 +352,20 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int idx) unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp) { + struct ftrace_ret_stack *ret_stack; int index = task->curr_ret_stack; int i; if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) return ret; - if (index < 0) - return ret; + RET_STACK_DEC(index); - for (i = 0; i <= index; i++) - if (task->ret_stack[i].retp == retp) - return task->ret_stack[i].ret; + for (i = index; i >= 0; RET_STACK_DEC(i)) { + ret_stack = RET_STACK(task, i); + if (ret_stack->retp == retp) + return ret_stack->ret; + } return ret; } @@ -346,14 +379,15 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, return ret; task_idx = task->curr_ret_stack; + RET_STACK_DEC(task_idx); if (!task->ret_stack || task_idx < *idx) return ret; task_idx -= *idx; - (*idx)++; + RET_STACK_INC(*idx); - return task->ret_stack[task_idx].ret; + return RET_STACK(task, task_idx); } #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ @@ -391,7 +425,7 @@ trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub; static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub; /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */ -static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) +static int alloc_retstack_tasklist(unsigned long **ret_stack_list) { int i; int ret = 0; @@ -399,10 +433,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) struct task_struct *g, *t; for (i = 0; i < FTRACE_RETSTACK_ALLOC_SIZE; i++) { - ret_stack_list[i] = - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack_list[i] = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list[i]) { start = 0; end = i; @@ -420,9 +451,9 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list) if (t->ret_stack == NULL) { atomic_set(&t->trace_overrun, 0); - t->curr_ret_stack = -1; + t->curr_ret_stack = 0; t->curr_ret_depth = -1; - /* Make sure the tasks see the -1 first: */ + /* Make sure the tasks see the 0 first: */ smp_wmb(); t->ret_stack = ret_stack_list[start++]; } @@ -442,6 +473,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, struct task_struct *next, unsigned int prev_state) { + struct ftrace_ret_stack *ret_stack; unsigned long long timestamp; int index; @@ -466,8 +498,11 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, */ timestamp -= next->ftrace_timestamp; - for (index = next->curr_ret_stack; index >= 0; index--) - next->ret_stack[index].calltime += timestamp; + for (index = next->curr_ret_stack - FGRAPH_FRAME_OFFSET; index >= 0; ) { + ret_stack = RET_STACK(next, index); + ret_stack->calltime += timestamp; + index -= FGRAPH_FRAME_OFFSET; + } } static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace) @@ -510,10 +545,10 @@ void update_function_graph_func(void) ftrace_graph_entry = __ftrace_graph_entry; } -static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack); +static DEFINE_PER_CPU(unsigned long *, idle_ret_stack); static void -graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) +graph_init_task(struct task_struct *t, unsigned long *ret_stack) { atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; @@ -528,7 +563,7 @@ graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack) */ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { - t->curr_ret_stack = -1; + t->curr_ret_stack = 0; t->curr_ret_depth = -1; /* * The idle task has no parent, it either has its own @@ -538,14 +573,11 @@ void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu)); if (ftrace_graph_active) { - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; ret_stack = per_cpu(idle_ret_stack, cpu); if (!ret_stack) { - ret_stack = - kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack) return; per_cpu(idle_ret_stack, cpu) = ret_stack; @@ -559,15 +591,13 @@ void ftrace_graph_init_task(struct task_struct *t) { /* Make sure we do not use the parent ret_stack */ t->ret_stack = NULL; - t->curr_ret_stack = -1; + t->curr_ret_stack = 0; t->curr_ret_depth = -1; if (ftrace_graph_active) { - struct ftrace_ret_stack *ret_stack; + unsigned long *ret_stack; - ret_stack = kmalloc_array(FTRACE_RETFUNC_DEPTH, - sizeof(struct ftrace_ret_stack), - GFP_KERNEL); + ret_stack = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack) return; graph_init_task(t, ret_stack); @@ -576,7 +606,7 @@ void ftrace_graph_init_task(struct task_struct *t) void ftrace_graph_exit_task(struct task_struct *t) { - struct ftrace_ret_stack *ret_stack = t->ret_stack; + unsigned long *ret_stack = t->ret_stack; t->ret_stack = NULL; /* NULL must become visible to IRQs before we free it: */ @@ -588,12 +618,10 @@ void ftrace_graph_exit_task(struct task_struct *t) /* Allocate a return stack for each task */ static int start_graph_tracing(void) { - struct ftrace_ret_stack **ret_stack_list; + unsigned long **ret_stack_list; int ret, cpu; - ret_stack_list = kmalloc_array(FTRACE_RETSTACK_ALLOC_SIZE, - sizeof(struct ftrace_ret_stack *), - GFP_KERNEL); + ret_stack_list = kmalloc(SHADOW_STACK_SIZE, GFP_KERNEL); if (!ret_stack_list) return -ENOMEM; -- cgit v1.2.3 From 7aa1eaef9f4282c9acd39588b1fdc9dda7e73f34 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:08 -0400 Subject: function_graph: Allow multiple users to attach to function graph Allow for multiple users to attach to function graph tracer at the same time. Only 16 simultaneous users can attach to the tracer. This is because there's an array that stores the pointers to the attached fgraph_ops. When a function being traced is entered, each of the ftrace_ops entryfunc is called and if it returns non zero, its index into the array will be added to the shadow stack. On exit of the function being traced, the shadow stack will contain the indexes of the ftrace_ops on the array that want their retfunc to be called. Because a function may sleep for a long time (if a task sleeps itself), the return of the function may be literally days later. If the ftrace_ops is removed, its place on the array is replaced with a ftrace_ops that contains the stub functions and that will be called when the function finally returns. If another ftrace_ops is added that happens to get the same index into the array, its return function may be called. But that's actually the way things current work with the old function graph tracer. If one tracer is removed and another is added, the new one will get the return calls of the function traced by the previous one, thus this is not a regression. This can be fixed by adding a counter to each time the array item is updated and save that on the shadow stack as well, such that it won't be called if the index saved does not match the index on the array. Note, being able to filter functions when both are called is not completely handled yet, but that shouldn't be too hard to manage. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509096221.162236.8806372072523195752.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190821.555493396@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 +- kernel/trace/fgraph.c | 379 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 304 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 800995c425e0..8f00a7415bd5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1038,6 +1038,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; + int idx; }; /* @@ -1072,7 +1073,7 @@ function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp); struct ftrace_ret_stack * -ftrace_graph_get_ret_stack(struct task_struct *task, int idx); +ftrace_graph_get_ret_stack(struct task_struct *task, int skip); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d2ce5d651cf0..aae51f746828 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -7,6 +7,7 @@ * * Highly modified by Steven Rostedt (VMware). */ +#include #include #include #include @@ -28,35 +29,177 @@ /* * FGRAPH_FRAME_SIZE: Size in bytes of the meta data on the shadow stack * FGRAPH_FRAME_OFFSET: Size in long words of the meta data frame - * SHADOW_STACK_SIZE: The size in bytes of the entire shadow stack - * SHADOW_STACK_OFFSET: The size in long words of the shadow stack - * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added */ #define FGRAPH_FRAME_SIZE sizeof(struct ftrace_ret_stack) #define FGRAPH_FRAME_OFFSET DIV_ROUND_UP(FGRAPH_FRAME_SIZE, sizeof(long)) -#define SHADOW_STACK_SIZE (PAGE_SIZE) -#define SHADOW_STACK_OFFSET \ - (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long)) -/* Leave on a buffer at the end */ -#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_OFFSET - FGRAPH_FRAME_OFFSET) /* - * RET_STACK(): Return the frame from a given @offset from task @t - * RET_STACK_INC(): Reserve one frame size on the stack. - * RET_STACK_DEC(): Remove one frame size from the stack. + * On entry to a function (via function_graph_enter()), a new fgraph frame + * (ftrace_ret_stack) is pushed onto the stack as well as a word that + * holds a bitmask and a type (called "bitmap"). The bitmap is defined as: + * + * bits: 0 - 9 offset in words from the previous ftrace_ret_stack + * Currently, this will always be set to FGRAPH_FRAME_OFFSET + * to get to the fgraph frame. + * + * bits: 10 - 11 Type of storage + * 0 - reserved + * 1 - bitmap of fgraph_array index + * + * For type with "bitmap of fgraph_array index" (FGRAPH_TYPE_BITMAP): + * bits: 12 - 27 The bitmap of fgraph_ops fgraph_array index + * That is, it's a bitmask of 0-15 (16 bits) + * where if a corresponding ops in the fgraph_array[] + * expects a callback from the return of the function + * it's corresponding bit will be set. + * + * + * The top of the ret_stack (when not empty) will always have a reference + * word that points to the last fgraph frame that was saved. + * + * That is, at the end of function_graph_enter, if the first and forth + * fgraph_ops on the fgraph_array[] (index 0 and 3) needs their retfunc called + * on the return of the function being traced, this is what will be on the + * task's shadow ret_stack: (the stack grows upward) + * + * ret_stack[SHADOW_STACK_MAX_OFFSET] + * ... + * | | <- task->curr_ret_stack + * +--------------------------------------------+ + * | (9 << 12) | (1 << 10) | FGRAPH_FRAME_OFFSET| + * | *or put another way* | + * | (BIT(3)|BIT(0)) << FGRAPH_INDEX_SHIFT | \ | + * | FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT| \ | + * | (offset:FGRAPH_FRAME_OFFSET) | <- the offset is from here + * +--------------------------------------------+ + * | struct ftrace_ret_stack | + * | (stores the saved ret pointer) | <- the offset points here + * +--------------------------------------------+ + * | (X) | (N) | ( N words away from + * | | previous ret_stack) + * ... + * ret_stack[0] + * + * If a backtrace is required, and the real return pointer needs to be + * fetched, then it looks at the task's curr_ret_stack offset, if it + * is greater than zero (reserved, or right before popped), it would mask + * the value by FGRAPH_FRAME_OFFSET_MASK to get the offset of the + * ftrace_ret_stack structure stored on the shadow stack. + */ + +/* + * The following is for the top word on the stack: + * + * FGRAPH_FRAME_OFFSET (0-9) holds the offset delta to the fgraph frame + * FGRAPH_TYPE (10-11) holds the type of word this is. + * (RESERVED or BITMAP) + */ +#define FGRAPH_FRAME_OFFSET_BITS 10 +#define FGRAPH_FRAME_OFFSET_MASK GENMASK(FGRAPH_FRAME_OFFSET_BITS - 1, 0) + +#define FGRAPH_TYPE_BITS 2 +#define FGRAPH_TYPE_MASK GENMASK(FGRAPH_TYPE_BITS - 1, 0) +#define FGRAPH_TYPE_SHIFT FGRAPH_FRAME_OFFSET_BITS + +enum { + FGRAPH_TYPE_RESERVED = 0, + FGRAPH_TYPE_BITMAP = 1, +}; + +/* + * For BITMAP type: + * FGRAPH_INDEX (12-27) bits holding the gops index wanting return callback called + */ +#define FGRAPH_INDEX_BITS 16 +#define FGRAPH_INDEX_MASK GENMASK(FGRAPH_INDEX_BITS - 1, 0) +#define FGRAPH_INDEX_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS) + +#define FGRAPH_ARRAY_SIZE FGRAPH_INDEX_BITS + +/* + * SHADOW_STACK_SIZE: The size in bytes of the entire shadow stack + * SHADOW_STACK_OFFSET: The size in long words of the shadow stack + * SHADOW_STACK_MAX_OFFSET: The max offset of the stack for a new frame to be added */ -#define RET_STACK(t, index) ((struct ftrace_ret_stack *)(&(t)->ret_stack[index])) -#define RET_STACK_INC(c) ({ c += FGRAPH_FRAME_OFFSET; }) -#define RET_STACK_DEC(c) ({ c -= FGRAPH_FRAME_OFFSET; }) +#define SHADOW_STACK_SIZE (PAGE_SIZE) +#define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long)) +/* Leave on a buffer at the end */ +#define SHADOW_STACK_MAX_OFFSET (SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1)) + +/* RET_STACK(): Return the frame from a given @offset from task @t */ +#define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset])) DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; static int fgraph_array_cnt; -#define FGRAPH_ARRAY_SIZE 16 static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE]; +/* Get the FRAME_OFFSET from the word from the @offset on ret_stack */ +static inline int get_frame_offset(struct task_struct *t, int offset) +{ + return t->ret_stack[offset] & FGRAPH_FRAME_OFFSET_MASK; +} + +/* Get FGRAPH_TYPE from the word from the @offset at ret_stack */ +static inline int get_fgraph_type(struct task_struct *t, int offset) +{ + return (t->ret_stack[offset] >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK; +} + +/* For BITMAP type: get the bitmask from the @offset at ret_stack */ +static inline unsigned long +get_bitmap_bits(struct task_struct *t, int offset) +{ + return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK; +} + +/* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */ +static inline void +set_bitmap(struct task_struct *t, int offset, unsigned long bitmap) +{ + t->ret_stack[offset] = (bitmap << FGRAPH_INDEX_SHIFT) | + (FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET; +} + +/* + * @offset: The offset into @t->ret_stack to find the ret_stack entry + * @frame_offset: Where to place the offset into @t->ret_stack of that entry + * + * Returns a pointer to the previous ret_stack below @offset or NULL + * when it reaches the bottom of the stack. + * + * Calling this with: + * + * offset = task->curr_ret_stack; + * do { + * ret_stack = get_ret_stack(task, offset, &offset); + * } while (ret_stack); + * + * Will iterate through all the ret_stack entries from curr_ret_stack + * down to the first one. + */ +static inline struct ftrace_ret_stack * +get_ret_stack(struct task_struct *t, int offset, int *frame_offset) +{ + int offs; + + BUILD_BUG_ON(FGRAPH_FRAME_SIZE % sizeof(long)); + + if (unlikely(offset <= 0)) + return NULL; + + offs = get_frame_offset(t, --offset); + if (WARN_ON_ONCE(offs <= 0 || offs > offset)) + return NULL; + + offset -= offs; + + *frame_offset = offset; + return RET_STACK(t, offset); +} + /* Both enabled by default (can be cleared by function_graph tracer flags */ static bool fgraph_sleep_time = true; @@ -110,11 +253,13 @@ void ftrace_graph_stop(void) /* Add a function return address to the trace stack on thread info.*/ static int ftrace_push_return_trace(unsigned long ret, unsigned long func, - unsigned long frame_pointer, unsigned long *retp) + unsigned long frame_pointer, unsigned long *retp, + int fgraph_idx) { struct ftrace_ret_stack *ret_stack; unsigned long long calltime; - int index; + unsigned long val; + int offset; if (unlikely(ftrace_graph_is_dead())) return -EBUSY; @@ -124,24 +269,57 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, BUILD_BUG_ON(SHADOW_STACK_SIZE % sizeof(long)); + /* Set val to "reserved" with the delta to the new fgraph frame */ + val = (FGRAPH_TYPE_RESERVED << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET; + /* * We must make sure the ret_stack is tested before we read * anything else. */ smp_rmb(); - /* The return trace stack is full */ - if (current->curr_ret_stack >= SHADOW_STACK_MAX_INDEX) { + /* + * Check if there's room on the shadow stack to fit a fraph frame + * and a bitmap word. + */ + if (current->curr_ret_stack + FGRAPH_FRAME_OFFSET + 1 >= SHADOW_STACK_MAX_OFFSET) { atomic_inc(¤t->trace_overrun); return -EBUSY; } calltime = trace_clock_local(); - index = current->curr_ret_stack; - RET_STACK_INC(current->curr_ret_stack); - ret_stack = RET_STACK(current, index); + offset = READ_ONCE(current->curr_ret_stack); + ret_stack = RET_STACK(current, offset); + offset += FGRAPH_FRAME_OFFSET; + + /* ret offset = FGRAPH_FRAME_OFFSET ; type = reserved */ + current->ret_stack[offset] = val; + ret_stack->ret = ret; + /* + * The unwinders expect curr_ret_stack to point to either zero + * or an offset where to find the next ret_stack. Even though the + * ret stack might be bogus, we want to write the ret and the + * offset to find the ret_stack before we increment the stack point. + * If an interrupt comes in now before we increment the curr_ret_stack + * it may blow away what we wrote. But that's fine, because the + * offset will still be correct (even though the 'ret' won't be). + * What we worry about is the offset being correct after we increment + * the curr_ret_stack and before we update that offset, as if an + * interrupt comes in and does an unwind stack dump, it will need + * at least a correct offset! + */ barrier(); + WRITE_ONCE(current->curr_ret_stack, offset + 1); + /* + * This next barrier is to ensure that an interrupt coming in + * will not corrupt what we are about to write. + */ + barrier(); + + /* Still keep it reserved even if an interrupt came in */ + current->ret_stack[offset] = val; + ret_stack->ret = ret; ret_stack->func = func; ret_stack->calltime = calltime; @@ -151,7 +329,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR ret_stack->retp = retp; #endif - return 0; + return offset; } /* @@ -168,49 +346,67 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, # define MCOUNT_INSN_SIZE 0 #endif +/* If the caller does not use ftrace, call this function. */ int function_graph_enter(unsigned long ret, unsigned long func, unsigned long frame_pointer, unsigned long *retp) { struct ftrace_graph_ent trace; + unsigned long bitmap = 0; + int offset; + int i; trace.func = func; trace.depth = ++current->curr_ret_depth; - if (ftrace_push_return_trace(ret, func, frame_pointer, retp)) + offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, 0); + if (offset < 0) goto out; - /* Only trace if the calling function expects to */ - if (!fgraph_array[0]->entryfunc(&trace)) + for (i = 0; i < fgraph_array_cnt; i++) { + struct fgraph_ops *gops = fgraph_array[i]; + + if (gops == &fgraph_stub) + continue; + + if (gops->entryfunc(&trace)) + bitmap |= BIT(i); + } + + if (!bitmap) goto out_ret; + /* + * Since this function uses fgraph_idx = 0 as a tail-call checking + * flag, set that bit always. + */ + set_bitmap(current, offset, bitmap | BIT(0)); + return 0; out_ret: - RET_STACK_DEC(current->curr_ret_stack); + current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1; out: current->curr_ret_depth--; return -EBUSY; } /* Retrieve a function return address to the trace stack on thread info.*/ -static void +static struct ftrace_ret_stack * ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, - unsigned long frame_pointer) + unsigned long frame_pointer, int *offset) { struct ftrace_ret_stack *ret_stack; - int index; - index = current->curr_ret_stack; - RET_STACK_DEC(index); + ret_stack = get_ret_stack(current, current->curr_ret_stack, offset); - if (unlikely(index < 0 || index > SHADOW_STACK_MAX_INDEX)) { + if (unlikely(!ret_stack)) { ftrace_graph_stop(); - WARN_ON(1); + WARN(1, "Bad function graph ret_stack pointer: %d", + current->curr_ret_stack); /* Might as well panic, otherwise we have no where to go */ *ret = (unsigned long)panic; - return; + return NULL; } - ret_stack = RET_STACK(current, index); #ifdef HAVE_FUNCTION_GRAPH_FP_TEST /* * The arch may choose to record the frame pointer used @@ -230,26 +426,29 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret, ftrace_graph_stop(); WARN(1, "Bad frame pointer: expected %lx, received %lx\n" " from func %ps return to %lx\n", - current->ret_stack[index].fp, + ret_stack->fp, frame_pointer, (void *)ret_stack->func, ret_stack->ret); *ret = (unsigned long)panic; - return; + return NULL; } #endif + *offset += FGRAPH_FRAME_OFFSET; *ret = ret_stack->ret; trace->func = ret_stack->func; trace->calltime = ret_stack->calltime; trace->overrun = atomic_read(¤t->trace_overrun); - trace->depth = current->curr_ret_depth--; + trace->depth = current->curr_ret_depth; /* * We still want to trace interrupts coming in if * max_depth is set to 1. Make sure the decrement is * seen before ftrace_graph_return. */ barrier(); + + return ret_stack; } /* @@ -287,30 +486,47 @@ struct fgraph_ret_regs; static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs, unsigned long frame_pointer) { + struct ftrace_ret_stack *ret_stack; struct ftrace_graph_ret trace; + unsigned long bitmap; unsigned long ret; + int offset; + int i; + + ret_stack = ftrace_pop_return_trace(&trace, &ret, frame_pointer, &offset); + + if (unlikely(!ret_stack)) { + ftrace_graph_stop(); + WARN_ON(1); + /* Might as well panic. What else to do? */ + return (unsigned long)panic; + } - ftrace_pop_return_trace(&trace, &ret, frame_pointer); + trace.rettime = trace_clock_local(); #ifdef CONFIG_FUNCTION_GRAPH_RETVAL trace.retval = fgraph_ret_regs_return_value(ret_regs); #endif - trace.rettime = trace_clock_local(); - fgraph_array[0]->retfunc(&trace); + + bitmap = get_bitmap_bits(current, offset); + for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) { + struct fgraph_ops *gops = fgraph_array[i]; + + if (!(bitmap & BIT(i))) + continue; + if (gops == &fgraph_stub) + continue; + + gops->retfunc(&trace); + } + /* * The ftrace_graph_return() may still access the current * ret_stack structure, we need to make sure the update of * curr_ret_stack is after that. */ barrier(); - RET_STACK_DEC(current->curr_ret_stack); - - if (unlikely(!ret)) { - ftrace_graph_stop(); - WARN_ON(1); - /* Might as well panic. What else to do? */ - ret = (unsigned long)panic; - } - + current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1; + current->curr_ret_depth--; return ret; } @@ -333,7 +549,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) /** * ftrace_graph_get_ret_stack - return the entry of the shadow stack - * @task: The task to read the shadow stack from + * @task: The task to read the shadow stack from. * @idx: Index down the shadow stack * * Return the ret_struct on the shadow stack of the @task at the @@ -345,15 +561,17 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer) struct ftrace_ret_stack * ftrace_graph_get_ret_stack(struct task_struct *task, int idx) { - int index = task->curr_ret_stack; + struct ftrace_ret_stack *ret_stack = NULL; + int offset = task->curr_ret_stack; - BUILD_BUG_ON(FGRAPH_FRAME_SIZE % sizeof(long)); - - index -= FGRAPH_FRAME_OFFSET * (idx + 1); - if (index < 0) + if (offset < 0) return NULL; - return RET_STACK(task, index); + do { + ret_stack = get_ret_stack(task, offset, &offset); + } while (ret_stack && --idx >= 0); + + return ret_stack; } /** @@ -376,16 +594,15 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp) { struct ftrace_ret_stack *ret_stack; - int index = task->curr_ret_stack; - int i; + int i = task->curr_ret_stack; if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) return ret; - RET_STACK_DEC(index); - - for (i = index; i >= 0; RET_STACK_DEC(i)) { - ret_stack = RET_STACK(task, i); + while (i > 0) { + ret_stack = get_ret_stack(current, i, &i); + if (!ret_stack) + break; if (ret_stack->retp == retp) return ret_stack->ret; } @@ -396,21 +613,26 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp) { - int task_idx; + struct ftrace_ret_stack *ret_stack; + int offset = task->curr_ret_stack; + int i; if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) return ret; - task_idx = task->curr_ret_stack; - RET_STACK_DEC(task_idx); - - if (!task->ret_stack || task_idx < *idx) + if (!idx) return ret; - task_idx -= *idx; - RET_STACK_INC(*idx); + i = *idx; + do { + ret_stack = get_ret_stack(task, offset, &offset); + i--; + } while (i >= 0 && ret_stack); + + if (ret_stack) + return ret_stack->ret; - return RET_STACK(task, task_idx); + return ret; } #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ @@ -493,7 +715,7 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, { struct ftrace_ret_stack *ret_stack; unsigned long long timestamp; - int index; + int offset; /* * Does the user want to count the time a function was asleep. @@ -516,10 +738,10 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt, */ timestamp -= next->ftrace_timestamp; - for (index = next->curr_ret_stack - FGRAPH_FRAME_OFFSET; index >= 0; ) { - ret_stack = RET_STACK(next, index); - ret_stack->calltime += timestamp; - index -= FGRAPH_FRAME_OFFSET; + for (offset = next->curr_ret_stack; offset > 0; ) { + ret_stack = get_ret_stack(next, offset, &offset); + if (ret_stack) + ret_stack->calltime += timestamp; } } @@ -570,6 +792,8 @@ graph_init_task(struct task_struct *t, unsigned long *ret_stack) { atomic_set(&t->trace_overrun, 0); t->ftrace_timestamp = 0; + t->curr_ret_stack = 0; + t->curr_ret_depth = -1; /* make curr_ret_stack visible before we add the ret_stack */ smp_wmb(); t->ret_stack = ret_stack; @@ -691,6 +915,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) fgraph_array[i] = gops; if (i + 1 > fgraph_array_cnt) fgraph_array_cnt = i + 1; + gops->idx = i; ftrace_graph_active++; -- cgit v1.2.3 From 37238abe3cb47b8daaa8706c9949f67b2a705cf1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:11 -0400 Subject: ftrace/function_graph: Pass fgraph_ops to function graph callbacks Pass the fgraph_ops structure to the function graph callbacks. This will allow callbacks to add a descriptor to a fgraph_ops private field that wil be added in the future and use it for the callbacks. This will be useful when more than one callback can be registered to the function graph tracer. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509098588.162236.4787930115997357578.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.035147698@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 10 +++++++--- kernel/trace/fgraph.c | 16 +++++++++------- kernel/trace/ftrace.c | 6 ++++-- kernel/trace/trace.h | 4 ++-- kernel/trace/trace_functions_graph.c | 11 +++++++---- kernel/trace/trace_irqsoff.c | 6 ++++-- kernel/trace/trace_sched_wakeup.c | 6 ++++-- kernel/trace/trace_selftest.c | 5 +++-- 8 files changed, 40 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8f00a7415bd5..4d817b85ac0d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1027,11 +1027,15 @@ struct ftrace_graph_ret { unsigned long long rettime; } __packed; +struct fgraph_ops; + /* Type of the callback handlers for tracing function graph*/ -typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */ -typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */ +typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *, + struct fgraph_ops *); /* return */ +typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, + struct fgraph_ops *); /* entry */ -extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace); +extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 2b52afa03ab4..54ed2ed2036b 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -164,13 +164,13 @@ set_bitmap(struct task_struct *t, int offset, unsigned long bitmap) } /* ftrace_graph_entry set to this to tell some archs to run function graph */ -static int entry_run(struct ftrace_graph_ent *trace) +static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) { return 0; } /* ftrace_graph_return set to this to tell some archs to run function graph */ -static void return_run(struct ftrace_graph_ret *trace) +static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops) { } @@ -234,12 +234,14 @@ int __weak ftrace_disable_ftrace_graph_caller(void) } #endif -int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace) +int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { return 0; } -static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace) +static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { } @@ -379,7 +381,7 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (gops == &fgraph_stub) continue; - if (gops->entryfunc(&trace)) + if (gops->entryfunc(&trace, gops)) bitmap |= BIT(i); } @@ -527,7 +529,7 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs if (gops == &fgraph_stub) continue; - gops->retfunc(&trace); + gops->retfunc(&trace, gops); } /* @@ -681,7 +683,7 @@ void ftrace_graph_sleep_time_control(bool enable) * Simply points to ftrace_stub, but with the proper protocol. * Defined by the linker script in linux/vmlinux.lds.h */ -extern void ftrace_stub_graph(struct ftrace_graph_ret *); +void ftrace_stub_graph(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); /* The callbacks that hook a function */ trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 789950a4f977..d18387c0642d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -815,7 +815,8 @@ void ftrace_graph_graph_time_control(bool enable) fgraph_graph_time = enable; } -static int profile_graph_entry(struct ftrace_graph_ent *trace) +static int profile_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { struct ftrace_ret_stack *ret_stack; @@ -832,7 +833,8 @@ static int profile_graph_entry(struct ftrace_graph_ent *trace) return 1; } -static void profile_graph_return(struct ftrace_graph_ret *trace) +static void profile_graph_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { struct ftrace_ret_stack *ret_stack; struct ftrace_profile_stat *stat; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 749a182dab48..2575ec243350 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -679,8 +679,8 @@ void trace_latency_header(struct seq_file *m); void trace_default_header(struct seq_file *m); void print_trace_header(struct seq_file *m, struct trace_iterator *iter); -void trace_graph_return(struct ftrace_graph_ret *trace); -int trace_graph_entry(struct ftrace_graph_ent *trace); +void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); +int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); void set_graph_array(struct trace_array *tr); void tracing_start_cmdline_record(void); diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index c35fbaab2a47..b7b142b65299 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -129,7 +129,8 @@ static inline int ftrace_graph_ignore_irqs(void) return in_hardirq(); } -int trace_graph_entry(struct ftrace_graph_ent *trace) +int trace_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { struct trace_array *tr = graph_array; struct trace_array_cpu *data; @@ -238,7 +239,8 @@ void __trace_graph_return(struct trace_array *tr, trace_buffer_unlock_commit_nostack(buffer, event); } -void trace_graph_return(struct ftrace_graph_ret *trace) +void trace_graph_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { struct trace_array *tr = graph_array; struct trace_array_cpu *data; @@ -275,7 +277,8 @@ void set_graph_array(struct trace_array *tr) smp_mb(); } -static void trace_graph_thresh_return(struct ftrace_graph_ret *trace) +static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { ftrace_graph_addr_finish(trace); @@ -288,7 +291,7 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace) (trace->rettime - trace->calltime < tracing_thresh)) return; else - trace_graph_return(trace); + trace_graph_return(trace, gops); } static struct fgraph_ops funcgraph_thresh_ops = { diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index ba37f768e2f2..5478f4c4f708 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -175,7 +175,8 @@ static int irqsoff_display_graph(struct trace_array *tr, int set) return start_irqsoff_tracer(irqsoff_trace, set); } -static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) +static int irqsoff_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; @@ -205,7 +206,8 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace) return ret; } -static void irqsoff_graph_return(struct ftrace_graph_ret *trace) +static void irqsoff_graph_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { struct trace_array *tr = irqsoff_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 0469a04a355f..49bcc812652c 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -112,7 +112,8 @@ static int wakeup_display_graph(struct trace_array *tr, int set) return start_func_tracer(tr, set); } -static int wakeup_graph_entry(struct ftrace_graph_ent *trace) +static int wakeup_graph_entry(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; @@ -141,7 +142,8 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace) return ret; } -static void wakeup_graph_return(struct ftrace_graph_ret *trace) +static void wakeup_graph_return(struct ftrace_graph_ret *trace, + struct fgraph_ops *gops) { struct trace_array *tr = wakeup_trace; struct trace_array_cpu *data; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index e9c5058a8efd..56f269c0560a 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -762,7 +762,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) static unsigned int graph_hang_thresh; /* Wrap the real function entry probe to avoid possible hanging */ -static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) +static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) { /* This is harmlessly racy, we want to approximately detect a hang */ if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { @@ -776,7 +777,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) return 0; } - return trace_graph_entry(trace); + return trace_graph_entry(trace, gops); } static struct fgraph_ops fgraph_ops __initdata = { -- cgit v1.2.3 From 26dda5631d1bb2f254f4c94aa87ee6c92a89cfdb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:12 -0400 Subject: ftrace: Allow function_graph tracer to be enabled in instances Now that function graph tracing can handle more than one user, allow it to be enabled in the ftrace instances. Note, the filtering of the functions is still joined by the top level set_ftrace_filter and friends, as well as the graph and nograph files. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509099743.162236.1699959255446248163.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.190630762@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 + kernel/trace/ftrace.c | 1 + kernel/trace/trace.h | 13 +++++++- kernel/trace/trace_functions.c | 8 +++++ kernel/trace/trace_functions_graph.c | 65 ++++++++++++++++++++++-------------- kernel/trace/trace_selftest.c | 4 +-- 6 files changed, 64 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4d817b85ac0d..fd656e6d6b7c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1042,6 +1042,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; + void *private; int idx; }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d18387c0642d..b85f00b0ffe7 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7327,6 +7327,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr) tr->ops = &global_ops; tr->ops->private = tr; ftrace_init_trace_array(tr); + init_array_fgraph_ops(tr); } void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 2575ec243350..a5070f9b977b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -397,6 +397,9 @@ struct trace_array { struct ftrace_ops *ops; struct trace_pid_list __rcu *function_pids; struct trace_pid_list __rcu *function_no_pids; +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + struct fgraph_ops *gops; +#endif #ifdef CONFIG_DYNAMIC_FTRACE /* All of these are protected by the ftrace_lock */ struct list_head func_probes; @@ -681,7 +684,6 @@ void print_trace_header(struct seq_file *m, struct trace_iterator *iter); void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops); int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); -void set_graph_array(struct trace_array *tr); void tracing_start_cmdline_record(void); void tracing_stop_cmdline_record(void); @@ -892,6 +894,9 @@ extern int __trace_graph_entry(struct trace_array *tr, extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned int trace_ctx); +extern void init_array_fgraph_ops(struct trace_array *tr); +extern int allocate_fgraph_ops(struct trace_array *tr); +extern void free_fgraph_ops(struct trace_array *tr); #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; @@ -1004,6 +1009,12 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } +static inline void init_array_fgraph_ops(struct trace_array *tr) { } +static inline int allocate_fgraph_ops(struct trace_array *tr) +{ + return 0; +} +static inline void free_fgraph_ops(struct trace_array *tr) { } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ extern struct list_head ftrace_pids; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 9f1bfbe105e8..8e8da0d0ee52 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -80,6 +80,7 @@ void ftrace_free_ftrace_ops(struct trace_array *tr) int ftrace_create_function_files(struct trace_array *tr, struct dentry *parent) { + int ret; /* * The top level array uses the "global_ops", and the files are * created on boot up. @@ -90,6 +91,12 @@ int ftrace_create_function_files(struct trace_array *tr, if (!tr->ops) return -EINVAL; + ret = allocate_fgraph_ops(tr); + if (ret) { + kfree(tr->ops); + return ret; + } + ftrace_create_filter_files(tr->ops, parent); return 0; @@ -99,6 +106,7 @@ void ftrace_destroy_function_files(struct trace_array *tr) { ftrace_destroy_filter_files(tr->ops); ftrace_free_ftrace_ops(tr); + free_fgraph_ops(tr); } static ftrace_func_t select_trace_function(u32 flags_val) diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index b7b142b65299..9ccc904a7703 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -83,8 +83,6 @@ static struct tracer_flags tracer_flags = { .opts = trace_opts }; -static struct trace_array *graph_array; - /* * DURATION column is being also used to display IRQ signs, * following values are used by print_graph_irq and others @@ -132,7 +130,7 @@ static inline int ftrace_graph_ignore_irqs(void) int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { - struct trace_array *tr = graph_array; + struct trace_array *tr = gops->private; struct trace_array_cpu *data; unsigned long flags; unsigned int trace_ctx; @@ -242,7 +240,7 @@ void __trace_graph_return(struct trace_array *tr, void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { - struct trace_array *tr = graph_array; + struct trace_array *tr = gops->private; struct trace_array_cpu *data; unsigned long flags; unsigned int trace_ctx; @@ -268,15 +266,6 @@ void trace_graph_return(struct ftrace_graph_ret *trace, local_irq_restore(flags); } -void set_graph_array(struct trace_array *tr) -{ - graph_array = tr; - - /* Make graph_array visible before we start tracing */ - - smp_mb(); -} - static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { @@ -294,25 +283,53 @@ static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, trace_graph_return(trace, gops); } -static struct fgraph_ops funcgraph_thresh_ops = { - .entryfunc = &trace_graph_entry, - .retfunc = &trace_graph_thresh_return, -}; - static struct fgraph_ops funcgraph_ops = { .entryfunc = &trace_graph_entry, .retfunc = &trace_graph_return, }; +int allocate_fgraph_ops(struct trace_array *tr) +{ + struct fgraph_ops *gops; + + gops = kzalloc(sizeof(*gops), GFP_KERNEL); + if (!gops) + return -ENOMEM; + + gops->entryfunc = &trace_graph_entry; + gops->retfunc = &trace_graph_return; + + tr->gops = gops; + gops->private = tr; + return 0; +} + +void free_fgraph_ops(struct trace_array *tr) +{ + kfree(tr->gops); +} + +__init void init_array_fgraph_ops(struct trace_array *tr) +{ + tr->gops = &funcgraph_ops; + funcgraph_ops.private = tr; +} + static int graph_trace_init(struct trace_array *tr) { int ret; - set_graph_array(tr); + tr->gops->entryfunc = trace_graph_entry; + if (tracing_thresh) - ret = register_ftrace_graph(&funcgraph_thresh_ops); + tr->gops->retfunc = trace_graph_thresh_return; else - ret = register_ftrace_graph(&funcgraph_ops); + tr->gops->retfunc = trace_graph_return; + + /* Make gops functions are visible before we start tracing */ + smp_mb(); + + ret = register_ftrace_graph(tr->gops); if (ret) return ret; tracing_start_cmdline_record(); @@ -323,10 +340,7 @@ static int graph_trace_init(struct trace_array *tr) static void graph_trace_reset(struct trace_array *tr) { tracing_stop_cmdline_record(); - if (tracing_thresh) - unregister_ftrace_graph(&funcgraph_thresh_ops); - else - unregister_ftrace_graph(&funcgraph_ops); + unregister_ftrace_graph(tr->gops); } static int graph_trace_update_thresh(struct trace_array *tr) @@ -1365,6 +1379,7 @@ static struct tracer graph_trace __tracer_data = { .print_header = print_graph_headers, .flags = &tracer_flags, .set_flag = func_graph_set_flag, + .allow_instances = true, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function_graph, #endif diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 56f269c0560a..f8f55fd79e53 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -813,7 +813,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, * to detect and recover from possible hangs */ tracing_reset_online_cpus(&tr->array_buffer); - set_graph_array(tr); + fgraph_ops.private = tr; ret = register_ftrace_graph(&fgraph_ops); if (ret) { warn_failed_init_tracer(trace, ret); @@ -856,7 +856,7 @@ trace_selftest_startup_function_graph(struct tracer *trace, cond_resched(); tracing_reset_online_cpus(&tr->array_buffer); - set_graph_array(tr); + fgraph_ops.private = tr; /* * Some archs *cough*PowerPC*cough* add characters to the -- cgit v1.2.3 From ab6b84630382914ffcbab59f4913c9a60971d034 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:13 -0400 Subject: ftrace: Allow ftrace startup flags to exist without dynamic ftrace Some of the flags for ftrace_startup() may be exposed even when CONFIG_DYNAMIC_FTRACE is not configured in. This is fine as the difference between dynamic ftrace and static ftrace is done within the internals of ftrace itself. No need to have use cases fail to compile because dynamic ftrace is disabled. This change is needed to move some of the logic of what is passed to ftrace_startup() out of the parameters of ftrace_startup(). Link: https://lore.kernel.org/linux-trace-kernel/171509100890.162236.4362350342549122222.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.350654104@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fd656e6d6b7c..586018744785 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -509,6 +509,15 @@ static inline void stack_tracer_disable(void) { } static inline void stack_tracer_enable(void) { } #endif +enum { + FTRACE_UPDATE_CALLS = (1 << 0), + FTRACE_DISABLE_CALLS = (1 << 1), + FTRACE_UPDATE_TRACE_FUNC = (1 << 2), + FTRACE_START_FUNC_RET = (1 << 3), + FTRACE_STOP_FUNC_RET = (1 << 4), + FTRACE_MAY_SLEEP = (1 << 5), +}; + #ifdef CONFIG_DYNAMIC_FTRACE void ftrace_arch_code_modify_prepare(void); @@ -603,15 +612,6 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); void ftrace_free_filter(struct ftrace_ops *ops); void ftrace_ops_set_global_filter(struct ftrace_ops *ops); -enum { - FTRACE_UPDATE_CALLS = (1 << 0), - FTRACE_DISABLE_CALLS = (1 << 1), - FTRACE_UPDATE_TRACE_FUNC = (1 << 2), - FTRACE_START_FUNC_RET = (1 << 3), - FTRACE_STOP_FUNC_RET = (1 << 4), - FTRACE_MAY_SLEEP = (1 << 5), -}; - /* * The FTRACE_UPDATE_* enum is used to pass information back * from the ftrace_update_record() and ftrace_test_record() -- cgit v1.2.3 From 5fccc7552ccbc521bad61653ee739b1196b1bc53 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 3 Jun 2024 15:07:14 -0400 Subject: ftrace: Add subops logic to allow one ops to manage many There are cases where a single system will use a single function callback to handle multiple users. For example, to allow function_graph tracer to have multiple users where each can trace their own set of functions, it is useful to only have one ftrace_ops registered to ftrace that will call a function by the function_graph tracer to handle the multiplexing with the different registered function_graph tracers. Add a "subop_list" to the ftrace_ops that will hold a list of other ftrace_ops that the top ftrace_ops will manage. The function ftrace_startup_subops() that takes the manager ftrace_ops and a subop ftrace_ops it will manage. If there are no subops with the ftrace_ops yet, it will copy the ftrace_ops subop filters to the manager ftrace_ops and register that with ftrace_startup(), and adds the subop to its subop_list. If the manager ops already has something registered, it will then merge the new subop filters with what it has and enable the new functions that covers all the subops it has. To remove a subop, ftrace_shutdown_subops() is called which will use the subop_list of the manager ops to rebuild all the functions it needs to trace, and update the ftrace records to only call the functions it now has registered. If there are no more functions registered, it will then call ftrace_shutdown() to disable itself completely. Note, it is up to the manager ops callback to always make sure that the subops callbacks are called if its filter matches, as there are times in the update where the callback could be calling more functions than those that are currently registered. This could be updated to handle other systems other than function_graph, for example, fprobes could use this (but will need an interface to call ftrace_startup_subops()). Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.508431129@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 + kernel/trace/fgraph.c | 3 +- kernel/trace/ftrace.c | 401 ++++++++++++++++++++++++++++++++++++++++- kernel/trace/ftrace_internal.h | 1 + kernel/trace/trace.h | 1 + 5 files changed, 405 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 586018744785..978a1d3b270a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -334,6 +334,7 @@ struct ftrace_ops { unsigned long trampoline; unsigned long trampoline_size; struct list_head list; + struct list_head subop_list; ftrace_ops_func_t ops_func; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS unsigned long direct_call; diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 54ed2ed2036b..e39042c40937 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -21,7 +21,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define ASSIGN_OPS_HASH(opsname, val) \ .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), + .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), \ + .subop_list = LIST_HEAD_INIT(opsname.subop_list), #else #define ASSIGN_OPS_HASH(opsname, val) #endif diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b85f00b0ffe7..63b8d1fe1dd8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -74,7 +74,8 @@ #ifdef CONFIG_DYNAMIC_FTRACE #define INIT_OPS_HASH(opsname) \ .func_hash = &opsname.local_hash, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), + .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), \ + .subop_list = LIST_HEAD_INIT(opsname.subop_list), #else #define INIT_OPS_HASH(opsname) #endif @@ -161,6 +162,7 @@ static inline void ftrace_ops_init(struct ftrace_ops *ops) #ifdef CONFIG_DYNAMIC_FTRACE if (!(ops->flags & FTRACE_OPS_FL_INITIALIZED)) { mutex_init(&ops->local_hash.regex_lock); + INIT_LIST_HEAD(&ops->subop_list); ops->func_hash = &ops->local_hash; ops->flags |= FTRACE_OPS_FL_INITIALIZED; } @@ -3164,6 +3166,403 @@ out: return 0; } +/* Simply make a copy of @src and return it */ +static struct ftrace_hash *copy_hash(struct ftrace_hash *src) +{ + if (ftrace_hash_empty(src)) + return EMPTY_HASH; + + return alloc_and_copy_ftrace_hash(src->size_bits, src); +} + +/* + * Append @new_hash entries to @hash: + * + * If @hash is the EMPTY_HASH then it traces all functions and nothing + * needs to be done. + * + * If @new_hash is the EMPTY_HASH, then make *hash the EMPTY_HASH so + * that it traces everything. + * + * Otherwise, go through all of @new_hash and add anything that @hash + * doesn't already have, to @hash. + * + * The filter_hash updates uses just the append_hash() function + * and the notrace_hash does not. + */ +static int append_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash) +{ + struct ftrace_func_entry *entry; + int size; + int i; + + /* An empty hash does everything */ + if (ftrace_hash_empty(*hash)) + return 0; + + /* If new_hash has everything make hash have everything */ + if (ftrace_hash_empty(new_hash)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + return 0; + } + + size = 1 << new_hash->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &new_hash->buckets[i], hlist) { + /* Only add if not already in hash */ + if (!__ftrace_lookup_ip(*hash, entry->ip) && + add_hash_entry(*hash, entry->ip) == NULL) + return -ENOMEM; + } + } + return 0; +} + +/* + * Add to @hash only those that are in both @new_hash1 and @new_hash2 + * + * The notrace_hash updates uses just the intersect_hash() function + * and the filter_hash does not. + */ +static int intersect_hash(struct ftrace_hash **hash, struct ftrace_hash *new_hash1, + struct ftrace_hash *new_hash2) +{ + struct ftrace_func_entry *entry; + int size; + int i; + + /* + * If new_hash1 or new_hash2 is the EMPTY_HASH then make the hash + * empty as well as empty for notrace means none are notraced. + */ + if (ftrace_hash_empty(new_hash1) || ftrace_hash_empty(new_hash2)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + return 0; + } + + size = 1 << new_hash1->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &new_hash1->buckets[i], hlist) { + /* Only add if in both @new_hash1 and @new_hash2 */ + if (__ftrace_lookup_ip(new_hash2, entry->ip) && + add_hash_entry(*hash, entry->ip) == NULL) + return -ENOMEM; + } + } + /* If nothing intersects, make it the empty set */ + if (ftrace_hash_empty(*hash)) { + free_ftrace_hash(*hash); + *hash = EMPTY_HASH; + } + return 0; +} + +/* Return a new hash that has a union of all @ops->filter_hash entries */ +static struct ftrace_hash *append_hashes(struct ftrace_ops *ops) +{ + struct ftrace_hash *new_hash; + struct ftrace_ops *subops; + int ret; + + new_hash = alloc_ftrace_hash(ops->func_hash->filter_hash->size_bits); + if (!new_hash) + return NULL; + + list_for_each_entry(subops, &ops->subop_list, list) { + ret = append_hash(&new_hash, subops->func_hash->filter_hash); + if (ret < 0) { + free_ftrace_hash(new_hash); + return NULL; + } + /* Nothing more to do if new_hash is empty */ + if (ftrace_hash_empty(new_hash)) + break; + } + return new_hash; +} + +/* Make @ops trace evenything except what all its subops do not trace */ +static struct ftrace_hash *intersect_hashes(struct ftrace_ops *ops) +{ + struct ftrace_hash *new_hash = NULL; + struct ftrace_ops *subops; + int size_bits; + int ret; + + list_for_each_entry(subops, &ops->subop_list, list) { + struct ftrace_hash *next_hash; + + if (!new_hash) { + size_bits = subops->func_hash->notrace_hash->size_bits; + new_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->notrace_hash); + if (!new_hash) + return NULL; + continue; + } + size_bits = new_hash->size_bits; + next_hash = new_hash; + new_hash = alloc_ftrace_hash(size_bits); + ret = intersect_hash(&new_hash, next_hash, subops->func_hash->notrace_hash); + free_ftrace_hash(next_hash); + if (ret < 0) { + free_ftrace_hash(new_hash); + return NULL; + } + /* Nothing more to do if new_hash is empty */ + if (ftrace_hash_empty(new_hash)) + break; + } + return new_hash; +} + +static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B) +{ + struct ftrace_func_entry *entry; + int size; + int i; + + if (ftrace_hash_empty(A)) + return ftrace_hash_empty(B); + + if (ftrace_hash_empty(B)) + return ftrace_hash_empty(A); + + if (A->count != B->count) + return false; + + size = 1 << A->size_bits; + for (i = 0; i < size; i++) { + hlist_for_each_entry(entry, &A->buckets[i], hlist) { + if (!__ftrace_lookup_ip(B, entry->ip)) + return false; + } + } + + return true; +} + +static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, + struct ftrace_hash **orig_hash, + struct ftrace_hash *hash, + int enable); + +static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_hash, + struct ftrace_hash *notrace_hash) +{ + int ret; + + if (!ops_equal(filter_hash, ops->func_hash->filter_hash)) { + ret = ftrace_hash_move_and_update_ops(ops, &ops->func_hash->filter_hash, + filter_hash, 1); + if (ret < 0) + return ret; + } + + if (!ops_equal(notrace_hash, ops->func_hash->notrace_hash)) { + ret = ftrace_hash_move_and_update_ops(ops, &ops->func_hash->notrace_hash, + notrace_hash, 0); + if (ret < 0) + return ret; + } + + return 0; +} + +/** + * ftrace_startup_subops - enable tracing for subops of an ops + * @ops: Manager ops (used to pick all the functions of its subops) + * @subops: A new ops to add to @ops + * @command: Extra commands to use to enable tracing + * + * The @ops is a manager @ops that has the filter that includes all the functions + * that its list of subops are tracing. Adding a new @subops will add the + * functions of @subops to @ops. + */ +int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) +{ + struct ftrace_hash *filter_hash; + struct ftrace_hash *notrace_hash; + struct ftrace_hash *save_filter_hash; + struct ftrace_hash *save_notrace_hash; + int size_bits; + int ret; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + ftrace_ops_init(ops); + ftrace_ops_init(subops); + + if (WARN_ON_ONCE(subops->flags & FTRACE_OPS_FL_ENABLED)) + return -EBUSY; + + /* Make everything canonical (Just in case!) */ + if (!ops->func_hash->filter_hash) + ops->func_hash->filter_hash = EMPTY_HASH; + if (!ops->func_hash->notrace_hash) + ops->func_hash->notrace_hash = EMPTY_HASH; + if (!subops->func_hash->filter_hash) + subops->func_hash->filter_hash = EMPTY_HASH; + if (!subops->func_hash->notrace_hash) + subops->func_hash->notrace_hash = EMPTY_HASH; + + /* For the first subops to ops just enable it normally */ + if (list_empty(&ops->subop_list)) { + /* Just use the subops hashes */ + filter_hash = copy_hash(subops->func_hash->filter_hash); + notrace_hash = copy_hash(subops->func_hash->notrace_hash); + if (!filter_hash || !notrace_hash) { + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + return -ENOMEM; + } + + save_filter_hash = ops->func_hash->filter_hash; + save_notrace_hash = ops->func_hash->notrace_hash; + + ops->func_hash->filter_hash = filter_hash; + ops->func_hash->notrace_hash = notrace_hash; + list_add(&subops->list, &ops->subop_list); + ret = ftrace_startup(ops, command); + if (ret < 0) { + list_del(&subops->list); + ops->func_hash->filter_hash = save_filter_hash; + ops->func_hash->notrace_hash = save_notrace_hash; + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + } else { + free_ftrace_hash(save_filter_hash); + free_ftrace_hash(save_notrace_hash); + subops->flags |= FTRACE_OPS_FL_ENABLED; + } + return ret; + } + + /* + * Here there's already something attached. Here are the rules: + * o If either filter_hash is empty then the final stays empty + * o Otherwise, the final is a superset of both hashes + * o If either notrace_hash is empty then the final stays empty + * o Otherwise, the final is an intersection between the hashes + */ + if (ftrace_hash_empty(ops->func_hash->filter_hash) || + ftrace_hash_empty(subops->func_hash->filter_hash)) { + filter_hash = EMPTY_HASH; + } else { + size_bits = max(ops->func_hash->filter_hash->size_bits, + subops->func_hash->filter_hash->size_bits); + filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash); + if (!filter_hash) + return -ENOMEM; + ret = append_hash(&filter_hash, subops->func_hash->filter_hash); + if (ret < 0) { + free_ftrace_hash(filter_hash); + return ret; + } + } + + if (ftrace_hash_empty(ops->func_hash->notrace_hash) || + ftrace_hash_empty(subops->func_hash->notrace_hash)) { + notrace_hash = EMPTY_HASH; + } else { + size_bits = max(ops->func_hash->filter_hash->size_bits, + subops->func_hash->filter_hash->size_bits); + notrace_hash = alloc_ftrace_hash(size_bits); + if (!notrace_hash) { + free_ftrace_hash(filter_hash); + return -ENOMEM; + } + + ret = intersect_hash(¬race_hash, ops->func_hash->filter_hash, + subops->func_hash->filter_hash); + if (ret < 0) { + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + return ret; + } + } + + list_add(&subops->list, &ops->subop_list); + + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + if (ret < 0) + list_del(&subops->list); + else + subops->flags |= FTRACE_OPS_FL_ENABLED; + + return ret; +} + +/** + * ftrace_shutdown_subops - Remove a subops from a manager ops + * @ops: A manager ops to remove @subops from + * @subops: The subops to remove from @ops + * @command: Any extra command flags to add to modifying the text + * + * Removes the functions being traced by the @subops from @ops. Note, it + * will not affect functions that are being traced by other subops that + * still exist in @ops. + * + * If the last subops is removed from @ops, then @ops is shutdown normally. + */ +int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command) +{ + struct ftrace_hash *filter_hash; + struct ftrace_hash *notrace_hash; + int ret; + + if (unlikely(ftrace_disabled)) + return -ENODEV; + + if (WARN_ON_ONCE(!(subops->flags & FTRACE_OPS_FL_ENABLED))) + return -EINVAL; + + list_del(&subops->list); + + if (list_empty(&ops->subop_list)) { + /* Last one, just disable the current ops */ + + ret = ftrace_shutdown(ops, command); + if (ret < 0) { + list_add(&subops->list, &ops->subop_list); + return ret; + } + + subops->flags &= ~FTRACE_OPS_FL_ENABLED; + + free_ftrace_hash(ops->func_hash->filter_hash); + free_ftrace_hash(ops->func_hash->notrace_hash); + ops->func_hash->filter_hash = EMPTY_HASH; + ops->func_hash->notrace_hash = EMPTY_HASH; + + return 0; + } + + /* Rebuild the hashes without subops */ + filter_hash = append_hashes(ops); + notrace_hash = intersect_hashes(ops); + if (!filter_hash || !notrace_hash) { + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + list_add(&subops->list, &ops->subop_list); + return -ENOMEM; + } + + ret = ftrace_update_ops(ops, filter_hash, notrace_hash); + if (ret < 0) + list_add(&subops->list, &ops->subop_list); + else + subops->flags &= ~FTRACE_OPS_FL_ENABLED; + + free_ftrace_hash(filter_hash); + free_ftrace_hash(notrace_hash); + return ret; +} + static u64 ftrace_update_time; unsigned long ftrace_update_tot_cnt; unsigned long ftrace_number_of_pages; diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h index 19eddcb91584..cdfd12c44ab4 100644 --- a/kernel/trace/ftrace_internal.h +++ b/kernel/trace/ftrace_internal.h @@ -15,6 +15,7 @@ extern struct ftrace_ops global_ops; int ftrace_startup(struct ftrace_ops *ops, int command); int ftrace_shutdown(struct ftrace_ops *ops, int command); int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs); +int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command); #else /* !CONFIG_DYNAMIC_FTRACE */ diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index a5070f9b977b..9a70beb2cc46 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1136,6 +1136,7 @@ extern int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); extern int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf, int len, int reset); +extern int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command); #else struct ftrace_func_command; -- cgit v1.2.3 From d9bbfbd14f58d2955cc7a3efa8ae6d4e09ee5995 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 3 Jun 2024 15:07:15 -0400 Subject: ftrace: Allow subops filtering to be modified The subops filters use a "manager" ops to enable and disable its filters. The manager ops can handle more than one subops, and its filter is what controls what functions get set. Add a ftrace_hash_move_and_update_subops() function that will update the manager ops when the subops filters change. Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.673932251@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 ++ kernel/trace/ftrace.c | 141 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 116 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 978a1d3b270a..63238a9a9270 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -227,6 +227,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ftrace_enabled. * DIRECT - Used by the direct ftrace_ops helper for direct functions * (internal ftrace only, should not be used by others) + * SUBOP - Is controlled by another op in field managed. */ enum { FTRACE_OPS_FL_ENABLED = BIT(0), @@ -247,6 +248,7 @@ enum { FTRACE_OPS_FL_TRACE_ARRAY = BIT(15), FTRACE_OPS_FL_PERMANENT = BIT(16), FTRACE_OPS_FL_DIRECT = BIT(17), + FTRACE_OPS_FL_SUBOP = BIT(18), }; #ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -336,6 +338,7 @@ struct ftrace_ops { struct list_head list; struct list_head subop_list; ftrace_ops_func_t ops_func; + struct ftrace_ops *managed; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS unsigned long direct_call; #endif diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 63b8d1fe1dd8..cbb91b0afcc8 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3343,10 +3343,28 @@ static bool ops_equal(struct ftrace_hash *A, struct ftrace_hash *B) return true; } -static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, - struct ftrace_hash **orig_hash, - struct ftrace_hash *hash, - int enable); +static void ftrace_ops_update_code(struct ftrace_ops *ops, + struct ftrace_ops_hash *old_hash); + +static int __ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, + struct ftrace_hash **orig_hash, + struct ftrace_hash *hash, + int enable) +{ + struct ftrace_ops_hash old_hash_ops; + struct ftrace_hash *old_hash; + int ret; + + old_hash = *orig_hash; + old_hash_ops.filter_hash = ops->func_hash->filter_hash; + old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; + ret = ftrace_hash_move(ops, enable, orig_hash, hash); + if (!ret) { + ftrace_ops_update_code(ops, &old_hash_ops); + free_ftrace_hash_rcu(old_hash); + } + return ret; +} static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_hash, struct ftrace_hash *notrace_hash) @@ -3354,15 +3372,15 @@ static int ftrace_update_ops(struct ftrace_ops *ops, struct ftrace_hash *filter_ int ret; if (!ops_equal(filter_hash, ops->func_hash->filter_hash)) { - ret = ftrace_hash_move_and_update_ops(ops, &ops->func_hash->filter_hash, - filter_hash, 1); + ret = __ftrace_hash_move_and_update_ops(ops, &ops->func_hash->filter_hash, + filter_hash, 1); if (ret < 0) return ret; } if (!ops_equal(notrace_hash, ops->func_hash->notrace_hash)) { - ret = ftrace_hash_move_and_update_ops(ops, &ops->func_hash->notrace_hash, - notrace_hash, 0); + ret = __ftrace_hash_move_and_update_ops(ops, &ops->func_hash->notrace_hash, + notrace_hash, 0); if (ret < 0) return ret; } @@ -3435,7 +3453,8 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int } else { free_ftrace_hash(save_filter_hash); free_ftrace_hash(save_notrace_hash); - subops->flags |= FTRACE_OPS_FL_ENABLED; + subops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP; + subops->managed = ops; } return ret; } @@ -3489,11 +3508,12 @@ int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int ret = ftrace_update_ops(ops, filter_hash, notrace_hash); free_ftrace_hash(filter_hash); free_ftrace_hash(notrace_hash); - if (ret < 0) + if (ret < 0) { list_del(&subops->list); - else - subops->flags |= FTRACE_OPS_FL_ENABLED; - + } else { + subops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP; + subops->managed = ops; + } return ret; } @@ -3538,6 +3558,8 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in free_ftrace_hash(ops->func_hash->notrace_hash); ops->func_hash->filter_hash = EMPTY_HASH; ops->func_hash->notrace_hash = EMPTY_HASH; + subops->flags &= ~(FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP); + subops->managed = NULL; return 0; } @@ -3553,16 +3575,65 @@ int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, in } ret = ftrace_update_ops(ops, filter_hash, notrace_hash); - if (ret < 0) + if (ret < 0) { list_add(&subops->list, &ops->subop_list); - else - subops->flags &= ~FTRACE_OPS_FL_ENABLED; - + } else { + subops->flags &= ~(FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_SUBOP); + subops->managed = NULL; + } free_ftrace_hash(filter_hash); free_ftrace_hash(notrace_hash); return ret; } +static int ftrace_hash_move_and_update_subops(struct ftrace_ops *subops, + struct ftrace_hash **orig_subhash, + struct ftrace_hash *hash, + int enable) +{ + struct ftrace_ops *ops = subops->managed; + struct ftrace_hash **orig_hash; + struct ftrace_hash *save_hash; + struct ftrace_hash *new_hash; + int ret; + + /* Manager ops can not be subops (yet) */ + if (WARN_ON_ONCE(!ops || ops->flags & FTRACE_OPS_FL_SUBOP)) + return -EINVAL; + + /* Move the new hash over to the subops hash */ + save_hash = *orig_subhash; + *orig_subhash = __ftrace_hash_move(hash); + if (!*orig_subhash) { + *orig_subhash = save_hash; + return -ENOMEM; + } + + /* Create a new_hash to hold the ops new functions */ + if (enable) { + orig_hash = &ops->func_hash->filter_hash; + new_hash = append_hashes(ops); + } else { + orig_hash = &ops->func_hash->notrace_hash; + new_hash = intersect_hashes(ops); + } + + /* Move the hash over to the new hash */ + ret = __ftrace_hash_move_and_update_ops(ops, orig_hash, new_hash, enable); + + free_ftrace_hash(new_hash); + + if (ret) { + /* Put back the original hash */ + free_ftrace_hash_rcu(*orig_subhash); + *orig_subhash = save_hash; + } else { + free_ftrace_hash_rcu(save_hash); + } + return ret; +} + + static u64 ftrace_update_time; unsigned long ftrace_update_tot_cnt; unsigned long ftrace_number_of_pages; @@ -4779,19 +4850,33 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, struct ftrace_hash *hash, int enable) { - struct ftrace_ops_hash old_hash_ops; - struct ftrace_hash *old_hash; - int ret; + if (ops->flags & FTRACE_OPS_FL_SUBOP) + return ftrace_hash_move_and_update_subops(ops, orig_hash, hash, enable); - old_hash = *orig_hash; - old_hash_ops.filter_hash = ops->func_hash->filter_hash; - old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; - ret = ftrace_hash_move(ops, enable, orig_hash, hash); - if (!ret) { - ftrace_ops_update_code(ops, &old_hash_ops); - free_ftrace_hash_rcu(old_hash); + /* + * If this ops is not enabled, it could be sharing its filters + * with a subop. If that's the case, update the subop instead of + * this ops. Shared filters are only allowed to have one ops set + * at a time, and if we update the ops that is not enabled, + * it will not affect subops that share it. + */ + if (!(ops->flags & FTRACE_OPS_FL_ENABLED)) { + struct ftrace_ops *op; + + /* Check if any other manager subops maps to this hash */ + do_for_each_ftrace_op(op, ftrace_ops_list) { + struct ftrace_ops *subops; + + list_for_each_entry(subops, &op->subop_list, list) { + if ((subops->flags & FTRACE_OPS_FL_ENABLED) && + subops->func_hash == ops->func_hash) { + return ftrace_hash_move_and_update_subops(subops, orig_hash, hash, enable); + } + } + } while_for_each_ftrace_op(op); } - return ret; + + return __ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable); } static bool module_exists(const char *module) -- cgit v1.2.3 From c132be2c4fcc1150ad0791c2a85dd4c9ad0bd0c8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:16 -0400 Subject: function_graph: Have the instances use their own ftrace_ops for filtering Allow for instances to have their own ftrace_ops part of the fgraph_ops that makes the funtion_graph tracer filter on the set_ftrace_filter file of the instance and not the top instance. This uses the new ftrace_startup_subops(), by using graph_ops as the "manager ops" that defines the callback function and adds the functions defined by the filters of the ops for each trace instance. The callback defined by the manager ops will call the registered fgraph ops that were added to the fgraph_array. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.832946261@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 + kernel/trace/fgraph.c | 81 +++++++++++++++++++++++------------- kernel/trace/ftrace.c | 2 +- kernel/trace/trace.h | 15 +++---- kernel/trace/trace_functions.c | 2 +- kernel/trace/trace_functions_graph.c | 8 +++- 6 files changed, 68 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 63238a9a9270..8f865689e868 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1046,6 +1046,7 @@ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph struct fgraph_ops { trace_func_graph_ent_t entryfunc; trace_func_graph_ret_t retfunc; + struct ftrace_ops ops; /* for the hash lists */ void *private; int idx; }; diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index e39042c40937..3ef6db53c0bf 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -18,15 +18,6 @@ #include "ftrace_internal.h" #include "trace.h" -#ifdef CONFIG_DYNAMIC_FTRACE -#define ASSIGN_OPS_HASH(opsname, val) \ - .func_hash = val, \ - .local_hash.regex_lock = __MUTEX_INITIALIZER(opsname.local_hash.regex_lock), \ - .subop_list = LIST_HEAD_INIT(opsname.subop_list), -#else -#define ASSIGN_OPS_HASH(opsname, val) -#endif - /* * FGRAPH_FRAME_SIZE: Size in bytes of the meta data on the shadow stack * FGRAPH_FRAME_OFFSET: Size in long words of the meta data frame @@ -156,6 +147,13 @@ get_bitmap_bits(struct task_struct *t, int offset) return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK; } +/* For BITMAP type: set the bits in the bitmap bitmask at @offset on ret_stack */ +static inline void +set_bitmap_bits(struct task_struct *t, int offset, unsigned long bitmap) +{ + t->ret_stack[offset] |= (bitmap << FGRAPH_INDEX_SHIFT); +} + /* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */ static inline void set_bitmap(struct task_struct *t, int offset, unsigned long bitmap) @@ -382,7 +380,8 @@ int function_graph_enter(unsigned long ret, unsigned long func, if (gops == &fgraph_stub) continue; - if (gops->entryfunc(&trace, gops)) + if (ftrace_ops_test(&gops->ops, func, NULL) && + gops->entryfunc(&trace, gops)) bitmap |= BIT(i); } @@ -665,16 +664,28 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, static struct ftrace_ops graph_ops = { .func = ftrace_graph_func, - .flags = FTRACE_OPS_FL_INITIALIZED | - FTRACE_OPS_FL_PID | - FTRACE_OPS_GRAPH_STUB, + .flags = FTRACE_OPS_GRAPH_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, /* trampoline_size is only needed for dynamically allocated tramps */ #endif - ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash) }; +void fgraph_init_ops(struct ftrace_ops *dst_ops, + struct ftrace_ops *src_ops) +{ + dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB; + +#ifdef CONFIG_DYNAMIC_FTRACE + if (src_ops) { + dst_ops->func_hash = &src_ops->local_hash; + mutex_init(&dst_ops->local_hash.regex_lock); + INIT_LIST_HEAD(&dst_ops->subop_list); + dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED; + } +#endif +} + void ftrace_graph_sleep_time_control(bool enable) { fgraph_sleep_time = enable; @@ -877,6 +888,7 @@ static int start_graph_tracing(void) int register_ftrace_graph(struct fgraph_ops *gops) { + int command = 0; int ret = 0; int i; @@ -894,7 +906,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) break; } if (i >= FGRAPH_ARRAY_SIZE) { - ret = -EBUSY; + ret = -ENOSPC; goto out; } @@ -908,18 +920,22 @@ int register_ftrace_graph(struct fgraph_ops *gops) if (ftrace_graph_active == 1) { register_pm_notifier(&ftrace_suspend_notifier); ret = start_graph_tracing(); - if (ret) { - ftrace_graph_active--; - goto out; - } + if (ret) + goto error; /* * Some archs just test to see if these are not * the default function */ ftrace_graph_return = return_run; ftrace_graph_entry = entry_run; + command = FTRACE_START_FUNC_RET; + } - ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET); + ret = ftrace_startup_subops(&graph_ops, &gops->ops, command); +error: + if (ret) { + fgraph_array[i] = &fgraph_stub; + ftrace_graph_active--; } out: mutex_unlock(&ftrace_lock); @@ -928,6 +944,7 @@ out: void unregister_ftrace_graph(struct fgraph_ops *gops) { + int command = 0; int i; mutex_lock(&ftrace_lock); @@ -935,25 +952,29 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) if (unlikely(!ftrace_graph_active)) goto out; - for (i = 0; i < fgraph_array_cnt; i++) - if (gops == fgraph_array[i]) - break; - if (i >= fgraph_array_cnt) + if (unlikely(gops->idx < 0 || gops->idx >= fgraph_array_cnt)) goto out; - fgraph_array[i] = &fgraph_stub; - if (i + 1 == fgraph_array_cnt) { - for (; i >= 0; i--) - if (fgraph_array[i] != &fgraph_stub) - break; + WARN_ON_ONCE(fgraph_array[gops->idx] != gops); + + fgraph_array[gops->idx] = &fgraph_stub; + if (gops->idx + 1 == fgraph_array_cnt) { + i = gops->idx; + while (i >= 0 && fgraph_array[i] == &fgraph_stub) + i--; fgraph_array_cnt = i + 1; } ftrace_graph_active--; + + if (!ftrace_graph_active) + command = FTRACE_STOP_FUNC_RET; + + ftrace_shutdown_subops(&graph_ops, &gops->ops, command); + if (!ftrace_graph_active) { ftrace_graph_return = ftrace_stub_graph; ftrace_graph_entry = ftrace_graph_entry_stub; - ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET); unregister_pm_notifier(&ftrace_suspend_notifier); unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cbb91b0afcc8..58e0f4bc0241 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -7811,7 +7811,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr) tr->ops = &global_ops; tr->ops->private = tr; ftrace_init_trace_array(tr); - init_array_fgraph_ops(tr); + init_array_fgraph_ops(tr, tr->ops); } void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9a70beb2cc46..f06b5ddd3580 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -894,8 +894,8 @@ extern int __trace_graph_entry(struct trace_array *tr, extern void __trace_graph_return(struct trace_array *tr, struct ftrace_graph_ret *trace, unsigned int trace_ctx); -extern void init_array_fgraph_ops(struct trace_array *tr); -extern int allocate_fgraph_ops(struct trace_array *tr); +extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops); +extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops); extern void free_fgraph_ops(struct trace_array *tr); #ifdef CONFIG_DYNAMIC_FTRACE @@ -1003,18 +1003,19 @@ static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) (fgraph_max_depth && trace->depth >= fgraph_max_depth); } +void fgraph_init_ops(struct ftrace_ops *dst_ops, + struct ftrace_ops *src_ops); + #else /* CONFIG_FUNCTION_GRAPH_TRACER */ static inline enum print_line_t print_graph_function_flags(struct trace_iterator *iter, u32 flags) { return TRACE_TYPE_UNHANDLED; } -static inline void init_array_fgraph_ops(struct trace_array *tr) { } -static inline int allocate_fgraph_ops(struct trace_array *tr) -{ - return 0; -} static inline void free_fgraph_ops(struct trace_array *tr) { } +/* ftrace_ops may not be defined */ +#define init_array_fgraph_ops(tr, ops) do { } while (0) +#define allocate_fgraph_ops(tr, ops) ({ 0; }) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ extern struct list_head ftrace_pids; diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8e8da0d0ee52..13bf2415245d 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -91,7 +91,7 @@ int ftrace_create_function_files(struct trace_array *tr, if (!tr->ops) return -EINVAL; - ret = allocate_fgraph_ops(tr); + ret = allocate_fgraph_ops(tr, tr->ops); if (ret) { kfree(tr->ops); return ret; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 9ccc904a7703..7f30652f0e97 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -288,7 +288,7 @@ static struct fgraph_ops funcgraph_ops = { .retfunc = &trace_graph_return, }; -int allocate_fgraph_ops(struct trace_array *tr) +int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops) { struct fgraph_ops *gops; @@ -301,6 +301,9 @@ int allocate_fgraph_ops(struct trace_array *tr) tr->gops = gops; gops->private = tr; + + fgraph_init_ops(&gops->ops, ops); + return 0; } @@ -309,10 +312,11 @@ void free_fgraph_ops(struct trace_array *tr) kfree(tr->gops); } -__init void init_array_fgraph_ops(struct trace_array *tr) +__init void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops) { tr->gops = &funcgraph_ops; funcgraph_ops.private = tr; + fgraph_init_ops(&tr->gops->ops, ops); } static int graph_trace_init(struct trace_array *tr) -- cgit v1.2.3 From df3ec5da6a1e7f6e142680d7c5266d3af187170b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 3 Jun 2024 15:07:17 -0400 Subject: function_graph: Add pid tracing back to function graph tracer Now that the function_graph has a main callback that handles the function graph subops tracing, it no longer honors the pid filtering of ftrace. Add back this logic in the function_graph code to update the gops callback for the entry function to test if it should trace the current task or not. Link: https://lore.kernel.org/linux-trace-kernel/20240603190822.991720703@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 2 ++ kernel/trace/fgraph.c | 40 ++++++++++++++++++++++++++++++++++++++++ kernel/trace/ftrace.c | 5 +++-- kernel/trace/ftrace_internal.h | 2 ++ 4 files changed, 47 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8f865689e868..e31ec8516de1 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1040,6 +1040,7 @@ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *, struct fgraph_ops *); /* entry */ extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph_ops *gops); +bool ftrace_pids_enabled(struct ftrace_ops *ops); #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -1048,6 +1049,7 @@ struct fgraph_ops { trace_func_graph_ret_t retfunc; struct ftrace_ops ops; /* for the hash lists */ void *private; + trace_func_graph_ent_t saved_func; int idx; }; diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 3ef6db53c0bf..30bed20c655f 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -854,6 +854,41 @@ void ftrace_graph_exit_task(struct task_struct *t) kfree(ret_stack); } +static int fgraph_pid_func(struct ftrace_graph_ent *trace, + struct fgraph_ops *gops) +{ + struct trace_array *tr = gops->ops.private; + int pid; + + if (tr) { + pid = this_cpu_read(tr->array_buffer.data->ftrace_ignore_pid); + if (pid == FTRACE_PID_IGNORE) + return 0; + if (pid != FTRACE_PID_TRACE && + pid != current->pid) + return 0; + } + + return gops->saved_func(trace, gops); +} + +void fgraph_update_pid_func(void) +{ + struct fgraph_ops *gops; + struct ftrace_ops *op; + + if (!(graph_ops.flags & FTRACE_OPS_FL_INITIALIZED)) + return; + + list_for_each_entry(op, &graph_ops.subop_list, list) { + if (op->flags & FTRACE_OPS_FL_PID) { + gops = container_of(op, struct fgraph_ops, ops); + gops->entryfunc = ftrace_pids_enabled(op) ? + fgraph_pid_func : gops->saved_func; + } + } +} + /* Allocate a return stack for each task */ static int start_graph_tracing(void) { @@ -931,11 +966,15 @@ int register_ftrace_graph(struct fgraph_ops *gops) command = FTRACE_START_FUNC_RET; } + /* Always save the function, and reset at unregistering */ + gops->saved_func = gops->entryfunc; + ret = ftrace_startup_subops(&graph_ops, &gops->ops, command); error: if (ret) { fgraph_array[i] = &fgraph_stub; ftrace_graph_active--; + gops->saved_func = NULL; } out: mutex_unlock(&ftrace_lock); @@ -979,5 +1018,6 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL); } out: + gops->saved_func = NULL; mutex_unlock(&ftrace_lock); } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 58e0f4bc0241..da7e6abf48b4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -100,7 +100,7 @@ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; /* What to set function_trace_op to */ static struct ftrace_ops *set_function_trace_op; -static bool ftrace_pids_enabled(struct ftrace_ops *ops) +bool ftrace_pids_enabled(struct ftrace_ops *ops) { struct trace_array *tr; @@ -402,10 +402,11 @@ static void ftrace_update_pid_func(void) if (op->flags & FTRACE_OPS_FL_PID) { op->func = ftrace_pids_enabled(op) ? ftrace_pid_func : op->saved_func; - ftrace_update_trampoline(op); } } while_for_each_ftrace_op(op); + fgraph_update_pid_func(); + update_ftrace_function(); } diff --git a/kernel/trace/ftrace_internal.h b/kernel/trace/ftrace_internal.h index cdfd12c44ab4..bfba10c2fcf1 100644 --- a/kernel/trace/ftrace_internal.h +++ b/kernel/trace/ftrace_internal.h @@ -43,8 +43,10 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) #ifdef CONFIG_FUNCTION_GRAPH_TRACER extern int ftrace_graph_active; +extern void fgraph_update_pid_func(void); #else /* !CONFIG_FUNCTION_GRAPH_TRACER */ # define ftrace_graph_active 0 +static inline void fgraph_update_pid_func(void) {} #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ #else /* !CONFIG_FUNCTION_TRACER */ -- cgit v1.2.3 From 4497412a1f7b5d9e0849f125652f2cc58cdba562 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:19 -0400 Subject: function_graph: Add "task variables" per task for fgraph_ops Add a "task variables" array on the tasks shadow ret_stack that is the size of longs for each possible registered fgraph_ops. That's a total of 16, taking up 8 * 16 = 128 bytes (out of a page size 4k). This will allow for fgraph_ops to do specific features on a per task basis having a way to maintain state for each task. Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509104383.162236.12239656156685718550.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.308806126@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 1 + kernel/trace/fgraph.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e31ec8516de1..82cfc8e09cfd 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1089,6 +1089,7 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int skip); unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp); +unsigned long *fgraph_get_task_var(struct fgraph_ops *gops); /* * Sometimes we don't want to trace a function with the function diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 7fd9b03bd170..baa08249ffd5 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -54,6 +54,10 @@ * on the return of the function being traced, this is what will be on the * task's shadow ret_stack: (the stack grows upward) * + * ret_stack[SHADOW_STACK_OFFSET] + * | SHADOW_STACK_TASK_VARS(ret_stack)[15] | + * ... + * | SHADOW_STACK_TASK_VARS(ret_stack)[0] | * ret_stack[SHADOW_STACK_MAX_OFFSET] * ... * | | <- task->curr_ret_stack @@ -116,11 +120,19 @@ enum { #define SHADOW_STACK_SIZE (PAGE_SIZE) #define SHADOW_STACK_OFFSET (SHADOW_STACK_SIZE / sizeof(long)) /* Leave on a buffer at the end */ -#define SHADOW_STACK_MAX_OFFSET (SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1)) +#define SHADOW_STACK_MAX_OFFSET \ + (SHADOW_STACK_OFFSET - (FGRAPH_FRAME_OFFSET + 1 + FGRAPH_ARRAY_SIZE)) /* RET_STACK(): Return the frame from a given @offset from task @t */ #define RET_STACK(t, offset) ((struct ftrace_ret_stack *)(&(t)->ret_stack[offset])) +/* + * Each fgraph_ops has a reservered unsigned long at the end (top) of the + * ret_stack to store task specific state. + */ +#define SHADOW_STACK_TASK_VARS(ret_stack) \ + ((unsigned long *)(&(ret_stack)[SHADOW_STACK_OFFSET - FGRAPH_ARRAY_SIZE])) + DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph); int ftrace_graph_active; @@ -211,6 +223,44 @@ static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops) { } +static void ret_stack_set_task_var(struct task_struct *t, int idx, long val) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack); + + gvals[idx] = val; +} + +static unsigned long * +ret_stack_get_task_var(struct task_struct *t, int idx) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack); + + return &gvals[idx]; +} + +static void ret_stack_init_task_vars(unsigned long *ret_stack) +{ + unsigned long *gvals = SHADOW_STACK_TASK_VARS(ret_stack); + + memset(gvals, 0, sizeof(*gvals) * FGRAPH_ARRAY_SIZE); +} + +/** + * fgraph_get_task_var - retrieve a task specific state variable + * @gops: The ftrace_ops that owns the task specific variable + * + * Every registered fgraph_ops has a task state variable + * reserved on the task's ret_stack. This function returns the + * address to that variable. + * + * Returns the address to the fgraph_ops @gops tasks specific + * unsigned long variable. + */ +unsigned long *fgraph_get_task_var(struct fgraph_ops *gops) +{ + return ret_stack_get_task_var(current, gops->idx); +} + /* * @offset: The offset into @t->ret_stack to find the ret_stack entry * @frame_offset: Where to place the offset into @t->ret_stack of that entry @@ -766,6 +816,7 @@ static int alloc_retstack_tasklist(unsigned long **ret_stack_list) if (t->ret_stack == NULL) { atomic_set(&t->trace_overrun, 0); + ret_stack_init_task_vars(ret_stack_list[start]); t->curr_ret_stack = 0; t->curr_ret_depth = -1; /* Make sure the tasks see the 0 first: */ @@ -826,6 +877,7 @@ static void graph_init_task(struct task_struct *t, unsigned long *ret_stack) { atomic_set(&t->trace_overrun, 0); + ret_stack_init_task_vars(ret_stack); t->ftrace_timestamp = 0; t->curr_ret_stack = 0; t->curr_ret_depth = -1; @@ -959,6 +1011,24 @@ static int start_graph_tracing(void) return ret; } +static void init_task_vars(int idx) +{ + struct task_struct *g, *t; + int cpu; + + for_each_online_cpu(cpu) { + if (idle_task(cpu)->ret_stack) + ret_stack_set_task_var(idle_task(cpu), idx, 0); + } + + read_lock(&tasklist_lock); + for_each_process_thread(g, t) { + if (t->ret_stack) + ret_stack_set_task_var(t, idx, 0); + } + read_unlock(&tasklist_lock); +} + int register_ftrace_graph(struct fgraph_ops *gops) { int command = 0; @@ -997,6 +1067,8 @@ int register_ftrace_graph(struct fgraph_ops *gops) ftrace_graph_return = return_run; ftrace_graph_entry = entry_run; command = FTRACE_START_FUNC_RET; + } else { + init_task_vars(gops->idx); } /* Always save the function, and reset at unregistering */ -- cgit v1.2.3 From 12117f3307b63f287756d7ec8cc4f11b94e1206a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:20 -0400 Subject: function_graph: Move set_graph_function tests to shadow stack global var The use of the task->trace_recursion for the logic used for the set_graph_function was a bit of an abuse of that variable. Now that there exists global vars that are per stack for registered graph traces, use that instead. Link: https://lore.kernel.org/linux-trace-kernel/171509105520.162236.10339831553995971290.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.472955399@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_recursion.h | 5 +---- kernel/trace/trace.h | 32 +++++++++++++++++++++----------- kernel/trace/trace_functions_graph.c | 6 +++--- kernel/trace/trace_irqsoff.c | 4 ++-- kernel/trace/trace_sched_wakeup.c | 4 ++-- 5 files changed, 29 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 24ea8ac049b4..02e6afc6d7fe 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -44,9 +44,6 @@ enum { */ TRACE_IRQ_BIT, - /* Set if the function is in the set_graph_function file */ - TRACE_GRAPH_BIT, - /* * In the very unlikely case that an interrupt came in * at a start of graph tracing, and we want to trace @@ -60,7 +57,7 @@ enum { * that preempted a softirq start of a function that * preempted normal context!!!! Luckily, it can't be * greater than 3, so the next two bits are a mask - * of what the depth is when we set TRACE_GRAPH_BIT + * of what the depth is when we set TRACE_GRAPH_FL */ TRACE_GRAPH_DEPTH_START_BIT, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index f06b5ddd3580..73919129e57c 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -898,11 +898,16 @@ extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops); extern void free_fgraph_ops(struct trace_array *tr); +enum { + TRACE_GRAPH_FL = 1, +}; + #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; -static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) +static inline int +ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace) { unsigned long addr = trace->func; int ret = 0; @@ -924,12 +929,11 @@ static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) } if (ftrace_lookup_ip(hash, addr)) { - /* * This needs to be cleared on the return functions * when the depth is zero. */ - trace_recursion_set(TRACE_GRAPH_BIT); + *task_var |= TRACE_GRAPH_FL; trace_recursion_set_depth(trace->depth); /* @@ -949,11 +953,14 @@ out: return ret; } -static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) +static inline void +ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace) { - if (trace_recursion_test(TRACE_GRAPH_BIT) && + unsigned long *task_var = fgraph_get_task_var(gops); + + if ((*task_var & TRACE_GRAPH_FL) && trace->depth == trace_recursion_depth()) - trace_recursion_clear(TRACE_GRAPH_BIT); + *task_var &= ~TRACE_GRAPH_FL; } static inline int ftrace_graph_notrace_addr(unsigned long addr) @@ -979,7 +986,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) return ret; } #else -static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace) +static inline int ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace) { return 1; } @@ -988,17 +995,20 @@ static inline int ftrace_graph_notrace_addr(unsigned long addr) { return 0; } -static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace) +static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace) { } #endif /* CONFIG_DYNAMIC_FTRACE */ extern unsigned int fgraph_max_depth; -static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace) +static inline bool +ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent *trace) { + unsigned long *task_var = fgraph_get_task_var(gops); + /* trace it when it is-nested-in or is a function enabled. */ - return !(trace_recursion_test(TRACE_GRAPH_BIT) || - ftrace_graph_addr(trace)) || + return !((*task_var & TRACE_GRAPH_FL) || + ftrace_graph_addr(task_var, trace)) || (trace->depth < 0) || (fgraph_max_depth && trace->depth >= fgraph_max_depth); } diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 7f30652f0e97..66cce73e94f8 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -160,7 +160,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, if (!ftrace_trace_task(tr)) return 0; - if (ftrace_graph_ignore_func(trace)) + if (ftrace_graph_ignore_func(gops, trace)) return 0; if (ftrace_graph_ignore_irqs()) @@ -247,7 +247,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace, long disabled; int cpu; - ftrace_graph_addr_finish(trace); + ftrace_graph_addr_finish(gops, trace); if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT); @@ -269,7 +269,7 @@ void trace_graph_return(struct ftrace_graph_ret *trace, static void trace_graph_thresh_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { - ftrace_graph_addr_finish(trace); + ftrace_graph_addr_finish(gops, trace); if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT); diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index 5478f4c4f708..fce064e20570 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -184,7 +184,7 @@ static int irqsoff_graph_entry(struct ftrace_graph_ent *trace, unsigned int trace_ctx; int ret; - if (ftrace_graph_ignore_func(trace)) + if (ftrace_graph_ignore_func(gops, trace)) return 0; /* * Do not trace a function if it's filtered by set_graph_notrace. @@ -214,7 +214,7 @@ static void irqsoff_graph_return(struct ftrace_graph_ret *trace, unsigned long flags; unsigned int trace_ctx; - ftrace_graph_addr_finish(trace); + ftrace_graph_addr_finish(gops, trace); if (!func_prolog_dec(tr, &data, &flags)) return; diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 49bcc812652c..130ca7e7787e 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -120,7 +120,7 @@ static int wakeup_graph_entry(struct ftrace_graph_ent *trace, unsigned int trace_ctx; int ret = 0; - if (ftrace_graph_ignore_func(trace)) + if (ftrace_graph_ignore_func(gops, trace)) return 0; /* * Do not trace a function if it's filtered by set_graph_notrace. @@ -149,7 +149,7 @@ static void wakeup_graph_return(struct ftrace_graph_ret *trace, struct trace_array_cpu *data; unsigned int trace_ctx; - ftrace_graph_addr_finish(trace); + ftrace_graph_addr_finish(gops, trace); if (!func_prolog_preempt_disable(tr, &data, &trace_ctx)) return; -- cgit v1.2.3 From 068da098eb504469dc195137ae35eeacfe0c8de9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:21 -0400 Subject: function_graph: Move graph depth stored data to shadow stack global var The use of the task->trace_recursion for the logic used for the function graph depth was a bit of an abuse of that variable. Now that there exists global vars that are per stack for registered graph traces, use that instead. Link: https://lore.kernel.org/linux-trace-kernel/171509106728.162236.2398372644430125344.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.634870264@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_recursion.h | 29 ----------------------------- kernel/trace/trace.h | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index 02e6afc6d7fe..fdfb6f66718a 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -44,25 +44,6 @@ enum { */ TRACE_IRQ_BIT, - /* - * In the very unlikely case that an interrupt came in - * at a start of graph tracing, and we want to trace - * the function in that interrupt, the depth can be greater - * than zero, because of the preempted start of a previous - * trace. In an even more unlikely case, depth could be 2 - * if a softirq interrupted the start of graph tracing, - * followed by an interrupt preempting a start of graph - * tracing in the softirq, and depth can even be 3 - * if an NMI came in at the start of an interrupt function - * that preempted a softirq start of a function that - * preempted normal context!!!! Luckily, it can't be - * greater than 3, so the next two bits are a mask - * of what the depth is when we set TRACE_GRAPH_FL - */ - - TRACE_GRAPH_DEPTH_START_BIT, - TRACE_GRAPH_DEPTH_END_BIT, - /* * To implement set_graph_notrace, if this bit is set, we ignore * function graph tracing of called functions, until the return @@ -78,16 +59,6 @@ enum { #define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(1<<(bit)); } while (0) #define trace_recursion_test(bit) ((current)->trace_recursion & (1<<(bit))) -#define trace_recursion_depth() \ - (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3) -#define trace_recursion_set_depth(depth) \ - do { \ - current->trace_recursion &= \ - ~(3 << TRACE_GRAPH_DEPTH_START_BIT); \ - current->trace_recursion |= \ - ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT; \ - } while (0) - #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 73919129e57c..82d879dc63ff 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -900,8 +900,38 @@ extern void free_fgraph_ops(struct trace_array *tr); enum { TRACE_GRAPH_FL = 1, + + /* + * In the very unlikely case that an interrupt came in + * at a start of graph tracing, and we want to trace + * the function in that interrupt, the depth can be greater + * than zero, because of the preempted start of a previous + * trace. In an even more unlikely case, depth could be 2 + * if a softirq interrupted the start of graph tracing, + * followed by an interrupt preempting a start of graph + * tracing in the softirq, and depth can even be 3 + * if an NMI came in at the start of an interrupt function + * that preempted a softirq start of a function that + * preempted normal context!!!! Luckily, it can't be + * greater than 3, so the next two bits are a mask + * of what the depth is when we set TRACE_GRAPH_FL + */ + + TRACE_GRAPH_DEPTH_START_BIT, + TRACE_GRAPH_DEPTH_END_BIT, }; +static inline unsigned long ftrace_graph_depth(unsigned long *task_var) +{ + return (*task_var >> TRACE_GRAPH_DEPTH_START_BIT) & 3; +} + +static inline void ftrace_graph_set_depth(unsigned long *task_var, int depth) +{ + *task_var &= ~(3 << TRACE_GRAPH_DEPTH_START_BIT); + *task_var |= (depth & 3) << TRACE_GRAPH_DEPTH_START_BIT; +} + #ifdef CONFIG_DYNAMIC_FTRACE extern struct ftrace_hash __rcu *ftrace_graph_hash; extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash; @@ -934,7 +964,7 @@ ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace) * when the depth is zero. */ *task_var |= TRACE_GRAPH_FL; - trace_recursion_set_depth(trace->depth); + ftrace_graph_set_depth(task_var, trace->depth); /* * If no irqs are to be traced, but a set_graph_function @@ -959,7 +989,7 @@ ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret *trace unsigned long *task_var = fgraph_get_task_var(gops); if ((*task_var & TRACE_GRAPH_FL) && - trace->depth == trace_recursion_depth()) + trace->depth == ftrace_graph_depth(task_var)) *task_var &= ~TRACE_GRAPH_FL; } -- cgit v1.2.3 From b84214890a9bc56f0fe4ec4fc72f2307ed05096d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:22 -0400 Subject: function_graph: Move graph notrace bit to shadow stack global var The use of the task->trace_recursion for the logic used for the function graph no-trace was a bit of an abuse of that variable. Now that there exists global vars that are per stack for registered graph traces, use that instead. Link: https://lore.kernel.org/linux-trace-kernel/171509107907.162236.6564679266777519065.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.796709456@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_recursion.h | 7 ------- kernel/trace/trace.h | 9 +++++++++ kernel/trace/trace_functions_graph.c | 10 ++++++---- 3 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h index fdfb6f66718a..ae04054a1be3 100644 --- a/include/linux/trace_recursion.h +++ b/include/linux/trace_recursion.h @@ -44,13 +44,6 @@ enum { */ TRACE_IRQ_BIT, - /* - * To implement set_graph_notrace, if this bit is set, we ignore - * function graph tracing of called functions, until the return - * function is called to clear it. - */ - TRACE_GRAPH_NOTRACE_BIT, - /* Used to prevent recursion recording from recursing. */ TRACE_RECORD_RECURSION_BIT, }; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 82d879dc63ff..b37402e3f0c9 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -919,8 +919,17 @@ enum { TRACE_GRAPH_DEPTH_START_BIT, TRACE_GRAPH_DEPTH_END_BIT, + + /* + * To implement set_graph_notrace, if this bit is set, we ignore + * function graph tracing of called functions, until the return + * function is called to clear it. + */ + TRACE_GRAPH_NOTRACE_BIT, }; +#define TRACE_GRAPH_NOTRACE (1 << TRACE_GRAPH_NOTRACE_BIT) + static inline unsigned long ftrace_graph_depth(unsigned long *task_var) { return (*task_var >> TRACE_GRAPH_DEPTH_START_BIT) & 3; diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 66cce73e94f8..13d0387ac6a6 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -130,6 +130,7 @@ static inline int ftrace_graph_ignore_irqs(void) int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops) { + unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; unsigned long flags; @@ -138,7 +139,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, int ret; int cpu; - if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) + if (*task_var & TRACE_GRAPH_NOTRACE) return 0; /* @@ -149,7 +150,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace, * returning from the function. */ if (ftrace_graph_notrace_addr(trace->func)) { - trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT); + *task_var |= TRACE_GRAPH_NOTRACE_BIT; /* * Need to return 1 to have the return called * that will clear the NOTRACE bit. @@ -240,6 +241,7 @@ void __trace_graph_return(struct trace_array *tr, void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops) { + unsigned long *task_var = fgraph_get_task_var(gops); struct trace_array *tr = gops->private; struct trace_array_cpu *data; unsigned long flags; @@ -249,8 +251,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace, ftrace_graph_addr_finish(gops, trace); - if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) { - trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT); + if (*task_var & TRACE_GRAPH_NOTRACE) { + *task_var &= ~TRACE_GRAPH_NOTRACE; return; } -- cgit v1.2.3 From 91c46b0aa917546432b5b219494859cda0edc39e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Jun 2024 15:07:23 -0400 Subject: function_graph: Implement fgraph_reserve_data() and fgraph_retrieve_data() Added functions that can be called by a fgraph_ops entryfunc and retfunc to store state between the entry of the function being traced to the exit of the same function. The fgraph_ops entryfunc() may call fgraph_reserve_data() to store up to 32 words onto the task's shadow ret_stack and this then can be retrieved by fgraph_retrieve_data() called by the corresponding retfunc(). Co-developed with Masami Hiramatsu: Link: https://lore.kernel.org/linux-trace-kernel/171509109089.162236.11372474169781184034.stgit@devnote2 Link: https://lore.kernel.org/linux-trace-kernel/20240603190823.959703050@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Alexei Starovoitov Cc: Florent Revest Cc: Martin KaFai Lau Cc: bpf Cc: Sven Schnelle Cc: Alexei Starovoitov Cc: Jiri Olsa Cc: Arnaldo Carvalho de Melo Cc: Daniel Borkmann Cc: Alan Maguire Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Guo Ren Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (VMware) Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 + kernel/trace/fgraph.c | 191 ++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 186 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 82cfc8e09cfd..9f61556a9491 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1053,6 +1053,9 @@ struct fgraph_ops { int idx; }; +void *fgraph_reserve_data(int idx, int size_bytes); +void *fgraph_retrieve_data(int idx, int *size_bytes); + /* * Stack of return addresses for functions * of a thread. diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index baa08249ffd5..f207b7ae5f46 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -31,12 +31,11 @@ * holds a bitmask and a type (called "bitmap"). The bitmap is defined as: * * bits: 0 - 9 offset in words from the previous ftrace_ret_stack - * Currently, this will always be set to FGRAPH_FRAME_OFFSET - * to get to the fgraph frame. * * bits: 10 - 11 Type of storage * 0 - reserved * 1 - bitmap of fgraph_array index + * 2 - reserved data * * For type with "bitmap of fgraph_array index" (FGRAPH_TYPE_BITMAP): * bits: 12 - 27 The bitmap of fgraph_ops fgraph_array index @@ -49,10 +48,15 @@ * The top of the ret_stack (when not empty) will always have a reference * word that points to the last fgraph frame that was saved. * + * For reserved data: + * bits: 12 - 17 The size in words that is stored + * bits: 18 - 23 The index of fgraph_array, which shows who is stored + * * That is, at the end of function_graph_enter, if the first and forth * fgraph_ops on the fgraph_array[] (index 0 and 3) needs their retfunc called - * on the return of the function being traced, this is what will be on the - * task's shadow ret_stack: (the stack grows upward) + * on the return of the function being traced, and the forth fgraph_ops + * stored two words of data, this is what will be on the task's shadow + * ret_stack: (the stack grows upward) * * ret_stack[SHADOW_STACK_OFFSET] * | SHADOW_STACK_TASK_VARS(ret_stack)[15] | @@ -62,11 +66,21 @@ * ... * | | <- task->curr_ret_stack * +--------------------------------------------+ + * | (3 << 12) | (3 << 10) | FGRAPH_FRAME_OFFSET| + * | *or put another way* | + * | (3 << FGRAPH_DATA_INDEX_SHIFT)| \ | This is for fgraph_ops[3]. + * | ((2 - 1) << FGRAPH_DATA_SHIFT)| \ | The data size is 2 words. + * | (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT)| \ | + * | (offset2:FGRAPH_FRAME_OFFSET+3) | <- the offset2 is from here + * +--------------------------------------------+ ( It is 4 words from the ret_stack) + * | STORED DATA WORD 2 | + * | STORED DATA WORD 1 | + * +--------------------------------------------+ * | (9 << 12) | (1 << 10) | FGRAPH_FRAME_OFFSET| * | *or put another way* | * | (BIT(3)|BIT(0)) << FGRAPH_INDEX_SHIFT | \ | * | FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT| \ | - * | (offset:FGRAPH_FRAME_OFFSET) | <- the offset is from here + * | (offset1:FGRAPH_FRAME_OFFSET) | <- the offset1 is from here * +--------------------------------------------+ * | struct ftrace_ret_stack | * | (stores the saved ret pointer) | <- the offset points here @@ -100,6 +114,7 @@ enum { FGRAPH_TYPE_RESERVED = 0, FGRAPH_TYPE_BITMAP = 1, + FGRAPH_TYPE_DATA = 2, }; /* @@ -110,6 +125,26 @@ enum { #define FGRAPH_INDEX_MASK GENMASK(FGRAPH_INDEX_BITS - 1, 0) #define FGRAPH_INDEX_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS) +/* + * For DATA type: + * FGRAPH_DATA (12-17) bits hold the size of data (in words) + * FGRAPH_INDEX (18-23) bits hold the index for which gops->idx the data is for + * + * Note: + * data_size == 0 means 1 word, and 31 (=2^5 - 1) means 32 words. + */ +#define FGRAPH_DATA_BITS 5 +#define FGRAPH_DATA_MASK GENMASK(FGRAPH_DATA_BITS - 1, 0) +#define FGRAPH_DATA_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_BITS) +#define FGRAPH_MAX_DATA_SIZE (sizeof(long) * (1 << FGRAPH_DATA_BITS)) + +#define FGRAPH_DATA_INDEX_BITS 4 +#define FGRAPH_DATA_INDEX_MASK GENMASK(FGRAPH_DATA_INDEX_BITS - 1, 0) +#define FGRAPH_DATA_INDEX_SHIFT (FGRAPH_DATA_SHIFT + FGRAPH_DATA_BITS) + +#define FGRAPH_MAX_INDEX \ + ((FGRAPH_INDEX_SIZE << FGRAPH_DATA_BITS) + FGRAPH_RET_INDEX) + #define FGRAPH_ARRAY_SIZE FGRAPH_INDEX_BITS /* @@ -178,16 +213,46 @@ static int fgraph_lru_alloc_index(void) return idx; } +/* Get the offset to the fgraph frame from a ret_stack value */ +static inline int __get_offset(unsigned long val) +{ + return val & FGRAPH_FRAME_OFFSET_MASK; +} + +/* Get the type of word from a ret_stack value */ +static inline int __get_type(unsigned long val) +{ + return (val >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK; +} + +/* Get the data_index for a DATA type ret_stack word */ +static inline int __get_data_index(unsigned long val) +{ + return (val >> FGRAPH_DATA_INDEX_SHIFT) & FGRAPH_DATA_INDEX_MASK; +} + +/* Get the data_size for a DATA type ret_stack word */ +static inline int __get_data_size(unsigned long val) +{ + return ((val >> FGRAPH_DATA_SHIFT) & FGRAPH_DATA_MASK) + 1; +} + +/* Get the word from the ret_stack at @offset */ +static inline unsigned long get_fgraph_entry(struct task_struct *t, int offset) +{ + return t->ret_stack[offset]; +} + /* Get the FRAME_OFFSET from the word from the @offset on ret_stack */ static inline int get_frame_offset(struct task_struct *t, int offset) { - return t->ret_stack[offset] & FGRAPH_FRAME_OFFSET_MASK; + return __get_offset(t->ret_stack[offset]); } /* Get FGRAPH_TYPE from the word from the @offset at ret_stack */ static inline int get_fgraph_type(struct task_struct *t, int offset) { - return (t->ret_stack[offset] >> FGRAPH_TYPE_SHIFT) & FGRAPH_TYPE_MASK; + return __get_type(t->ret_stack[offset]); } /* For BITMAP type: get the bitmask from the @offset at ret_stack */ @@ -212,6 +277,25 @@ set_bitmap(struct task_struct *t, int offset, unsigned long bitmap) (FGRAPH_TYPE_BITMAP << FGRAPH_TYPE_SHIFT) | FGRAPH_FRAME_OFFSET; } +/* For DATA type: get the data saved under the ret_stack word at @offset */ +static inline void *get_data_type_data(struct task_struct *t, int offset) +{ + unsigned long val = t->ret_stack[offset]; + + if (__get_type(val) != FGRAPH_TYPE_DATA) + return NULL; + offset -= __get_data_size(val); + return (void *)&t->ret_stack[offset]; +} + +/* Create the ret_stack word for a DATA type */ +static inline unsigned long make_data_type_val(int idx, int size, int offset) +{ + return (idx << FGRAPH_DATA_INDEX_SHIFT) | + ((size - 1) << FGRAPH_DATA_SHIFT) | + (FGRAPH_TYPE_DATA << FGRAPH_TYPE_SHIFT) | offset; +} + /* ftrace_graph_entry set to this to tell some archs to run function graph */ static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops) { @@ -245,6 +329,91 @@ static void ret_stack_init_task_vars(unsigned long *ret_stack) memset(gvals, 0, sizeof(*gvals) * FGRAPH_ARRAY_SIZE); } +/** + * fgraph_reserve_data - Reserve storage on the task's ret_stack + * @idx: The index of fgraph_array + * @size_bytes: The size in bytes to reserve + * + * Reserves space of up to FGRAPH_MAX_DATA_SIZE bytes on the + * task's ret_stack shadow stack, for a given fgraph_ops during + * the entryfunc() call. If entryfunc() returns zero, the storage + * is discarded. An entryfunc() can only call this once per iteration. + * The fgraph_ops retfunc() can retrieve this stored data with + * fgraph_retrieve_data(). + * + * Returns: On success, a pointer to the data on the stack. + * Otherwise, NULL if there's not enough space left on the + * ret_stack for the data, or if fgraph_reserve_data() was called + * more than once for a single entryfunc() call. + */ +void *fgraph_reserve_data(int idx, int size_bytes) +{ + unsigned long val; + void *data; + int curr_ret_stack = current->curr_ret_stack; + int data_size; + + if (size_bytes > FGRAPH_MAX_DATA_SIZE) + return NULL; + + /* Convert the data size to number of longs. */ + data_size = (size_bytes + sizeof(long) - 1) >> (sizeof(long) == 4 ? 2 : 3); + + val = get_fgraph_entry(current, curr_ret_stack - 1); + data = ¤t->ret_stack[curr_ret_stack]; + + curr_ret_stack += data_size + 1; + if (unlikely(curr_ret_stack >= SHADOW_STACK_MAX_OFFSET)) + return NULL; + + val = make_data_type_val(idx, data_size, __get_offset(val) + data_size + 1); + + /* Set the last word to be reserved */ + current->ret_stack[curr_ret_stack - 1] = val; + + /* Make sure interrupts see this */ + barrier(); + current->curr_ret_stack = curr_ret_stack; + /* Again sync with interrupts, and reset reserve */ + current->ret_stack[curr_ret_stack - 1] = val; + + return data; +} + +/** + * fgraph_retrieve_data - Retrieve stored data from fgraph_reserve_data() + * @idx: the index of fgraph_array (fgraph_ops::idx) + * @size_bytes: pointer to retrieved data size. + * + * This is to be called by a fgraph_ops retfunc(), to retrieve data that + * was stored by the fgraph_ops entryfunc() on the function entry. + * That is, this will retrieve the data that was reserved on the + * entry of the function that corresponds to the exit of the function + * that the fgraph_ops retfunc() is called on. + * + * Returns: The stored data from fgraph_reserve_data() called by the + * matching entryfunc() for the retfunc() this is called from. + * Or NULL if there was nothing stored. + */ +void *fgraph_retrieve_data(int idx, int *size_bytes) +{ + int offset = current->curr_ret_stack - 1; + unsigned long val; + + val = get_fgraph_entry(current, offset); + while (__get_type(val) == FGRAPH_TYPE_DATA) { + if (__get_data_index(val) == idx) + goto found; + offset -= __get_data_size(val) + 1; + val = get_fgraph_entry(current, offset); + } + return NULL; +found: + if (size_bytes) + *size_bytes = __get_data_size(val) * sizeof(long); + return get_data_type_data(current, offset); +} + /** * fgraph_get_task_var - retrieve a task specific state variable * @gops: The ftrace_ops that owns the task specific variable @@ -464,13 +633,18 @@ int function_graph_enter(unsigned long ret, unsigned long func, for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) { struct fgraph_ops *gops = fgraph_array[i]; + int save_curr_ret_stack; if (gops == &fgraph_stub) continue; + save_curr_ret_stack = current->curr_ret_stack; if (ftrace_ops_test(&gops->ops, func, NULL) && gops->entryfunc(&trace, gops)) bitmap |= BIT(i); + else + /* Clear out any saved storage */ + current->curr_ret_stack = save_curr_ret_stack; } if (!bitmap) @@ -626,7 +800,8 @@ static unsigned long __ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs * curr_ret_stack is after that. */ barrier(); - current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1; + current->curr_ret_stack = offset - FGRAPH_FRAME_OFFSET; + current->curr_ret_depth--; return ret; } -- cgit v1.2.3 From 3aee48a8e01f98d3285a0064285b0152cdbb8a9e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 May 2024 21:46:57 +0300 Subject: misc: eeprom_93xx46: Hide legacy platform data in the driver First of all, there is no user for the platform data in the kernel. Second, it needs a lot of updates to follow the modern standards of the kernel, including proper Device Tree bindings and device property handling. For now, just hide the legacy platform data in the driver's code. Signed-off-by: Andy Shevchenko Reviewed-by: Linus Walleij Link: https://lore.kernel.org/r/20240508184905.2102633-5-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/misc/eeprom/eeprom_93xx46.c | 35 +++++++++++++++++++++++++++++------ include/linux/eeprom_93xx46.h | 32 -------------------------------- 2 files changed, 29 insertions(+), 38 deletions(-) delete mode 100644 include/linux/eeprom_93xx46.h (limited to 'include') diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c index bbcc9412bb4e..a5a043ddedbb 100644 --- a/drivers/misc/eeprom/eeprom_93xx46.c +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -5,6 +5,7 @@ * (C) 2011 DENX Software Engineering, Anatolij Gustschin */ +#include #include #include #include @@ -19,7 +20,31 @@ #include -#include +struct eeprom_93xx46_platform_data { + unsigned char flags; +#define EE_ADDR8 0x01 /* 8 bit addr. cfg */ +#define EE_ADDR16 0x02 /* 16 bit addr. cfg */ +#define EE_READONLY 0x08 /* forbid writing */ +#define EE_SIZE1K 0x10 /* 1 kb of data, that is a 93xx46 */ +#define EE_SIZE2K 0x20 /* 2 kb of data, that is a 93xx56 */ +#define EE_SIZE4K 0x40 /* 4 kb of data, that is a 93xx66 */ + + unsigned int quirks; +/* Single word read transfers only; no sequential read. */ +#define EEPROM_93XX46_QUIRK_SINGLE_WORD_READ (1 << 0) +/* Instructions such as EWEN are (addrlen + 2) in length. */ +#define EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH (1 << 1) +/* Add extra cycle after address during a read */ +#define EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE BIT(2) + + /* + * optional hooks to control additional logic + * before and after spi transfer. + */ + void (*prepare)(void *); + void (*finish)(void *); + struct gpio_desc *select; +}; #define OP_START 0x4 #define OP_WRITE (OP_START | 0x1) @@ -479,11 +504,9 @@ static int eeprom_93xx46_probe(struct spi_device *spi) struct device *dev = &spi->dev; int err; - if (dev_fwnode(dev)) { - err = eeprom_93xx46_probe_fw(dev); - if (err < 0) - return err; - } + err = eeprom_93xx46_probe_fw(dev); + if (err < 0) + return err; pd = spi->dev.platform_data; if (!pd) { diff --git a/include/linux/eeprom_93xx46.h b/include/linux/eeprom_93xx46.h deleted file mode 100644 index 34c2175e6a1e..000000000000 --- a/include/linux/eeprom_93xx46.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Module: eeprom_93xx46 - * platform description for 93xx46 EEPROMs. - */ -#include - -struct eeprom_93xx46_platform_data { - unsigned char flags; -#define EE_ADDR8 0x01 /* 8 bit addr. cfg */ -#define EE_ADDR16 0x02 /* 16 bit addr. cfg */ -#define EE_READONLY 0x08 /* forbid writing */ -#define EE_SIZE1K 0x10 /* 1 kb of data, that is a 93xx46 */ -#define EE_SIZE2K 0x20 /* 2 kb of data, that is a 93xx56 */ -#define EE_SIZE4K 0x40 /* 4 kb of data, that is a 93xx66 */ - - unsigned int quirks; -/* Single word read transfers only; no sequential read. */ -#define EEPROM_93XX46_QUIRK_SINGLE_WORD_READ (1 << 0) -/* Instructions such as EWEN are (addrlen + 2) in length. */ -#define EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH (1 << 1) -/* Add extra cycle after address during a read */ -#define EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE BIT(2) - - /* - * optional hooks to control additional logic - * before and after spi transfer. - */ - void (*prepare)(void *); - void (*finish)(void *); - struct gpio_desc *select; -}; -- cgit v1.2.3 From 1968845d358e108cfbfba45538d64b3cbdf04ac2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 31 May 2024 17:51:29 +0300 Subject: driver core: device.h: Group of_node handling declarations and definitions There are a few of_node related APIs defined in the driver core. Group the respective declarations and definitions in the header. There is no functional change. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240531145129.1506733-1-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index fc3bd7116ab9..56f266429229 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1031,13 +1031,6 @@ static inline void device_lock_assert(struct device *dev) lockdep_assert_held(&dev->mutex); } -static inline struct device_node *dev_of_node(struct device *dev) -{ - if (!IS_ENABLED(CONFIG_OF) || !dev) - return NULL; - return dev->of_node; -} - static inline bool dev_has_sync_state(struct device *dev) { if (!dev) @@ -1144,10 +1137,18 @@ void unlock_device_hotplug(void); int lock_device_hotplug_sysfs(void); int device_offline(struct device *dev); int device_online(struct device *dev); + void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); -void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); void device_set_node(struct device *dev, struct fwnode_handle *fwnode); +void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); + +static inline struct device_node *dev_of_node(struct device *dev) +{ + if (!IS_ENABLED(CONFIG_OF) || !dev) + return NULL; + return dev->of_node; +} static inline int dev_num_vf(struct device *dev) { -- cgit v1.2.3 From fd7179ece035417f44f7ecff086d6df674d8a5bd Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 30 May 2024 10:14:08 -0500 Subject: iio: introduce struct iio_scan_type This gives the channel scan_type a named type so that it can be used to simplify code in later commits. Signed-off-by: David Lechner Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240530-iio-add-support-for-multiple-scan-types-v3-1-cbc4acea2cfa@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 55e2b22086a1..19de573a944a 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -173,6 +173,27 @@ struct iio_event_spec { unsigned long mask_shared_by_all; }; +/** + * struct iio_scan_type - specification for channel data format in buffer + * @sign: 's' or 'u' to specify signed or unsigned + * @realbits: Number of valid bits of data + * @storagebits: Realbits + padding + * @shift: Shift right by this before masking out realbits. + * @repeat: Number of times real/storage bits repeats. When the + * repeat element is more than 1, then the type element in + * sysfs will show a repeat value. Otherwise, the number + * of repetitions is omitted. + * @endianness: little or big endian + */ +struct iio_scan_type { + char sign; + u8 realbits; + u8 storagebits; + u8 shift; + u8 repeat; + enum iio_endian endianness; +}; + /** * struct iio_chan_spec - specification of a single channel * @type: What type of measurement is the channel making. @@ -184,17 +205,6 @@ struct iio_event_spec { * @scan_index: Monotonic index to give ordering in scans when read * from a buffer. * @scan_type: struct describing the scan type - * @scan_type.sign: 's' or 'u' to specify signed or unsigned - * @scan_type.realbits: Number of valid bits of data - * @scan_type.storagebits: Realbits + padding - * @scan_type.shift: Shift right by this before masking out - * realbits. - * @scan_type.repeat: Number of times real/storage bits repeats. - * When the repeat element is more than 1, then - * the type element in sysfs will show a repeat - * value. Otherwise, the number of repetitions - * is omitted. - * @scan_type.endianness: little or big endian * @info_mask_separate: What information is to be exported that is specific to * this channel. * @info_mask_separate_available: What availability information is to be @@ -245,14 +255,7 @@ struct iio_chan_spec { int channel2; unsigned long address; int scan_index; - struct { - char sign; - u8 realbits; - u8 storagebits; - u8 shift; - u8 repeat; - enum iio_endian endianness; - } scan_type; + struct iio_scan_type scan_type; long info_mask_separate; long info_mask_separate_available; long info_mask_shared_by_type; -- cgit v1.2.3 From d8f2bb50845f2797f594ffe3cac9417abff4d7b0 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Thu, 30 May 2024 10:14:10 -0500 Subject: iio: add support for multiple scan types per channel This adds new fields to the iio_channel structure to support multiple scan types per channel. This is useful for devices that support multiple resolution modes or other modes that require different data formats of the raw data. To make use of this, drivers need to implement the new callback get_current_scan_type() to resolve the scan type for a given channel based on the current state of the driver. There is a new scan_type_ext field in the iio_channel structure that should be used to store the scan types for any channel that has more than one. There is also a new flag has_ext_scan_type that acts as a type discriminator for the scan_type/ext_scan_type union. A union is used so that we don't grow the size of the iio_channel structure and also makes it clear that scan_type and ext_scan_type are mutually exclusive. The buffer code is the only code in the IIO core code that is using the scan_type field. This patch updates the buffer code to use the new iio_channel_validate_scan_type() function to ensure it is returning the correct scan type for the current state of the device when reading the sysfs attributes. The buffer validation code is also update to validate any additional scan types that are set in the scan_type_ext field. Part of that code is refactored to a new function to avoid duplication. Some userspace tools may need to be updated to re-read the scan type after writing any other attribute. During testing, we noticed that we had to restart iiod to get it to re-read the scan type after enabling oversampling on the ad7380 driver. Signed-off-by: David Lechner Reviewed-by: Nuno Sa Link: https://lore.kernel.org/r/20240530-iio-add-support-for-multiple-scan-types-v3-3-cbc4acea2cfa@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-buffer.c | 101 ++++++++++++++++++++++++++++++-------- include/linux/iio/iio.h | 55 ++++++++++++++++++++- 2 files changed, 133 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 08103a9e77f7..0138b21b244f 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -365,9 +365,16 @@ static ssize_t iio_show_fixed_type(struct device *dev, struct device_attribute *attr, char *buf) { + struct iio_dev *indio_dev = dev_to_iio_dev(dev); struct iio_dev_attr *this_attr = to_iio_dev_attr(attr); - const struct iio_scan_type *scan_type = &this_attr->c->scan_type; - u8 type = scan_type->endianness; + const struct iio_scan_type *scan_type; + u8 type; + + scan_type = iio_get_current_scan_type(indio_dev, this_attr->c); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + + type = scan_type->endianness; if (type == IIO_CPU) { #ifdef __LITTLE_ENDIAN @@ -691,15 +698,18 @@ static ssize_t enable_show(struct device *dev, struct device_attribute *attr, return sysfs_emit(buf, "%d\n", iio_buffer_is_active(buffer)); } -static unsigned int iio_storage_bytes_for_si(struct iio_dev *indio_dev, - unsigned int scan_index) +static int iio_storage_bytes_for_si(struct iio_dev *indio_dev, + unsigned int scan_index) { const struct iio_chan_spec *ch; const struct iio_scan_type *scan_type; unsigned int bytes; ch = iio_find_channel_from_si(indio_dev, scan_index); - scan_type = &ch->scan_type; + scan_type = iio_get_current_scan_type(indio_dev, ch); + if (IS_ERR(scan_type)) + return PTR_ERR(scan_type); + bytes = scan_type->storagebits / 8; if (scan_type->repeat > 1) @@ -708,7 +718,7 @@ static unsigned int iio_storage_bytes_for_si(struct iio_dev *indio_dev, return bytes; } -static unsigned int iio_storage_bytes_for_timestamp(struct iio_dev *indio_dev) +static int iio_storage_bytes_for_timestamp(struct iio_dev *indio_dev) { struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); @@ -726,6 +736,9 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev, for_each_set_bit(i, mask, indio_dev->masklength) { length = iio_storage_bytes_for_si(indio_dev, i); + if (length < 0) + return length; + bytes = ALIGN(bytes, length); bytes += length; largest = max(largest, length); @@ -733,6 +746,9 @@ static int iio_compute_scan_bytes(struct iio_dev *indio_dev, if (timestamp) { length = iio_storage_bytes_for_timestamp(indio_dev); + if (length < 0) + return length; + bytes = ALIGN(bytes, length); bytes += length; largest = max(largest, length); @@ -1012,14 +1028,22 @@ static int iio_buffer_update_demux(struct iio_dev *indio_dev, indio_dev->masklength, in_ind + 1); while (in_ind != out_ind) { - length = iio_storage_bytes_for_si(indio_dev, in_ind); + ret = iio_storage_bytes_for_si(indio_dev, in_ind); + if (ret < 0) + goto error_clear_mux_table; + + length = ret; /* Make sure we are aligned */ in_loc = roundup(in_loc, length) + length; in_ind = find_next_bit(indio_dev->active_scan_mask, indio_dev->masklength, in_ind + 1); } - length = iio_storage_bytes_for_si(indio_dev, in_ind); + ret = iio_storage_bytes_for_si(indio_dev, in_ind); + if (ret < 0) + goto error_clear_mux_table; + + length = ret; out_loc = roundup(out_loc, length); in_loc = roundup(in_loc, length); ret = iio_buffer_add_demux(buffer, &p, in_loc, out_loc, length); @@ -1030,7 +1054,11 @@ static int iio_buffer_update_demux(struct iio_dev *indio_dev, } /* Relies on scan_timestamp being last */ if (buffer->scan_timestamp) { - length = iio_storage_bytes_for_timestamp(indio_dev); + ret = iio_storage_bytes_for_timestamp(indio_dev); + if (ret < 0) + goto error_clear_mux_table; + + length = ret; out_loc = roundup(out_loc, length); in_loc = roundup(in_loc, length); ret = iio_buffer_add_demux(buffer, &p, in_loc, out_loc, length); @@ -1597,6 +1625,22 @@ static long iio_device_buffer_ioctl(struct iio_dev *indio_dev, struct file *filp } } +static int iio_channel_validate_scan_type(struct device *dev, int ch, + const struct iio_scan_type *scan_type) +{ + /* Verify that sample bits fit into storage */ + if (scan_type->storagebits < scan_type->realbits + scan_type->shift) { + dev_err(dev, + "Channel %d storagebits (%d) < shifted realbits (%d + %d)\n", + ch, scan_type->storagebits, + scan_type->realbits, + scan_type->shift); + return -EINVAL; + } + + return 0; +} + static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer, struct iio_dev *indio_dev, int index) @@ -1626,18 +1670,33 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer, if (channels[i].scan_index < 0) continue; - scan_type = &channels[i].scan_type; - - /* Verify that sample bits fit into storage */ - if (scan_type->storagebits < - scan_type->realbits + scan_type->shift) { - dev_err(&indio_dev->dev, - "Channel %d storagebits (%d) < shifted realbits (%d + %d)\n", - i, scan_type->storagebits, - scan_type->realbits, - scan_type->shift); - ret = -EINVAL; - goto error_cleanup_dynamic; + if (channels[i].has_ext_scan_type) { + int j; + + /* + * get_current_scan_type is required when using + * extended scan types. + */ + if (!indio_dev->info->get_current_scan_type) { + ret = -EINVAL; + goto error_cleanup_dynamic; + } + + for (j = 0; j < channels[i].num_ext_scan_type; j++) { + scan_type = &channels[i].ext_scan_type[j]; + + ret = iio_channel_validate_scan_type( + &indio_dev->dev, i, scan_type); + if (ret) + goto error_cleanup_dynamic; + } + } else { + scan_type = &channels[i].scan_type; + + ret = iio_channel_validate_scan_type( + &indio_dev->dev, i, scan_type); + if (ret) + goto error_cleanup_dynamic; } ret = iio_buffer_add_channel_sysfs(indio_dev, buffer, diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 19de573a944a..894309294182 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -204,7 +204,13 @@ struct iio_scan_type { * @address: Driver specific identifier. * @scan_index: Monotonic index to give ordering in scans when read * from a buffer. - * @scan_type: struct describing the scan type + * @scan_type: struct describing the scan type - mutually exclusive + * with ext_scan_type. + * @ext_scan_type: Used in rare cases where there is more than one scan + * format for a channel. When this is used, the flag + * has_ext_scan_type must be set and the driver must + * implement get_current_scan_type in struct iio_info. + * @num_ext_scan_type: Number of elements in ext_scan_type. * @info_mask_separate: What information is to be exported that is specific to * this channel. * @info_mask_separate_available: What availability information is to be @@ -248,6 +254,7 @@ struct iio_scan_type { * attributes but not for event codes. * @output: Channel is output. * @differential: Channel is differential. + * @has_ext_scan_type: True if ext_scan_type is used instead of scan_type. */ struct iio_chan_spec { enum iio_chan_type type; @@ -255,7 +262,13 @@ struct iio_chan_spec { int channel2; unsigned long address; int scan_index; - struct iio_scan_type scan_type; + union { + struct iio_scan_type scan_type; + struct { + const struct iio_scan_type *ext_scan_type; + unsigned int num_ext_scan_type; + }; + }; long info_mask_separate; long info_mask_separate_available; long info_mask_shared_by_type; @@ -273,6 +286,7 @@ struct iio_chan_spec { unsigned indexed:1; unsigned output:1; unsigned differential:1; + unsigned has_ext_scan_type:1; }; @@ -435,6 +449,9 @@ struct iio_trigger; /* forward declaration */ * for better event identification. * @validate_trigger: function to validate the trigger when the * current trigger gets changed. + * @get_current_scan_type: must be implemented by drivers that use ext_scan_type + * in the channel spec to return the index of the currently + * active ext_scan type for a channel. * @update_scan_mode: function to configure device and scan buffer when * channels have changed * @debugfs_reg_access: function to read or write register value of device @@ -519,6 +536,8 @@ struct iio_info { int (*validate_trigger)(struct iio_dev *indio_dev, struct iio_trigger *trig); + int (*get_current_scan_type)(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan); int (*update_scan_mode)(struct iio_dev *indio_dev, const unsigned long *scan_mask); int (*debugfs_reg_access)(struct iio_dev *indio_dev, @@ -804,6 +823,38 @@ static inline bool iio_read_acpi_mount_matrix(struct device *dev, } #endif +/** + * iio_get_current_scan_type - Get the current scan type for a channel + * @indio_dev: the IIO device to get the scan type for + * @chan: the channel to get the scan type for + * + * Most devices only have one scan type per channel and can just access it + * directly without calling this function. Core IIO code and drivers that + * implement ext_scan_type in the channel spec should use this function to + * get the current scan type for a channel. + * + * Returns: the current scan type for the channel or error. + */ +static inline const struct iio_scan_type +*iio_get_current_scan_type(const struct iio_dev *indio_dev, + const struct iio_chan_spec *chan) +{ + int ret; + + if (chan->has_ext_scan_type) { + ret = indio_dev->info->get_current_scan_type(indio_dev, chan); + if (ret < 0) + return ERR_PTR(ret); + + if (ret >= chan->num_ext_scan_type) + return ERR_PTR(-EINVAL); + + return &chan->ext_scan_type[ret]; + } + + return &chan->scan_type; +} + ssize_t iio_format_value(char *buf, unsigned int type, int size, int *vals); int iio_str_to_fixpoint(const char *str, int fract_mult, int *integer, -- cgit v1.2.3 From b6e6aca6c2b1b53fb4db4b672eaa2722a75aa6a2 Mon Sep 17 00:00:00 2001 From: Ramona Gradinariu Date: Mon, 27 May 2024 17:26:12 +0300 Subject: iio: imu: adis_buffer: Add buffer setup API with buffer attributes Add new API called devm_adis_setup_buffer_and_trigger_with_attrs() which also takes buffer attributes as a parameter. Rewrite devm_adis_setup_buffer_and_trigger() implementation such that it calls devm_adis_setup_buffer_and_trigger_with_attrs() with buffer attributes parameter NULL Reviewed-by: Nuno Sa Signed-off-by: Ramona Gradinariu Link: https://lore.kernel.org/r/20240527142618.275897-4-ramona.bolboaca13@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis_buffer.c | 32 ++++++++++++++++++-------------- include/linux/iio/imu/adis.h | 20 ++++++++++++++++---- 2 files changed, 34 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/iio/imu/adis_buffer.c b/drivers/iio/imu/adis_buffer.c index 928933027ae3..871b78b225e2 100644 --- a/drivers/iio/imu/adis_buffer.c +++ b/drivers/iio/imu/adis_buffer.c @@ -175,31 +175,36 @@ static void adis_buffer_cleanup(void *arg) } /** - * devm_adis_setup_buffer_and_trigger() - Sets up buffer and trigger for - * the managed adis device + * devm_adis_setup_buffer_and_trigger_with_attrs() - Sets up buffer and trigger + * for the managed adis device with buffer attributes. * @adis: The adis device * @indio_dev: The IIO device - * @trigger_handler: Optional trigger handler, may be NULL. + * @trigger_handler: Trigger handler: should handle the buffer readings. + * @ops: Optional buffer setup functions, may be NULL. + * @buffer_attrs: Extra buffer attributes. * * Returns 0 on success, a negative error code otherwise. * - * This function sets up the buffer and trigger for a adis devices. If - * 'trigger_handler' is NULL the default trigger handler will be used. The - * default trigger handler will simply read the registers assigned to the - * currently active channels. + * This function sets up the buffer (with buffer setup functions and extra + * buffer attributes) and trigger for a adis devices with buffer attributes. */ int -devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, - irq_handler_t trigger_handler) +devm_adis_setup_buffer_and_trigger_with_attrs(struct adis *adis, struct iio_dev *indio_dev, + irq_handler_t trigger_handler, + const struct iio_buffer_setup_ops *ops, + const struct iio_dev_attr **buffer_attrs) { int ret; if (!trigger_handler) trigger_handler = adis_trigger_handler; - ret = devm_iio_triggered_buffer_setup(&adis->spi->dev, indio_dev, - &iio_pollfunc_store_time, - trigger_handler, NULL); + ret = devm_iio_triggered_buffer_setup_ext(&adis->spi->dev, indio_dev, + &iio_pollfunc_store_time, + trigger_handler, + IIO_BUFFER_DIRECTION_IN, + ops, + buffer_attrs); if (ret) return ret; @@ -212,5 +217,4 @@ devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, return devm_add_action_or_reset(&adis->spi->dev, adis_buffer_cleanup, adis); } -EXPORT_SYMBOL_NS_GPL(devm_adis_setup_buffer_and_trigger, IIO_ADISLIB); - +EXPORT_SYMBOL_NS_GPL(devm_adis_setup_buffer_and_trigger_with_attrs, IIO_ADISLIB); diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 8898966bc0f0..0fe3a2f63033 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -21,6 +21,7 @@ #define ADIS_REG_PAGE_ID 0x00 struct adis; +struct iio_dev_attr; /** * struct adis_timeouts - ADIS chip variant timeouts @@ -515,11 +516,19 @@ int adis_single_conversion(struct iio_dev *indio_dev, #define ADIS_ROT_CHAN(mod, addr, si, info_sep, info_all, bits) \ ADIS_MOD_CHAN(IIO_ROT, mod, addr, si, info_sep, info_all, bits) +#define devm_adis_setup_buffer_and_trigger(adis, indio_dev, trigger_handler) \ + devm_adis_setup_buffer_and_trigger_with_attrs((adis), (indio_dev), \ + (trigger_handler), NULL, \ + NULL) + #ifdef CONFIG_IIO_ADIS_LIB_BUFFER int -devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, - irq_handler_t trigger_handler); +devm_adis_setup_buffer_and_trigger_with_attrs(struct adis *adis, + struct iio_dev *indio_dev, + irq_handler_t trigger_handler, + const struct iio_buffer_setup_ops *ops, + const struct iio_dev_attr **buffer_attrs); int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev); @@ -529,8 +538,11 @@ int adis_update_scan_mode(struct iio_dev *indio_dev, #else /* CONFIG_IIO_BUFFER */ static inline int -devm_adis_setup_buffer_and_trigger(struct adis *adis, struct iio_dev *indio_dev, - irq_handler_t trigger_handler) +devm_adis_setup_buffer_and_trigger_with_attrs(struct adis *adis, + struct iio_dev *indio_dev, + irq_handler_t trigger_handler, + const struct iio_buffer_setup_ops *ops, + const struct iio_dev_attr **buffer_attrs) { return 0; } -- cgit v1.2.3 From 01724ce2d9405b2246bbb69701c74880eb56a34b Mon Sep 17 00:00:00 2001 From: Ramona Gradinariu Date: Mon, 27 May 2024 17:26:15 +0300 Subject: iio: imu: adis_trigger: Allow level interrupts for FIFO readings Currently, adis library allows configuration only for edge interrupts, needed for data ready sampling. This patch removes the restriction for level interrupts for devices which have FIFO support. Furthermore, in case of devices which have FIFO support, devm_request_threaded_irq is used for interrupt allocation, to avoid flooding the processor with the FIFO watermark level interrupt, which is active until enough data has been read from the FIFO. Reviewed-by: Nuno Sa Signed-off-by: Ramona Gradinariu Link: https://lore.kernel.org/r/20240527142618.275897-7-ramona.bolboaca13@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/imu/adis_trigger.c | 37 ++++++++++++++++++++++++++----------- include/linux/iio/imu/adis.h | 1 + 2 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/iio/imu/adis_trigger.c b/drivers/iio/imu/adis_trigger.c index f890bf842db8..a8740b043cfe 100644 --- a/drivers/iio/imu/adis_trigger.c +++ b/drivers/iio/imu/adis_trigger.c @@ -34,17 +34,24 @@ static int adis_validate_irq_flag(struct adis *adis) if (adis->data->unmasked_drdy) adis->irq_flag |= IRQF_NO_AUTOEN; /* - * Typically this devices have data ready either on the rising edge or - * on the falling edge of the data ready pin. This checks enforces that - * one of those is set in the drivers... It defaults to - * IRQF_TRIGGER_RISING for backward compatibility with devices that - * don't support changing the pin polarity. + * Typically adis devices without FIFO have data ready either on the + * rising edge or on the falling edge of the data ready pin. + * IMU devices with FIFO support have the watermark pin level driven + * either high or low when the FIFO is filled with the desired number + * of samples. + * It defaults to IRQF_TRIGGER_RISING for backward compatibility with + * devices that don't support changing the pin polarity. */ if (direction == IRQF_TRIGGER_NONE) { adis->irq_flag |= IRQF_TRIGGER_RISING; return 0; } else if (direction != IRQF_TRIGGER_RISING && - direction != IRQF_TRIGGER_FALLING) { + direction != IRQF_TRIGGER_FALLING && !adis->data->has_fifo) { + dev_err(&adis->spi->dev, "Invalid IRQ mask: %08lx\n", + adis->irq_flag); + return -EINVAL; + } else if (direction != IRQF_TRIGGER_HIGH && + direction != IRQF_TRIGGER_LOW && adis->data->has_fifo) { dev_err(&adis->spi->dev, "Invalid IRQ mask: %08lx\n", adis->irq_flag); return -EINVAL; @@ -77,11 +84,19 @@ int devm_adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev) if (ret) return ret; - ret = devm_request_irq(&adis->spi->dev, adis->spi->irq, - &iio_trigger_generic_data_rdy_poll, - adis->irq_flag, - indio_dev->name, - adis->trig); + if (adis->data->has_fifo) + ret = devm_request_threaded_irq(&adis->spi->dev, adis->spi->irq, + NULL, + &iio_trigger_generic_data_rdy_poll, + adis->irq_flag | IRQF_ONESHOT, + indio_dev->name, + adis->trig); + else + ret = devm_request_irq(&adis->spi->dev, adis->spi->irq, + &iio_trigger_generic_data_rdy_poll, + adis->irq_flag, + indio_dev->name, + adis->trig); if (ret) return ret; diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index 0fe3a2f63033..4bb0a53cf7ea 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -85,6 +85,7 @@ struct adis_data { bool unmasked_drdy; bool has_paging; + bool has_fifo; unsigned int burst_reg_cmd; unsigned int burst_len; -- cgit v1.2.3 From 175d1825ca4d2288fee734ada0955a1e36dd50e6 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 1 Jun 2024 06:22:43 +0900 Subject: scsi: ufs: pci: Add support MCQ for QEMU-based UFS Recently, ufs-mcq feature has been introduced to QEMU hw/ufs device [1]. This patch adds MCQ support for upstream QEMU UFS PCI controller. This patch provides mandatory vops callbacks to make UFS controller work properly on MCQ mode. Operation and Runtime Config register stride is fixed to 48bytes which is implemented by qemu. [1] https://lore.kernel.org/qemu-devel/cover.1716876237.git.jeuk20.kim@samsung.com/ Signed-off-by: Minwoo Im Link: https://lore.kernel.org/r/20240531212244.1593535-2-minwoo.im@samsung.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufs-mcq.c | 14 +++++++++++++ drivers/ufs/host/ufshcd-pci.c | 48 ++++++++++++++++++++++++++++++++++++++++++- include/ufs/ufshcd.h | 1 + 3 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 52210c4c20dc..46faa54aea94 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -18,6 +18,7 @@ #include #define MAX_QUEUE_SUP GENMASK(7, 0) +#define QCFGPTR GENMASK(23, 16) #define UFS_MCQ_MIN_RW_QUEUES 2 #define UFS_MCQ_MIN_READ_QUEUES 0 #define UFS_MCQ_MIN_POLL_QUEUES 0 @@ -116,6 +117,19 @@ struct ufs_hw_queue *ufshcd_mcq_req_to_hwq(struct ufs_hba *hba, return &hba->uhq[hwq]; } +/** + * ufshcd_mcq_queue_cfg_addr - get an start address of the MCQ Queue Config + * Registers. + * @hba: per adapter instance + * + * Return: Start address of MCQ Queue Config Registers in HCI + */ +unsigned int ufshcd_mcq_queue_cfg_addr(struct ufs_hba *hba) +{ + return FIELD_GET(QCFGPTR, hba->mcq_capabilities) * 0x200; +} +EXPORT_SYMBOL_GPL(ufshcd_mcq_queue_cfg_addr); + /** * ufshcd_mcq_decide_queue_depth - decide the queue depth * @hba: per adapter instance diff --git a/drivers/ufs/host/ufshcd-pci.c b/drivers/ufs/host/ufshcd-pci.c index 0aca666d2199..ba8af9c0e77f 100644 --- a/drivers/ufs/host/ufshcd-pci.c +++ b/drivers/ufs/host/ufshcd-pci.c @@ -20,6 +20,8 @@ #include #include +#define MAX_SUPP_MAC 64 + struct ufs_host { void (*late_init)(struct ufs_hba *hba); }; @@ -446,6 +448,49 @@ static int ufs_intel_mtl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } +static int ufs_qemu_get_hba_mac(struct ufs_hba *hba) +{ + return MAX_SUPP_MAC; +} + +static int ufs_qemu_mcq_config_resource(struct ufs_hba *hba) +{ + hba->mcq_base = hba->mmio_base + ufshcd_mcq_queue_cfg_addr(hba); + + return 0; +} + +static int ufs_qemu_op_runtime_config(struct ufs_hba *hba) +{ + struct ufshcd_mcq_opr_info_t *opr; + int i; + + u32 sqdao = ufsmcq_readl(hba, ufshcd_mcq_cfg_offset(REG_SQDAO, 0)); + u32 sqisao = ufsmcq_readl(hba, ufshcd_mcq_cfg_offset(REG_SQISAO, 0)); + u32 cqdao = ufsmcq_readl(hba, ufshcd_mcq_cfg_offset(REG_CQDAO, 0)); + u32 cqisao = ufsmcq_readl(hba, ufshcd_mcq_cfg_offset(REG_CQISAO, 0)); + + hba->mcq_opr[OPR_SQD].offset = sqdao; + hba->mcq_opr[OPR_SQIS].offset = sqisao; + hba->mcq_opr[OPR_CQD].offset = cqdao; + hba->mcq_opr[OPR_CQIS].offset = cqisao; + + for (i = 0; i < OPR_MAX; i++) { + opr = &hba->mcq_opr[i]; + opr->stride = 48; + opr->base = hba->mmio_base + opr->offset; + } + + return 0; +} + +static struct ufs_hba_variant_ops ufs_qemu_hba_vops = { + .name = "qemu-pci", + .get_hba_mac = ufs_qemu_get_hba_mac, + .mcq_config_resource = ufs_qemu_mcq_config_resource, + .op_runtime_config = ufs_qemu_op_runtime_config, +}; + static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .name = "intel-pci", .init = ufs_intel_common_init, @@ -591,7 +636,8 @@ static const struct dev_pm_ops ufshcd_pci_pm_ops = { }; static const struct pci_device_id ufshcd_pci_tbl[] = { - { PCI_VENDOR_ID_REDHAT, 0x0013, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { PCI_VENDOR_ID_REDHAT, 0x0013, PCI_ANY_ID, PCI_ANY_ID, 0, 0, + (kernel_ulong_t)&ufs_qemu_hba_vops }, { PCI_VENDOR_ID_SAMSUNG, 0xC00C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, { PCI_VDEVICE(INTEL, 0x9DFA), (kernel_ulong_t)&ufs_intel_cnl_hba_vops }, { PCI_VDEVICE(INTEL, 0x4B41), (kernel_ulong_t)&ufs_intel_ehl_hba_vops }, diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index df68fb1d4f3f..9e0581115b34 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -1278,6 +1278,7 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val); void ufshcd_hba_stop(struct ufs_hba *hba); void ufshcd_schedule_eh_work(struct ufs_hba *hba); void ufshcd_mcq_config_mac(struct ufs_hba *hba, u32 max_active_cmds); +unsigned int ufshcd_mcq_queue_cfg_addr(struct ufs_hba *hba); u32 ufshcd_mcq_read_cqis(struct ufs_hba *hba, int i); void ufshcd_mcq_write_cqis(struct ufs_hba *hba, u32 val, int i); unsigned long ufshcd_mcq_poll_cqe_lock(struct ufs_hba *hba, -- cgit v1.2.3 From ba098ed9829c5de4d65e5708d08d3508f2e70a7d Mon Sep 17 00:00:00 2001 From: Daisuke Nojiri Date: Tue, 4 Jun 2024 10:01:48 -0700 Subject: platform/chrome: Add struct ec_response_get_next_event_v3 Add struct ec_response_get_next_event_v3 to upgrade EC_CMD_GET_NEXT_EVENT to version 3. Signed-off-by: Daisuke Nojiri Link: https://lore.kernel.org/r/20240604170552.2517189-1-dnojiri@chromium.org Signed-off-by: Tzung-Bi Shih --- include/linux/platform_data/cros_ec_commands.h | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/linux/platform_data/cros_ec_commands.h b/include/linux/platform_data/cros_ec_commands.h index ecc47d5fe239..ec598057d1da 100644 --- a/include/linux/platform_data/cros_ec_commands.h +++ b/include/linux/platform_data/cros_ec_commands.h @@ -3463,6 +3463,34 @@ union __ec_align_offset1 ec_response_get_next_data_v1 { }; BUILD_ASSERT(sizeof(union ec_response_get_next_data_v1) == 16); +union __ec_align_offset1 ec_response_get_next_data_v3 { + uint8_t key_matrix[18]; + + /* Unaligned */ + uint32_t host_event; + uint64_t host_event64; + + struct __ec_todo_unpacked { + /* For aligning the fifo_info */ + uint8_t reserved[3]; + struct ec_response_motion_sense_fifo_info info; + } sensor_fifo; + + uint32_t buttons; + + uint32_t switches; + + uint32_t fp_events; + + uint32_t sysrq; + + /* CEC events from enum mkbp_cec_event */ + uint32_t cec_events; + + uint8_t cec_message[16]; +}; +BUILD_ASSERT(sizeof(union ec_response_get_next_data_v3) == 18); + struct ec_response_get_next_event { uint8_t event_type; /* Followed by event data if any */ @@ -3475,6 +3503,12 @@ struct ec_response_get_next_event_v1 { union ec_response_get_next_data_v1 data; } __ec_align1; +struct ec_response_get_next_event_v3 { + uint8_t event_type; + /* Followed by event data if any */ + union ec_response_get_next_data_v3 data; +} __ec_align1; + /* Bit indices for buttons and switches.*/ /* Buttons */ #define EC_MKBP_POWER_BUTTON 0 -- cgit v1.2.3 From 106d6739823369c734a8fc3b13634274eee4f60e Mon Sep 17 00:00:00 2001 From: Daisuke Nojiri Date: Tue, 4 Jun 2024 16:08:25 -0700 Subject: platform/chrome: cros_ec_proto: Upgrade get_next_event to v3 Upgrade EC_CMD_GET_NEXT_EVENT to version 3. The max supported version will be v3. So, we speak v3 even if the EC says it supports v4+. Signed-off-by: Daisuke Nojiri Link: https://lore.kernel.org/r/20240604230837.2878737-1-dnojiri@chromium.org [tzungbi: uint32_t -> u32 per suggested by checkpatch.pl] Signed-off-by: Tzung-Bi Shih --- drivers/platform/chrome/cros_ec_proto.c | 27 ++++++++++++++++++--------- include/linux/platform_data/cros_ec_proto.h | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 2ad1b8221ec0..fe68be66ee98 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -684,7 +684,7 @@ EXPORT_SYMBOL(cros_ec_cmd_xfer_status); static int get_next_event_xfer(struct cros_ec_device *ec_dev, struct cros_ec_command *msg, - struct ec_response_get_next_event_v1 *event, + struct ec_response_get_next_event_v3 *event, int version, uint32_t size) { int ret; @@ -707,11 +707,12 @@ static int get_next_event(struct cros_ec_device *ec_dev) { struct { struct cros_ec_command msg; - struct ec_response_get_next_event_v1 event; + struct ec_response_get_next_event_v3 event; } __packed buf; struct cros_ec_command *msg = &buf.msg; - struct ec_response_get_next_event_v1 *event = &buf.event; - const int cmd_version = ec_dev->mkbp_event_supported - 1; + struct ec_response_get_next_event_v3 *event = &buf.event; + int cmd_version = ec_dev->mkbp_event_supported - 1; + u32 size; memset(msg, 0, sizeof(*msg)); if (ec_dev->suspended) { @@ -719,12 +720,20 @@ static int get_next_event(struct cros_ec_device *ec_dev) return -EHOSTDOWN; } - if (cmd_version == 0) - return get_next_event_xfer(ec_dev, msg, event, 0, - sizeof(struct ec_response_get_next_event)); + if (cmd_version == 0) { + size = sizeof(struct ec_response_get_next_event); + } else if (cmd_version < 3) { + size = sizeof(struct ec_response_get_next_event_v1); + } else { + /* + * The max version we support is v3. So, we speak v3 even if the + * EC says it supports v4+. + */ + cmd_version = 3; + size = sizeof(struct ec_response_get_next_event_v3); + } - return get_next_event_xfer(ec_dev, msg, event, cmd_version, - sizeof(struct ec_response_get_next_event_v1)); + return get_next_event_xfer(ec_dev, msg, event, cmd_version, size); } static int get_keyboard_state_event(struct cros_ec_device *ec_dev) diff --git a/include/linux/platform_data/cros_ec_proto.h b/include/linux/platform_data/cros_ec_proto.h index 1ddc52603f9a..6e9225bdf903 100644 --- a/include/linux/platform_data/cros_ec_proto.h +++ b/include/linux/platform_data/cros_ec_proto.h @@ -185,7 +185,7 @@ struct cros_ec_device { bool host_sleep_v1; struct blocking_notifier_head event_notifier; - struct ec_response_get_next_event_v1 event_data; + struct ec_response_get_next_event_v3 event_data; int event_size; u32 host_event_wake_mask; u32 last_resume_result; -- cgit v1.2.3 From 93bbbcb1e762a49fc18ee1272545b77371595f1e Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sat, 25 May 2024 02:30:39 +0000 Subject: mm/memblock: fix a typo in description of for_each_mem_region() No functional change. Signed-off-by: Wei Yang Link: https://lore.kernel.org/r/20240525023040.13509-2-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- include/linux/memblock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e2082240586d..6cf18dc2b4d0 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -565,7 +565,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo } /** - * for_each_mem_region - itereate over memory regions + * for_each_mem_region - iterate over memory regions * @region: loop variable */ #define for_each_mem_region(region) \ -- cgit v1.2.3 From 172e422ffea20a89bfdc672741c1aad6fbb5044e Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Sun, 12 May 2024 13:30:07 +0200 Subject: fsnotify: clear PARENT_WATCHED flags lazily In some setups directories can have many (usually negative) dentries. Hence __fsnotify_update_child_dentry_flags() function can take a significant amount of time. Since the bulk of this function happens under inode->i_lock this causes a significant contention on the lock when we remove the watch from the directory as the __fsnotify_update_child_dentry_flags() call from fsnotify_recalc_mask() races with __fsnotify_update_child_dentry_flags() calls from __fsnotify_parent() happening on children. This can lead upto softlockup reports reported by users. Fix the problem by calling fsnotify_update_children_dentry_flags() to set PARENT_WATCHED flags only when parent starts watching children. When parent stops watching children, clear false positive PARENT_WATCHED flags lazily in __fsnotify_parent() for each accessed child. Suggested-by: Jan Kara Signed-off-by: Amir Goldstein Signed-off-by: Stephen Brennan Signed-off-by: Jan Kara --- fs/notify/fsnotify.c | 31 +++++++++++++++++++++---------- fs/notify/fsnotify.h | 2 +- fs/notify/mark.c | 32 +++++++++++++++++++++++++++++--- include/linux/fsnotify_backend.h | 8 +++++--- 4 files changed, 56 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ff69ae24c4e8..272c8a1dab3c 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -117,17 +117,13 @@ void fsnotify_sb_free(struct super_block *sb) * parent cares. Thus when an event happens on a child it can quickly tell * if there is a need to find a parent and send the event to the parent. */ -void __fsnotify_update_child_dentry_flags(struct inode *inode) +void fsnotify_set_children_dentry_flags(struct inode *inode) { struct dentry *alias; - int watched; if (!S_ISDIR(inode->i_mode)) return; - /* determine if the children should tell inode about their events */ - watched = fsnotify_inode_watches_children(inode); - spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ @@ -143,10 +139,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) continue; spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); - if (watched) - child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; - else - child->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; + child->d_flags |= DCACHE_FSNOTIFY_PARENT_WATCHED; spin_unlock(&child->d_lock); } spin_unlock(&alias->d_lock); @@ -154,6 +147,24 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_unlock(&inode->i_lock); } +/* + * Lazily clear false positive PARENT_WATCHED flag for child whose parent had + * stopped watching children. + */ +static void fsnotify_clear_child_dentry_flag(struct inode *pinode, + struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + /* + * d_lock is a sufficient barrier to prevent observing a non-watched + * parent state from before the fsnotify_set_children_dentry_flags() + * or fsnotify_update_flags() call that had set PARENT_WATCHED. + */ + if (!fsnotify_inode_watches_children(pinode)) + dentry->d_flags &= ~DCACHE_FSNOTIFY_PARENT_WATCHED; + spin_unlock(&dentry->d_lock); +} + /* Are inode/sb/mount interested in parent and name info with this event? */ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, __u32 mask) @@ -228,7 +239,7 @@ int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data, p_inode = parent->d_inode; p_mask = fsnotify_inode_watches_children(p_inode); if (unlikely(parent_watched && !p_mask)) - __fsnotify_update_child_dentry_flags(p_inode); + fsnotify_clear_child_dentry_flag(p_inode, dentry); /* * Include parent/name in notification either if some notification diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 2d059f789ee3..663759ed6fbc 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -93,7 +93,7 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) * update the dentry->d_flags of all of inode's children to indicate if inode cares * about events that happen to its children. */ -extern void __fsnotify_update_child_dentry_flags(struct inode *inode); +extern void fsnotify_set_children_dentry_flags(struct inode *inode); extern struct kmem_cache *fsnotify_mark_connector_cachep; diff --git a/fs/notify/mark.c b/fs/notify/mark.c index c3eefa70633c..5e170e713088 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -250,6 +250,24 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) return fsnotify_update_iref(conn, want_iref); } +static bool fsnotify_conn_watches_children( + struct fsnotify_mark_connector *conn) +{ + if (conn->type != FSNOTIFY_OBJ_TYPE_INODE) + return false; + + return fsnotify_inode_watches_children(fsnotify_conn_inode(conn)); +} + +static void fsnotify_conn_set_children_dentry_flags( + struct fsnotify_mark_connector *conn) +{ + if (conn->type != FSNOTIFY_OBJ_TYPE_INODE) + return; + + fsnotify_set_children_dentry_flags(fsnotify_conn_inode(conn)); +} + /* * Calculate mask of events for a list of marks. The caller must make sure * connector and connector->obj cannot disappear under us. Callers achieve @@ -258,15 +276,23 @@ static void *__fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) */ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { + bool update_children; + if (!conn) return; spin_lock(&conn->lock); + update_children = !fsnotify_conn_watches_children(conn); __fsnotify_recalc_mask(conn); + update_children &= fsnotify_conn_watches_children(conn); spin_unlock(&conn->lock); - if (conn->type == FSNOTIFY_OBJ_TYPE_INODE) - __fsnotify_update_child_dentry_flags( - fsnotify_conn_inode(conn)); + /* + * Set children's PARENT_WATCHED flags only if parent started watching. + * When parent stops watching, we clear false positive PARENT_WATCHED + * flags lazily in __fsnotify_parent(). + */ + if (update_children) + fsnotify_conn_set_children_dentry_flags(conn); } /* Free all connectors queued for freeing once SRCU period ends */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 4dd6143db271..8be029bc50b1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -594,12 +594,14 @@ static inline __u32 fsnotify_parent_needed_mask(__u32 mask) static inline int fsnotify_inode_watches_children(struct inode *inode) { + __u32 parent_mask = READ_ONCE(inode->i_fsnotify_mask); + /* FS_EVENT_ON_CHILD is set if the inode may care */ - if (!(inode->i_fsnotify_mask & FS_EVENT_ON_CHILD)) + if (!(parent_mask & FS_EVENT_ON_CHILD)) return 0; /* this inode might care about child events, does it care about the * specific set of events that can happen on a child? */ - return inode->i_fsnotify_mask & FS_EVENTS_POSS_ON_CHILD; + return parent_mask & FS_EVENTS_POSS_ON_CHILD; } /* @@ -613,7 +615,7 @@ static inline void fsnotify_update_flags(struct dentry *dentry) /* * Serialisation of setting PARENT_WATCHED on the dentries is provided * by d_lock. If inotify_inode_watched changes after we have taken - * d_lock, the following __fsnotify_update_child_dentry_flags call will + * d_lock, the following fsnotify_set_children_dentry_flags call will * find our entry, so it will spin until we complete here, and update * us with the new state. */ -- cgit v1.2.3 From 61e2bbafb00e4b9a5de45e6448a7b6b818658576 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Fri, 31 May 2024 23:46:34 +0800 Subject: net: remove NULL-pointer net parameter in ip_metrics_convert When I was doing some experiments, I found that when using the first parameter, namely, struct net, in ip_metrics_convert() always triggers NULL pointer crash. Then I digged into this part, realizing that we can remove this one due to its uselessness. Signed-off-by: Jason Xing Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip.h | 3 +-- include/net/tcp.h | 2 +- net/ipv4/fib_semantics.c | 5 ++--- net/ipv4/metrics.c | 8 ++++---- net/ipv4/tcp_cong.c | 11 +++++------ net/ipv6/route.c | 2 +- 6 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 6d735e00d3f3..c5606cadb1a5 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -506,8 +506,7 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return mtu - lwtunnel_headroom(skb_dst(skb)->lwtstate, mtu); } -struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, - int fc_mx_len, +struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack); static inline void ip_fib_metrics_put(struct dst_metrics *fib_metrics) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 08c3b99501cf..a70fc39090fe 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1224,7 +1224,7 @@ extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find(const char *name); struct tcp_congestion_ops *tcp_ca_find_key(u32 key); -u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca); +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca); #ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); #else diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f669da98d11d..7b6b042208bd 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1030,7 +1030,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) bool ecn_ca = false; nla_strscpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); } else { if (nla_len(nla) != sizeof(u32)) return false; @@ -1459,8 +1459,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); if (!fi) goto failure; - fi->fib_metrics = ip_fib_metrics_init(fi->fib_net, cfg->fc_mx, - cfg->fc_mx_len, extack); + fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(fi->fib_metrics)) { err = PTR_ERR(fi->fib_metrics); kfree(fi); diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 0e3ee1532848..8ddac1f595ed 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -7,7 +7,7 @@ #include #include -static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, +static int ip_metrics_convert(struct nlattr *fc_mx, int fc_mx_len, u32 *metrics, struct netlink_ext_ack *extack) { @@ -31,7 +31,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, char tmp[TCP_CA_NAME_MAX]; nla_strscpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) { NL_SET_ERR_MSG(extack, "Unknown tcp congestion algorithm"); return -EINVAL; @@ -63,7 +63,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, return 0; } -struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, +struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, int fc_mx_len, struct netlink_ext_ack *extack) { @@ -77,7 +77,7 @@ struct dst_metrics *ip_fib_metrics_init(struct net *net, struct nlattr *fc_mx, if (unlikely(!fib_metrics)) return ERR_PTR(-ENOMEM); - err = ip_metrics_convert(net, fc_mx, fc_mx_len, fib_metrics->metrics, + err = ip_metrics_convert(fc_mx, fc_mx_len, fib_metrics->metrics, extack); if (!err) { refcount_set(&fib_metrics->refcnt, 1); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 28ffcfbeef14..48617d99abb0 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -46,8 +46,7 @@ void tcp_set_ca_state(struct sock *sk, const u8 ca_state) } /* Must be called with rcu lock held */ -static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, - const char *name) +static struct tcp_congestion_ops *tcp_ca_find_autoload(const char *name) { struct tcp_congestion_ops *ca = tcp_ca_find(name); @@ -178,7 +177,7 @@ int tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_cong return ret; } -u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) +u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; u32 key = TCP_CA_UNSPEC; @@ -186,7 +185,7 @@ u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) might_sleep(); rcu_read_lock(); - ca = tcp_ca_find_autoload(net, name); + ca = tcp_ca_find_autoload(name); if (ca) { key = ca->key; *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; @@ -283,7 +282,7 @@ int tcp_set_default_congestion_control(struct net *net, const char *name) int ret; rcu_read_lock(); - ca = tcp_ca_find_autoload(net, name); + ca = tcp_ca_find_autoload(name); if (!ca) { ret = -ENOENT; } else if (!bpf_try_module_get(ca, ca->owner)) { @@ -421,7 +420,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, if (!load) ca = tcp_ca_find(name); else - ca = tcp_ca_find_autoload(sock_net(sk), name); + ca = tcp_ca_find_autoload(name); /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a504b88ec06b..12f647d0fec0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3761,7 +3761,7 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, if (!rt) goto out; - rt->fib6_metrics = ip_fib_metrics_init(net, cfg->fc_mx, cfg->fc_mx_len, + rt->fib6_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack); if (IS_ERR(rt->fib6_metrics)) { err = PTR_ERR(rt->fib6_metrics); -- cgit v1.2.3 From 6f49c3fb563c0a95a838216eaf7d9b02ece44bf5 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Sat, 1 Jun 2024 00:29:17 +0100 Subject: net: caif: remove unused structs 'cfpktq' has been unused since commit 73d6ac633c6c ("caif: code cleanup"). 'caif_packet_funcs' is declared but never defined. Remove both of them. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Jacob Keller Signed-off-by: David S. Miller --- include/net/caif/caif_layer.h | 2 -- net/caif/cfpkt_skbuff.c | 7 ------- 2 files changed, 9 deletions(-) (limited to 'include') diff --git a/include/net/caif/caif_layer.h b/include/net/caif/caif_layer.h index 51f7bb42a936..0f45d875905f 100644 --- a/include/net/caif/caif_layer.h +++ b/include/net/caif/caif_layer.h @@ -11,9 +11,7 @@ struct cflayer; struct cfpkt; -struct cfpktq; struct caif_payload_info; -struct caif_packet_funcs; #define CAIF_LAYER_NAME_SZ 16 diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c index 7796414d47e5..2ae8cfa3df88 100644 --- a/net/caif/cfpkt_skbuff.c +++ b/net/caif/cfpkt_skbuff.c @@ -21,13 +21,6 @@ do { \ pr_warn(errmsg); \ } while (0) -struct cfpktq { - struct sk_buff_head head; - atomic_t count; - /* Lock protects count updates */ - spinlock_t lock; -}; - /* * net/caif/ is generic and does not * understand SKB, so we do this typecast -- cgit v1.2.3 From 310ec03841a36e3f45fb528f0dfdfe5b9e84b037 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Wed, 5 Jun 2024 13:26:05 +1200 Subject: dma-buf: align fd_flags and heap_flags with dma_heap_allocation_data dma_heap_allocation_data defines the UAPI as follows: struct dma_heap_allocation_data { __u64 len; __u32 fd; __u32 fd_flags; __u64 heap_flags; }; However, dma_heap_buffer_alloc() casts both fd_flags and heap_flags into unsigned int. We're inconsistent with types in the non UAPI arguments. This patch fixes it. Signed-off-by: Barry Song Acked-by: John Stultz Signed-off-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20240605012605.5341-1-21cnbao@gmail.com --- drivers/dma-buf/dma-heap.c | 4 ++-- include/uapi/linux/dma-heap.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 84ae708fafe7..2298ca5e112e 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -50,8 +50,8 @@ static struct class *dma_heap_class; static DEFINE_XARRAY_ALLOC(dma_heap_minors); static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len, - unsigned int fd_flags, - unsigned int heap_flags) + u32 fd_flags, + u64 heap_flags) { struct dma_buf *dmabuf; int fd; diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h index 6f84fa08e074..a4cf716a49fa 100644 --- a/include/uapi/linux/dma-heap.h +++ b/include/uapi/linux/dma-heap.h @@ -19,7 +19,7 @@ #define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE) /* Currently no heap flags */ -#define DMA_HEAP_VALID_HEAP_FLAGS (0) +#define DMA_HEAP_VALID_HEAP_FLAGS (0ULL) /** * struct dma_heap_allocation_data - metadata passed from userspace for -- cgit v1.2.3 From 82dc29b9737edf2d13561ebcf6212c0b88c41129 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 2 Jun 2024 16:18:52 +0200 Subject: devlink: Constify the 'table_ops' parameter of devl_dpipe_table_register() "struct devlink_dpipe_table_ops" only contains some function pointers. Update "struct devlink_dpipe_table" and the 'table_ops' parameter of devl_dpipe_table_register() so that structures in drivers can be constified. Constifying these structures will move some data to a read-only section, so increase overall security. Signed-off-by: Christophe JAILLET Reviewed-by: Wojciech Drewek Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/net/devlink.h | 4 ++-- net/devlink/dpipe.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index 35eb0f884386..db5eff6cb60f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -352,7 +352,7 @@ struct devlink_dpipe_table { bool resource_valid; u64 resource_id; u64 resource_units; - struct devlink_dpipe_table_ops *table_ops; + const struct devlink_dpipe_table_ops *table_ops; struct rcu_head rcu; }; @@ -1751,7 +1751,7 @@ void devl_sb_unregister(struct devlink *devlink, unsigned int sb_index); void devlink_sb_unregister(struct devlink *devlink, unsigned int sb_index); int devl_dpipe_table_register(struct devlink *devlink, const char *table_name, - struct devlink_dpipe_table_ops *table_ops, + const struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern); void devl_dpipe_table_unregister(struct devlink *devlink, const char *table_name); diff --git a/net/devlink/dpipe.c b/net/devlink/dpipe.c index a72a9292efc5..55009b377447 100644 --- a/net/devlink/dpipe.c +++ b/net/devlink/dpipe.c @@ -839,7 +839,7 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); */ int devl_dpipe_table_register(struct devlink *devlink, const char *table_name, - struct devlink_dpipe_table_ops *table_ops, + const struct devlink_dpipe_table_ops *table_ops, void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; -- cgit v1.2.3 From 5bbe5872fed4497a192556426d75fd9223c5bfeb Mon Sep 17 00:00:00 2001 From: Xianwei Zhao Date: Wed, 29 May 2024 11:10:33 +0800 Subject: dt-bindings: power: add Amlogic A4 power domains Add devicetree binding document and related header file for Amlogic A4 secure power domains. Acked-by: Krzysztof Kozlowski Signed-off-by: Xianwei Zhao Link: https://lore.kernel.org/r/20240529-a4_secpowerdomain-v2-1-47502fc0eaf3@amlogic.com Signed-off-by: Ulf Hansson --- .../bindings/power/amlogic,meson-sec-pwrc.yaml | 1 + include/dt-bindings/power/amlogic,a4-pwrc.h | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 include/dt-bindings/power/amlogic,a4-pwrc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml b/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml index dab3d92bc273..59915f9d4860 100644 --- a/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml +++ b/Documentation/devicetree/bindings/power/amlogic,meson-sec-pwrc.yaml @@ -20,6 +20,7 @@ properties: enum: - amlogic,meson-a1-pwrc - amlogic,meson-s4-pwrc + - amlogic,a4-pwrc - amlogic,c3-pwrc - amlogic,t7-pwrc diff --git a/include/dt-bindings/power/amlogic,a4-pwrc.h b/include/dt-bindings/power/amlogic,a4-pwrc.h new file mode 100644 index 000000000000..bd2f9c558d22 --- /dev/null +++ b/include/dt-bindings/power/amlogic,a4-pwrc.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR MIT) */ +/* + * Copyright (C) 2024 Amlogic, Inc. All rights reserved + */ +#ifndef _DT_BINDINGS_AMLOGIC_A4_POWER_H +#define _DT_BINDINGS_AMLOGIC_A4_POWER_H + +#define PWRC_A4_AUDIO_ID 0 +#define PWRC_A4_SDIOA_ID 1 +#define PWRC_A4_EMMC_ID 2 +#define PWRC_A4_USB_COMB_ID 3 +#define PWRC_A4_ETH_ID 4 +#define PWRC_A4_VOUT_ID 5 +#define PWRC_A4_AUDIO_PDM_ID 6 +#define PWRC_A4_DMC_ID 7 +#define PWRC_A4_SYS_WRAP_ID 8 +#define PWRC_A4_AO_I2C_S_ID 9 +#define PWRC_A4_AO_UART_ID 10 +#define PWRC_A4_AO_IR_ID 11 + +#endif -- cgit v1.2.3 From f086edef71be7174a16c1ed67ac65a085cda28b1 Mon Sep 17 00:00:00 2001 From: Kevin Yang Date: Mon, 3 Jun 2024 21:30:54 +0000 Subject: tcp: add sysctl_tcp_rto_min_us Adding a sysctl knob to allow user to specify a default rto_min at socket init time, other than using the hard coded 200ms default rto_min. Note that the rto_min route option has the highest precedence for configuring this setting, followed by the TCP_BPF_RTO_MIN socket option, followed by the tcp_rto_min_us sysctl. Signed-off-by: Kevin Yang Reviewed-by: Neal Cardwell Reviewed-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Tony Lu Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 13 +++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 8 ++++++++ net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_ipv4.c | 1 + 5 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index bd50df6a5a42..6e99eccdb837 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1196,6 +1196,19 @@ tcp_pingpong_thresh - INTEGER Default: 1 +tcp_rto_min_us - INTEGER + Minimal TCP retransmission timeout (in microseconds). Note that the + rto_min route option has the highest precedence for configuring this + setting, followed by the TCP_BPF_RTO_MIN socket option, followed by + this tcp_rto_min_us sysctl. + + The recommended practice is to use a value less or equal to 200000 + microseconds. + + Possible Values: 1 - INT_MAX + + Default: 200000 + UDP variables ============= diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index c356c458b340..a91bb971f901 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -170,6 +170,7 @@ struct netns_ipv4 { u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; u8 sysctl_tcp_timestamps; + int sysctl_tcp_rto_min_us; u8 sysctl_tcp_recovery; u8 sysctl_tcp_thin_linear_timeouts; u8 sysctl_tcp_slow_start_after_idle; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index d7892f34a15b..bb64c0ef092d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1503,6 +1503,14 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ONE, }, + { + .procname = "tcp_rto_min_us", + .data = &init_net.ipv4.sysctl_tcp_rto_min_us, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, }; static __net_init int ipv4_sysctl_init_net(struct net *net) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5fa68e7f6ddb..fa43aaacd92b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -420,6 +420,7 @@ void tcp_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + int rto_min_us; tp->out_of_order_queue = RB_ROOT; sk->tcp_rtx_queue = RB_ROOT; @@ -428,7 +429,8 @@ void tcp_init_sock(struct sock *sk) INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; - icsk->icsk_rto_min = TCP_RTO_MIN; + rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us); + icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us); icsk->icsk_delack_max = TCP_DELACK_MAX; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3ef4b274c24b..3613e08ca794 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -3502,6 +3502,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_shrink_window = 0; net->ipv4.sysctl_tcp_pingpong_thresh = 1; + net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN); return 0; } -- cgit v1.2.3 From 6fef518594bcb7e374f809717281bd02894929f8 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 25 Apr 2024 15:07:01 -0700 Subject: KVM: x86: Add a capability to configure bus frequency for APIC timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add KVM_CAP_X86_APIC_BUS_CYCLES_NS capability to configure the APIC bus clock frequency for APIC timer emulation. Allow KVM_ENABLE_CAPABILITY(KVM_CAP_X86_APIC_BUS_CYCLES_NS) to set the frequency in nanoseconds. When using this capability, the user space VMM should configure CPUID leaf 0x15 to advertise the frequency. Vishal reported that the TDX guest kernel expects a 25MHz APIC bus frequency but ends up getting interrupts at a significantly higher rate. The TDX architecture hard-codes the core crystal clock frequency to 25MHz and mandates exposing it via CPUID leaf 0x15. The TDX architecture does not allow the VMM to override the value. In addition, per Intel SDM: "The APIC timer frequency will be the processor’s bus clock or core crystal clock frequency (when TSC/core crystal clock ratio is enumerated in CPUID leaf 0x15) divided by the value specified in the divide configuration register." The resulting 25MHz APIC bus frequency conflicts with the KVM hardcoded APIC bus frequency of 1GHz. The KVM doesn't enumerate CPUID leaf 0x15 to the guest unless the user space VMM sets it using KVM_SET_CPUID. If the CPUID leaf 0x15 is enumerated, the guest kernel uses it as the APIC bus frequency. If not, the guest kernel measures the frequency based on other known timers like the ACPI timer or the legacy PIT. As reported by Vishal the TDX guest kernel expects a 25MHz timer frequency but gets timer interrupt more frequently due to the 1GHz frequency used by KVM. To ensure that the guest doesn't have a conflicting view of the APIC bus frequency, allow the userspace to tell KVM to use the same frequency that TDX mandates instead of the default 1Ghz. Reported-by: Vishal Annapurve Closes: https://lore.kernel.org/lkml/20231006011255.4163884-1-vannapurve@google.com Signed-off-by: Isaku Yamahata Reviewed-by: Rick Edgecombe Co-developed-by: Reinette Chatre Signed-off-by: Reinette Chatre Reviewed-by: Xiaoyao Li Reviewed-by: Yuan Yao Link: https://lore.kernel.org/r/6748a4c12269e756f0c48680da8ccc5367c31ce7.1714081726.git.reinette.chatre@intel.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 17 +++++++++++++++++ arch/x86/kvm/x86.c | 27 +++++++++++++++++++++++++++ include/uapi/linux/kvm.h | 1 + 3 files changed, 45 insertions(+) (limited to 'include') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a71d91978d9e..cb31ae071cba 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8070,6 +8070,23 @@ error/annotated fault. See KVM_EXIT_MEMORY_FAULT for more information. +7.35 KVM_CAP_X86_APIC_BUS_CYCLES_NS +----------------------------------- + +:Architectures: x86 +:Target: VM +:Parameters: args[0] is the desired APIC bus clock rate, in nanoseconds +:Returns: 0 on success, -EINVAL if args[0] contains an invalid value for the + frequency or if any vCPUs have been created, -ENXIO if a virtual + local APIC has not been created using KVM_CREATE_IRQCHIP. + +This capability sets the VM's APIC bus clock frequency, used by KVM's in-kernel +virtual APIC when emulating APIC timers. KVM's default value can be retrieved +by KVM_CHECK_EXTENSION. + +Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the +core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest. + 8. Other capabilities. ====================== diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9bdc841d5928..09e3460bcbc5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4706,6 +4706,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MEMORY_FAULT_INFO: r = 1; break; + case KVM_CAP_X86_APIC_BUS_CYCLES_NS: + r = APIC_BUS_CYCLE_NS_DEFAULT; + break; case KVM_CAP_EXIT_HYPERCALL: r = KVM_EXIT_HYPERCALL_VALID_MASK; break; @@ -6746,6 +6749,30 @@ split_irqchip_unlock: } mutex_unlock(&kvm->lock); break; + case KVM_CAP_X86_APIC_BUS_CYCLES_NS: { + u64 bus_cycle_ns = cap->args[0]; + u64 unused; + + /* + * Guard against overflow in tmict_to_ns(). 128 is the highest + * divide value that can be programmed in APIC_TDCR. + */ + r = -EINVAL; + if (!bus_cycle_ns || + check_mul_overflow((u64)U32_MAX * 128, bus_cycle_ns, &unused)) + break; + + r = 0; + mutex_lock(&kvm->lock); + if (!irqchip_in_kernel(kvm)) + r = -ENXIO; + else if (kvm->created_vcpus) + r = -EINVAL; + else + kvm->arch.apic_bus_cycle_ns = bus_cycle_ns; + mutex_unlock(&kvm->lock); + break; + } default: r = -EINVAL; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d03842abae57..ec998e6b6555 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -917,6 +917,7 @@ struct kvm_enable_cap { #define KVM_CAP_MEMORY_ATTRIBUTES 233 #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 236 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From f0dc887f21d18791037c0166f652c67da761f16f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 27 May 2024 17:34:47 -0700 Subject: sched/core: Move preempt_model_*() helpers from sched.h to preempt.h Move the declarations and inlined implementations of the preempt_model_*() helpers to preempt.h so that they can be referenced in spinlock.h without creating a potential circular dependency between spinlock.h and sched.h. No functional change intended. Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ankur Arora Link: https://lkml.kernel.org/r/20240528003521.979836-2-ankur.a.arora@oracle.com --- include/linux/preempt.h | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 41 ----------------------------------------- 2 files changed, 41 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 7233e9cf1bab..ce76f1a45722 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -481,4 +481,45 @@ DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) +#ifdef CONFIG_PREEMPT_DYNAMIC + +extern bool preempt_model_none(void); +extern bool preempt_model_voluntary(void); +extern bool preempt_model_full(void); + +#else + +static inline bool preempt_model_none(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_NONE); +} +static inline bool preempt_model_voluntary(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY); +} +static inline bool preempt_model_full(void) +{ + return IS_ENABLED(CONFIG_PREEMPT); +} + +#endif + +static inline bool preempt_model_rt(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_RT); +} + +/* + * Does the preemption model allow non-cooperative preemption? + * + * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with + * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the + * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the + * PREEMPT_NONE model. + */ +static inline bool preempt_model_preemptible(void) +{ + return preempt_model_full() || preempt_model_rt(); +} + #endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 61591ac6eab6..90691d99027e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2064,47 +2064,6 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock); __cond_resched_rwlock_write(lock); \ }) -#ifdef CONFIG_PREEMPT_DYNAMIC - -extern bool preempt_model_none(void); -extern bool preempt_model_voluntary(void); -extern bool preempt_model_full(void); - -#else - -static inline bool preempt_model_none(void) -{ - return IS_ENABLED(CONFIG_PREEMPT_NONE); -} -static inline bool preempt_model_voluntary(void) -{ - return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY); -} -static inline bool preempt_model_full(void) -{ - return IS_ENABLED(CONFIG_PREEMPT); -} - -#endif - -static inline bool preempt_model_rt(void) -{ - return IS_ENABLED(CONFIG_PREEMPT_RT); -} - -/* - * Does the preemption model allow non-cooperative preemption? - * - * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with - * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the - * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the - * PREEMPT_NONE model. - */ -static inline bool preempt_model_preemptible(void) -{ - return preempt_model_full() || preempt_model_rt(); -} - static __always_inline bool need_resched(void) { return unlikely(tif_need_resched()); -- cgit v1.2.3 From c793a62823d1ce8f70d9cfc7803e3ea436277cda Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 27 May 2024 17:34:48 -0700 Subject: sched/core: Drop spinlocks on contention iff kernel is preemptible Use preempt_model_preemptible() to detect a preemptible kernel when deciding whether or not to reschedule in order to drop a contended spinlock or rwlock. Because PREEMPT_DYNAMIC selects PREEMPTION, kernels built with PREEMPT_DYNAMIC=y will yield contended locks even if the live preemption model is "none" or "voluntary". In short, make kernels with dynamically selected models behave the same as kernels with statically selected models. Somewhat counter-intuitively, NOT yielding a lock can provide better latency for the relevant tasks/processes. E.g. KVM x86's mmu_lock, a rwlock, is often contended between an invalidation event (takes mmu_lock for write) and a vCPU servicing a guest page fault (takes mmu_lock for read). For _some_ setups, letting the invalidation task complete even if there is mmu_lock contention provides lower latency for *all* tasks, i.e. the invalidation completes sooner *and* the vCPU services the guest page fault sooner. But even KVM's mmu_lock behavior isn't uniform, e.g. the "best" behavior can vary depending on the host VMM, the guest workload, the number of vCPUs, the number of pCPUs in the host, why there is lock contention, etc. In other words, simply deleting the CONFIG_PREEMPTION guard (or doing the opposite and removing contention yielding entirely) needs to come with a big pile of data proving that changing the status quo is a net positive. Opportunistically document this side effect of preempt=full, as yielding contended spinlocks can have significant, user-visible impact. Fixes: c597bfddc9e9 ("sched: Provide Kconfig support for default dynamic preempt mode") Signed-off-by: Sean Christopherson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Ankur Arora Reviewed-by: Chen Yu Link: https://lore.kernel.org/kvm/ef81ff36-64bb-4cfe-ae9b-e3acf47bff24@proxmox.com --- Documentation/admin-guide/kernel-parameters.txt | 4 +++- include/linux/spinlock.h | 14 ++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 500cfa776225..555e6b53eb41 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4752,7 +4752,9 @@ none - Limited to cond_resched() calls voluntary - Limited to cond_resched() and might_sleep() calls full - Any section that isn't explicitly preempt disabled - can be preempted anytime. + can be preempted anytime. Tasks will also yield + contended spinlocks (if the critical section isn't + explicitly preempt disabled beyond the lock itself). print-fatal-signals= [KNL] debug: print fatal signals diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3fcd20de6ca8..63dd8cf3c3c2 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -462,11 +462,10 @@ static __always_inline int spin_is_contended(spinlock_t *lock) */ static inline int spin_needbreak(spinlock_t *lock) { -#ifdef CONFIG_PREEMPTION + if (!preempt_model_preemptible()) + return 0; + return spin_is_contended(lock); -#else - return 0; -#endif } /* @@ -479,11 +478,10 @@ static inline int spin_needbreak(spinlock_t *lock) */ static inline int rwlock_needbreak(rwlock_t *lock) { -#ifdef CONFIG_PREEMPTION + if (!preempt_model_preemptible()) + return 0; + return rwlock_is_contended(lock); -#else - return 0; -#endif } /* -- cgit v1.2.3 From dff60734fc7606fabde668ab6a26feacec8787cc Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 4 Jun 2024 17:52:56 +0200 Subject: vfs: retire user_path_at_empty and drop empty arg from getname_flags No users after do_readlinkat started doing the job on its own. Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240604155257.109500-3-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fsopen.c | 2 +- fs/namei.c | 16 +++++++--------- fs/stat.c | 6 +++--- include/linux/fs.h | 2 +- include/linux/namei.h | 8 +------- io_uring/statx.c | 3 +-- io_uring/xattr.c | 4 ++-- 7 files changed, 16 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/fs/fsopen.c b/fs/fsopen.c index 18fe979da7e2..ed2dd000622e 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -448,7 +448,7 @@ SYSCALL_DEFINE5(fsconfig, fallthrough; case FSCONFIG_SET_PATH: param.type = fs_value_is_filename; - param.name = getname_flags(_value, lookup_flags, NULL); + param.name = getname_flags(_value, lookup_flags); if (IS_ERR(param.name)) { ret = PTR_ERR(param.name); goto out_key; diff --git a/fs/namei.c b/fs/namei.c index 37fb0a8aa09a..950ad6bdd9fe 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -126,7 +126,7 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) struct filename * -getname_flags(const char __user *filename, int flags, int *empty) +getname_flags(const char __user *filename, int flags) { struct filename *result; char *kname; @@ -190,8 +190,6 @@ getname_flags(const char __user *filename, int flags, int *empty) atomic_set(&result->refcnt, 1); /* The empty path is special. */ if (unlikely(!len)) { - if (empty) - *empty = 1; if (!(flags & LOOKUP_EMPTY)) { putname(result); return ERR_PTR(-ENOENT); @@ -209,13 +207,13 @@ getname_uflags(const char __user *filename, int uflags) { int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0; - return getname_flags(filename, flags, NULL); + return getname_flags(filename, flags); } struct filename * getname(const char __user * filename) { - return getname_flags(filename, 0, NULL); + return getname_flags(filename, 0); } struct filename * @@ -2922,16 +2920,16 @@ int path_pts(struct path *path) } #endif -int user_path_at_empty(int dfd, const char __user *name, unsigned flags, - struct path *path, int *empty) +int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) { - struct filename *filename = getname_flags(name, flags, empty); + struct filename *filename = getname_flags(name, flags); int ret = filename_lookup(dfd, filename, flags, path, NULL); putname(filename); return ret; } -EXPORT_SYMBOL(user_path_at_empty); +EXPORT_SYMBOL(user_path_at); int __check_sticky(struct mnt_idmap *idmap, struct inode *dir, struct inode *inode) diff --git a/fs/stat.c b/fs/stat.c index 7f7861544500..16aa1f5ceec4 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -300,7 +300,7 @@ int vfs_fstatat(int dfd, const char __user *filename, return vfs_fstat(dfd, stat); } - name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL); + name = getname_flags(filename, getname_statx_lookup_flags(statx_flags)); ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS); putname(name); @@ -496,7 +496,7 @@ static int do_readlinkat(int dfd, const char __user *pathname, return -EINVAL; retry: - name = getname_flags(pathname, lookup_flags, NULL); + name = getname_flags(pathname, lookup_flags); error = filename_lookup(dfd, name, lookup_flags, &path, NULL); if (unlikely(error)) { putname(name); @@ -710,7 +710,7 @@ SYSCALL_DEFINE5(statx, int ret; struct filename *name; - name = getname_flags(filename, getname_statx_lookup_flags(flags), NULL); + name = getname_flags(filename, getname_statx_lookup_flags(flags)); ret = do_statx(dfd, name, flags, mask, buffer); putname(name); diff --git a/include/linux/fs.h b/include/linux/fs.h index 639885621608..bfc1e6407bf6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2701,7 +2701,7 @@ static inline struct file *file_clone_open(struct file *file) } extern int filp_close(struct file *, fl_owner_t id); -extern struct filename *getname_flags(const char __user *, int, int *); +extern struct filename *getname_flags(const char __user *, int); extern struct filename *getname_uflags(const char __user *, int); extern struct filename *getname(const char __user *); extern struct filename *getname_kernel(const char *); diff --git a/include/linux/namei.h b/include/linux/namei.h index 967aa9ea9f96..8ec8fed3bce8 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -50,13 +50,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT}; extern int path_pts(struct path *path); -extern int user_path_at_empty(int, const char __user *, unsigned, struct path *, int *empty); - -static inline int user_path_at(int dfd, const char __user *name, unsigned flags, - struct path *path) -{ - return user_path_at_empty(dfd, name, flags, path, NULL); -} +extern int user_path_at(int, const char __user *, unsigned, struct path *); struct dentry *lookup_one_qstr_excl(const struct qstr *name, struct dentry *base, diff --git a/io_uring/statx.c b/io_uring/statx.c index abb874209caa..f7f9b202eec0 100644 --- a/io_uring/statx.c +++ b/io_uring/statx.c @@ -37,8 +37,7 @@ int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) sx->flags = READ_ONCE(sqe->statx_flags); sx->filename = getname_flags(path, - getname_statx_lookup_flags(sx->flags), - NULL); + getname_statx_lookup_flags(sx->flags)); if (IS_ERR(sx->filename)) { int ret = PTR_ERR(sx->filename); diff --git a/io_uring/xattr.c b/io_uring/xattr.c index 44905b82eea8..6cf41c3bc369 100644 --- a/io_uring/xattr.c +++ b/io_uring/xattr.c @@ -96,7 +96,7 @@ int io_getxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); - ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); + ix->filename = getname_flags(path, LOOKUP_FOLLOW); if (IS_ERR(ix->filename)) { ret = PTR_ERR(ix->filename); ix->filename = NULL; @@ -189,7 +189,7 @@ int io_setxattr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); - ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); + ix->filename = getname_flags(path, LOOKUP_FOLLOW); if (IS_ERR(ix->filename)) { ret = PTR_ERR(ix->filename); ix->filename = NULL; -- cgit v1.2.3 From 758191c9ea7bcc45dd99398a538ae4ab27c4029e Mon Sep 17 00:00:00 2001 From: Yoray Zack Date: Tue, 4 Jun 2024 00:22:17 +0300 Subject: net/mlx5e: SHAMPO, Use KSMs instead of KLMs KSM Mkey is KLM Mkey with a fixed buffer size. Due to this fact, it is a faster mechanism than KLM. SHAMPO feature used KLMs Mkeys for memory mappings of its headers buffer. As it used KLMs with the same buffer size for each entry, we can use KSMs instead. This commit changes the Mkeys that map the SHAMPO headers buffer from KLMs to KSMs. Signed-off-by: Yoray Zack Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240603212219.1037656-13-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 20 +------ .../net/ethernet/mellanox/mlx5/core/en/params.c | 12 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 19 +++++++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 21 ++++--- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 65 +++++++++++----------- include/linux/mlx5/device.h | 1 + 6 files changed, 71 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ff326601d4a4..bec784d25d7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -80,6 +80,7 @@ struct page_pool; SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define MLX5E_RX_MAX_HEAD (256) +#define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8) #define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9) #define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE) #define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64) @@ -146,25 +147,6 @@ struct page_pool; #define MLX5E_TX_XSK_POLL_BUDGET 64 #define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */ -#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\ - (sizeof(struct mlx5e_umr_wqe) +\ - (sizeof(struct mlx5_klm) * (sgl_len))) - -#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \ - (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB)) - -#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\ - (DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS)) - -#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\ - (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm)) - -#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\ - ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT) - -#define MLX5E_MAX_KLM_PER_WQE(mdev) \ - MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) - #define mlx5e_state_dereference(priv, p) \ rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index ec819dfc98be..6c9ccccca81e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -1071,18 +1071,18 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param) { - int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest; + int max_num_of_umr_per_wqe, max_hd_per_wqe, max_ksm_per_umr, rest; void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq); int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz)); u32 wqebbs; - max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev); + max_ksm_per_umr = MLX5E_MAX_KSM_PER_WQE(mdev); max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param); - max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr; - rest = max_hd_per_wqe % max_klm_per_umr; - wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe; + max_num_of_umr_per_wqe = max_hd_per_wqe / max_ksm_per_umr; + rest = max_hd_per_wqe % max_ksm_per_umr; + wqebbs = MLX5E_KSM_UMR_WQEBBS(max_ksm_per_umr) * max_num_of_umr_per_wqe; if (rest) - wqebbs += MLX5E_KLM_UMR_WQEBBS(rest); + wqebbs += MLX5E_KSM_UMR_WQEBBS(rest); wqebbs *= wq_size; return wqebbs; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 879d698b6119..d1f0f868d494 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -34,6 +34,25 @@ #define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND) +#define MLX5E_KSM_UMR_WQE_SZ(sgl_len)\ + (sizeof(struct mlx5e_umr_wqe) +\ + (sizeof(struct mlx5_ksm) * (sgl_len))) + +#define MLX5E_KSM_UMR_WQEBBS(ksm_entries) \ + (DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_BB)) + +#define MLX5E_KSM_UMR_DS_CNT(ksm_entries)\ + (DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_DS)) + +#define MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size)\ + (((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_ksm)) + +#define MLX5E_KSM_ENTRIES_PER_WQE(wqe_size)\ + ALIGN_DOWN(MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT) + +#define MLX5E_MAX_KSM_PER_WQE(mdev) \ + MLX5E_KSM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev)) + static inline ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d21a87ddc934..2a3e0de51f0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -504,8 +504,8 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev, return err; } -static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, - u64 nentries, +static int mlx5e_create_umr_ksm_mkey(struct mlx5_core_dev *mdev, + u64 nentries, u8 log_entry_size, u32 *umr_mkey) { int inlen; @@ -525,12 +525,13 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev, MLX5_SET(mkc, mkc, umr_en, 1); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); - MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KSM); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET(mkc, mkc, translations_octword_size, nentries); - MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, log_page_size, log_entry_size); + MLX5_SET64(mkc, mkc, len, nentries << log_entry_size); err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen); kvfree(in); @@ -565,14 +566,16 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq) { - u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); + u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size)); - if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) { - mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", - max_klm_size, rq->mpwqe.shampo->hd_per_wq); + if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) { + mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n", + max_ksm_size, rq->mpwqe.shampo->hd_per_wq); return -EINVAL; } - return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, + + return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq, + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE, &rq->mpwqe.shampo->mkey); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3af4f70de334..f1fbf60d0356 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -619,25 +619,25 @@ static int bitmap_find_window(unsigned long *bitmap, int len, return min(len, count); } -static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, - __be32 key, u16 offset, u16 klm_len, u16 wqe_bbs) +static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe, + __be32 key, u16 offset, u16 ksm_len) { - memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms)); + memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_ksms)); umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); umr_wqe->ctrl.umr_mkey = key; umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) - | MLX5E_KLM_UMR_DS_CNT(klm_len)); + | MLX5E_KSM_UMR_DS_CNT(ksm_len)); umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE; umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset); - umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len); + umr_wqe->uctrl.xlt_octowords = cpu_to_be16(ksm_len); umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); } static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, struct mlx5e_icosq *sq, - u16 klm_entries, u16 index) + u16 ksm_entries, u16 index) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; u16 entries, pi, header_offset, err, wqe_bbs, new_entries; @@ -650,20 +650,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, int headroom, i; headroom = rq->buff.headroom; - new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1)); - entries = ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); - wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries); + new_entries = ksm_entries - (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1)); + entries = ALIGN(ksm_entries, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT); + wqe_bbs = MLX5E_KSM_UMR_WQEBBS(entries); pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs); umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi); - build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs); + build_ksm_umr(sq, umr_wqe, shampo->key, index, entries); frag_page = &shampo->pages[page_index]; for (i = 0; i < entries; i++, index++) { dma_info = &shampo->info[index]; - if (i >= klm_entries || (index < shampo->pi && shampo->pi - index < - MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT)) - goto update_klm; + if (i >= ksm_entries || (index < shampo->pi && shampo->pi - index < + MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT)) + goto update_ksm; header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) << MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE; if (!(header_offset & (PAGE_SIZE - 1))) { @@ -683,12 +683,11 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq, dma_info->frag_page = frag_page; } -update_klm: - umr_wqe->inline_klms[i].bcount = - cpu_to_be32(MLX5E_RX_MAX_HEAD); - umr_wqe->inline_klms[i].key = cpu_to_be32(lkey); - umr_wqe->inline_klms[i].va = - cpu_to_be64(dma_info->addr + headroom); +update_ksm: + umr_wqe->inline_ksms[i] = (struct mlx5_ksm) { + .key = cpu_to_be32(lkey), + .va = cpu_to_be64(dma_info->addr + headroom), + }; } sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { @@ -720,37 +719,37 @@ err_unmap: static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) { struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; - u16 klm_entries, num_wqe, index, entries_before; + u16 ksm_entries, num_wqe, index, entries_before; struct mlx5e_icosq *sq = rq->icosq; - int i, err, max_klm_entries, len; + int i, err, max_ksm_entries, len; - max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev); - klm_entries = bitmap_find_window(shampo->bitmap, + max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev); + ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); - if (!klm_entries) + if (!ksm_entries) return 0; - klm_entries += (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1)); - index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); + ksm_entries += (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1)); + index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT); entries_before = shampo->hd_per_wq - index; - if (unlikely(entries_before < klm_entries)) - num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) + - DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries); + if (unlikely(entries_before < ksm_entries)) + num_wqe = DIV_ROUND_UP(entries_before, max_ksm_entries) + + DIV_ROUND_UP(ksm_entries - entries_before, max_ksm_entries); else - num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries); + num_wqe = DIV_ROUND_UP(ksm_entries, max_ksm_entries); for (i = 0; i < num_wqe; i++) { - len = (klm_entries > max_klm_entries) ? max_klm_entries : - klm_entries; + len = (ksm_entries > max_ksm_entries) ? max_ksm_entries : + ksm_entries; if (unlikely(index + len > shampo->hd_per_wq)) len = shampo->hd_per_wq - index; err = mlx5e_build_shampo_hd_umr(rq, sq, len, index); if (unlikely(err)) return err; index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1); - klm_entries -= len; + ksm_entries -= len; } return 0; diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index d7bb31d9a446..da09bfaa7b81 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -294,6 +294,7 @@ enum { #define MLX5_UMR_FLEX_ALIGNMENT 0x40 #define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt)) #define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm)) +#define MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_ksm)) #define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8) -- cgit v1.2.3 From 99be56171fa9ffea494dfe3d4a7f6e7e51630c2e Mon Sep 17 00:00:00 2001 From: Yoray Zack Date: Tue, 4 Jun 2024 00:22:18 +0300 Subject: net/mlx5e: SHAMPO, Re-enable HW-GRO Add back HW-GRO to the reported features. As the current implementation of HW-GRO uses KSMs with a specific fixed buffer size (256B) to map its headers buffer, we reported the feature only if the NIC is supporting KSM and the minimum value for buffer size is below the requested one. iperf3 bandwidth comparison: +---------+--------+--------+-----------+ | streams | SW GRO | HW GRO | Unit | |---------+--------+--------+-----------| | 1 | 36 | 42 | Gbits/sec | | 4 | 34 | 39 | Gbits/sec | | 8 | 31 | 35 | Gbits/sec | +---------+--------+--------+-----------+ A downstream patch will add skb fragment coalescing which will improve performance considerably. Benchmark details: VM based setup CPU: Intel(R) Xeon(R) Platinum 8380 CPU, 24 cores NIC: ConnectX-7 100GbE iperf3 and irq running on same CPU over a single receive queue Signed-off-by: Yoray Zack Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240603212219.1037656-14-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 26 +++++++++++++++++++++++ include/linux/mlx5/mlx5_ifc.h | 16 +++++++++----- 2 files changed, 37 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2a3e0de51f0e..44a64d062e42 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -74,6 +74,27 @@ #include "lib/devcom.h" #include "lib/sd.h" +static bool mlx5e_hw_gro_supported(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, shampo)) + return false; + + /* Our HW-GRO implementation relies on "KSM Mkey" for + * SHAMPO headers buffer mapping + */ + if (!MLX5_CAP_GEN(mdev, fixed_buffer_size)) + return false; + + if (!MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer_valid)) + return false; + + if (MLX5_CAP_GEN_2(mdev, min_mkey_log_entity_size_fixed_buffer) > + MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE) + return false; + + return true; +} + bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode) { @@ -5331,6 +5352,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; netdev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; + if (mlx5e_hw_gro_supported(mdev) && + mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT, + MLX5E_MPWRQ_UMR_MODE_ALIGNED)) + netdev->hw_features |= NETIF_F_GRO_HW; + if (mlx5e_tunnel_any_tx_proto_supported(mdev)) { netdev->hw_enc_features |= NETIF_F_HW_CSUM; netdev->hw_enc_features |= NETIF_F_TSO; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..17acd0f3ca8e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1526,8 +1526,7 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 ts_cqe_to_dest_cqn[0x1]; u8 reserved_at_b3[0x6]; u8 go_back_n[0x1]; - u8 shampo[0x1]; - u8 reserved_at_bb[0x5]; + u8 reserved_at_ba[0x6]; u8 max_sgl_for_optimized_performance[0x8]; u8 log_max_cq_sz[0x8]; @@ -1744,7 +1743,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; - u8 reserved_at_2a0[0x10]; + u8 reserved_at_2a0[0xb]; + u8 shampo[0x1]; + u8 reserved_at_2ac[0x4]; u8 max_wqe_sz_rq[0x10]; u8 max_flow_counter_31_16[0x10]; @@ -2017,7 +2018,8 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 reserved_at_250[0x10]; u8 reserved_at_260[0x120]; - u8 reserved_at_380[0x10]; + u8 reserved_at_380[0xb]; + u8 min_mkey_log_entity_size_fixed_buffer[0x5]; u8 ec_vf_vport_base[0x10]; u8 reserved_at_3a0[0x10]; @@ -2029,7 +2031,11 @@ struct mlx5_ifc_cmd_hca_cap_2_bits { u8 pcc_ifa2[0x1]; u8 reserved_at_3f1[0xf]; - u8 reserved_at_400[0x400]; + u8 reserved_at_400[0x1]; + u8 min_mkey_log_entity_size_fixed_buffer_valid[0x1]; + u8 reserved_at_402[0x1e]; + + u8 reserved_at_420[0x3e0]; }; enum mlx5_ifc_flow_destination_type { -- cgit v1.2.3 From f1180fd2a7c039691b64ebf404c746a74e40b7b0 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Wed, 5 Jun 2024 07:13:39 +0000 Subject: mm/mm_init.c: not always search next deferred_init_pfn from very beginning In function deferred_init_memmap(), we call deferred_init_mem_pfn_range_in_zone() to get the next deferred_init_pfn. But we always search it from the very beginning. Since we save the index in i, we can leverage this to search from i next time. [rppt refine the comment] Signed-off-by: Wei Yang Link: https://lore.kernel.org/all/20240605071339.15330-1-richard.weiyang@gmail.com Signed-off-by: Mike Rapoport (IBM) --- include/linux/memblock.h | 19 ------------------- mm/mm_init.c | 23 ++++++++++++++--------- 2 files changed, 14 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 6cf18dc2b4d0..45cac33334c8 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn, void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone, unsigned long *out_spfn, unsigned long *out_epfn); -/** - * for_each_free_mem_pfn_range_in_zone - iterate through zone specific free - * memblock areas - * @i: u64 used as loop variable - * @zone: zone in which all of the memory blocks reside - * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL - * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL - * - * Walks over free (memory && !reserved) areas of memblock in a specific - * zone. Available once memblock and an empty zone is initialized. The main - * assumption is that the zone start, end, and pgdat have been associated. - * This way we can use the zone to determine NUMA node, and if a given part - * of the memblock is valid for the zone. - */ -#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \ - for (i = 0, \ - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \ - i != U64_MAX; \ - __next_mem_pfn_range_in_zone(&i, zone, p_start, p_end)) /** * for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific diff --git a/mm/mm_init.c b/mm/mm_init.c index c9c8c7458f27..5a4c846393b7 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2021,24 +2021,29 @@ static unsigned long __init deferred_init_pages(struct zone *zone, } /* - * This function is meant to pre-load the iterator for the zone init. - * Specifically it walks through the ranges until we are caught up to the - * first_init_pfn value and exits there. If we never encounter the value we - * return false indicating there are no valid ranges left. + * This function is meant to pre-load the iterator for the zone init from + * a given point. + * Specifically it walks through the ranges starting with initial index + * passed to it until we are caught up to the first_init_pfn value and + * exits there. If we never encounter the value we return false indicating + * there are no valid ranges left. */ static bool __init deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone, unsigned long *spfn, unsigned long *epfn, unsigned long first_init_pfn) { - u64 j; + u64 j = *i; + + if (j == 0) + __next_mem_pfn_range_in_zone(&j, zone, spfn, epfn); /* * Start out by walking through the ranges in this zone that have * already been initialized. We don't need to do anything with them * so we just need to flush them out of the system. */ - for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) { + for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) { if (*epfn <= first_init_pfn) continue; if (*spfn < first_init_pfn) @@ -2110,7 +2115,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn, { unsigned long spfn, epfn; struct zone *zone = arg; - u64 i; + u64 i = 0; deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn); @@ -2141,7 +2146,7 @@ static int __init deferred_init_memmap(void *data) unsigned long start = jiffies; struct zone *zone; int max_threads; - u64 i; + u64 i = 0; /* Bind memory initialisation thread to a local node if possible */ if (!cpumask_empty(cpumask)) @@ -2216,7 +2221,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order) unsigned long first_deferred_pfn = pgdat->first_deferred_pfn; unsigned long spfn, epfn, flags; unsigned long nr_pages = 0; - u64 i; + u64 i = 0; /* Only the last zone may have deferred pages */ if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat)) -- cgit v1.2.3 From b4cb4a1391dcdc640c4ade003aaf0ee19cc8d509 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 4 Jun 2024 11:16:03 +0000 Subject: net: use unrcu_pointer() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Toke mentioned unrcu_pointer() existence, allowing to remove some of the ugly casts we have when using xchg() for rcu protected pointers. Also make inet_rcv_compat const. Signed-off-by: Eric Dumazet Cc: Toke Høiland-Jørgensen Reviewed-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/r/20240604111603.45871-1-edumazet@google.com Signed-off-by: Paolo Abeni --- include/net/sock.h | 2 +- net/core/gen_estimator.c | 2 +- net/core/sock_diag.c | 8 +++----- net/ipv4/cipso_ipv4.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_fastopen.c | 7 ++++--- net/ipv4/udp.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/ip6_fib.c | 2 +- net/ipv6/ipv6_sockglue.c | 3 +-- net/ipv6/route.c | 6 +++--- net/sched/act_api.c | 2 +- 12 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 953c8dc4e259..b30ea0c342a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2095,7 +2095,7 @@ sk_dst_set(struct sock *sk, struct dst_entry *dst) sk_tx_queue_clear(sk); WRITE_ONCE(sk->sk_dst_pending_confirm, 0); - old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst); + old_dst = unrcu_pointer(xchg(&sk->sk_dst_cache, RCU_INITIALIZER(dst))); dst_release(old_dst); } diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index fae9c4694186..412816076b8b 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -206,7 +206,7 @@ void gen_kill_estimator(struct net_rate_estimator __rcu **rate_est) { struct net_rate_estimator *est; - est = xchg((__force struct net_rate_estimator **)rate_est, NULL); + est = unrcu_pointer(xchg(rate_est, NULL)); if (est) { timer_shutdown_sync(&est->timer); kfree_rcu(est, rcu); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 654122838025..a08eed9b9142 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -18,7 +18,7 @@ static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX]; -static struct sock_diag_inet_compat __rcu *inet_rcv_compat; +static const struct sock_diag_inet_compat __rcu *inet_rcv_compat; static struct workqueue_struct *broadcast_wq; @@ -187,8 +187,7 @@ void sock_diag_broadcast_destroy(struct sock *sk) void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr) { - xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat, - ptr); + xchg(&inet_rcv_compat, RCU_INITIALIZER(ptr)); } EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat); @@ -196,8 +195,7 @@ void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr) { const struct sock_diag_inet_compat *old; - old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat, - NULL); + old = unrcu_pointer(xchg(&inet_rcv_compat, NULL)); WARN_ON_ONCE(old != ptr); } EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index dd6d46015058..3a95c0f13ce3 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1953,7 +1953,7 @@ int cipso_v4_req_setattr(struct request_sock *req, buf = NULL; req_inet = inet_rsk(req); - opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); + opt = unrcu_pointer(xchg(&req_inet->ireq_opt, RCU_INITIALIZER(opt))); if (opt) kfree_rcu(opt, rcu); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index fa43aaacd92b..f727bc8d82a6 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3081,7 +3081,7 @@ int tcp_disconnect(struct sock *sk, int flags) icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); - dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL)); + dst_release(unrcu_pointer(xchg(&sk->sk_rx_dst, NULL))); tcp_saved_syn_free(tp); tp->compressed_ack = 0; tp->segs_in = 0; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 8ed54e7334a9..0f523cbfe329 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -49,7 +49,7 @@ void tcp_fastopen_ctx_destroy(struct net *net) { struct tcp_fastopen_context *ctxt; - ctxt = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, NULL); + ctxt = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, NULL)); if (ctxt) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); @@ -80,9 +80,10 @@ int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; - octx = xchg((__force struct tcp_fastopen_context **)&q->ctx, ctx); + octx = unrcu_pointer(xchg(&q->ctx, RCU_INITIALIZER(ctx))); } else { - octx = xchg((__force struct tcp_fastopen_context **)&net->ipv4.tcp_fastopen_ctx, ctx); + octx = unrcu_pointer(xchg(&net->ipv4.tcp_fastopen_ctx, + RCU_INITIALIZER(ctx))); } if (octx) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 189c9113fe9a..c9ca6d285347 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2230,7 +2230,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) struct dst_entry *old; if (dst_hold_safe(dst)) { - old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst); + old = unrcu_pointer(xchg(&sk->sk_rx_dst, RCU_INITIALIZER(dst))); dst_release(old); return old != dst; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 8041dc181bd4..e03fb9a1dbeb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -509,7 +509,7 @@ void inet6_cleanup_sock(struct sock *sk) /* Free tx options */ - opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + opt = unrcu_pointer(xchg(&np->opt, NULL)); if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 31d77885bcae..15f9abe50656 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -984,7 +984,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, if (pcpu_rt && rcu_access_pointer(pcpu_rt->from) == match) { struct fib6_info *from; - from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); + from = unrcu_pointer(xchg(&pcpu_rt->from, NULL)); fib6_info_release(from); } } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d4c28ec1bc51..cd342d5015c6 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -111,8 +111,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } - opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, - opt); + opt = unrcu_pointer(xchg(&inet6_sk(sk)->opt, RCU_INITIALIZER(opt))); sk_dst_reset(sk); return opt; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 12f647d0fec0..ae6fbdf8995d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -368,7 +368,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) in6_dev_put(idev); } - from = xchg((__force struct fib6_info **)&rt->from, NULL); + from = unrcu_pointer(xchg(&rt->from, NULL)); fib6_info_release(from); } @@ -1437,7 +1437,7 @@ static struct rt6_info *rt6_make_pcpu_route(struct net *net, if (res->f6i->fib6_destroying) { struct fib6_info *from; - from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); + from = unrcu_pointer(xchg(&pcpu_rt->from, NULL)); fib6_info_release(from); } @@ -1466,7 +1466,7 @@ static void rt6_remove_exception(struct rt6_exception_bucket *bucket, /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); + from = unrcu_pointer(xchg(&rt6_ex->rt6i->from, NULL)); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9ee622fb1160..7458b3154426 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -62,7 +62,7 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, { struct tc_cookie *old; - old = xchg((__force struct tc_cookie **)old_cookie, new_cookie); + old = unrcu_pointer(xchg(old_cookie, RCU_INITIALIZER(new_cookie))); if (old) call_rcu(&old->rcu, tcf_free_cookie_rcu); } -- cgit v1.2.3 From c34506406dd5cfb352f8c53bb6a1b9535c0905dd Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Jun 2024 07:15:51 +0000 Subject: tcp: small changes in reqsk_put() and reqsk_free() In reqsk_free(), use DEBUG_NET_WARN_ON_ONCE() instead of WARN_ON_ONCE() for a condition which never fired. In reqsk_put() directly call __reqsk_free(), there is no point checking rsk_refcnt again right after a transition to zero. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index ebcb8896bffc..a8f82216c628 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -172,14 +172,14 @@ static inline void __reqsk_free(struct request_sock *req) static inline void reqsk_free(struct request_sock *req) { - WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); + DEBUG_NET_WARN_ON_ONCE(refcount_read(&req->rsk_refcnt) != 0); __reqsk_free(req); } static inline void reqsk_put(struct request_sock *req) { if (refcount_dec_and_test(&req->rsk_refcnt)) - reqsk_free(req); + __reqsk_free(req); } /* -- cgit v1.2.3 From 6971d21672827a701c5ea180891b7ea6cf06f6a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Jun 2024 07:15:53 +0000 Subject: tcp: move reqsk_alloc() to inet_connection_sock.c reqsk_alloc() has a single caller, no need to expose it in include/net/request_sock.h. Signed-off-by: Eric Dumazet Reviewed-by: Kuniyuki Iwashima Signed-off-by: Paolo Abeni --- include/net/request_sock.h | 33 --------------------------------- net/ipv4/inet_connection_sock.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index a8f82216c628..b07b1cd14e9f 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -128,39 +128,6 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb, return sk; } -static inline struct request_sock * -reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, - bool attach_listener) -{ - struct request_sock *req; - - req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN); - if (!req) - return NULL; - req->rsk_listener = NULL; - if (attach_listener) { - if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { - kmem_cache_free(ops->slab, req); - return NULL; - } - req->rsk_listener = sk_listener; - } - req->rsk_ops = ops; - req_to_sk(req)->sk_prot = sk_listener->sk_prot; - sk_node_init(&req_to_sk(req)->sk_node); - sk_tx_queue_clear(req_to_sk(req)); - req->saved_syn = NULL; - req->syncookie = 0; - req->timeout = 0; - req->num_timeout = 0; - req->num_retrans = 0; - req->sk = NULL; - refcount_set(&req->rsk_refcnt, 0); - - return req; -} -#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__)) - static inline void __reqsk_free(struct request_sock *req) { req->rsk_ops->destructor(req); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index a9d2e6308910..7ced569778ab 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -911,6 +911,39 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); +static struct request_sock * +reqsk_alloc_noprof(const struct request_sock_ops *ops, struct sock *sk_listener, + bool attach_listener) +{ + struct request_sock *req; + + req = kmem_cache_alloc_noprof(ops->slab, GFP_ATOMIC | __GFP_NOWARN); + if (!req) + return NULL; + req->rsk_listener = NULL; + if (attach_listener) { + if (unlikely(!refcount_inc_not_zero(&sk_listener->sk_refcnt))) { + kmem_cache_free(ops->slab, req); + return NULL; + } + req->rsk_listener = sk_listener; + } + req->rsk_ops = ops; + req_to_sk(req)->sk_prot = sk_listener->sk_prot; + sk_node_init(&req_to_sk(req)->sk_node); + sk_tx_queue_clear(req_to_sk(req)); + req->saved_syn = NULL; + req->syncookie = 0; + req->timeout = 0; + req->num_timeout = 0; + req->num_retrans = 0; + req->sk = NULL; + refcount_set(&req->rsk_refcnt, 0); + + return req; +} +#define reqsk_alloc(...) alloc_hooks(reqsk_alloc_noprof(__VA_ARGS__)) + struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener, bool attach_listener) -- cgit v1.2.3 From 3ed96977a3c5b0a9b017d626600402be3089d4fc Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Tue, 4 Jun 2024 13:54:38 -0400 Subject: drm/mm: Remove unused drm_mm_replace_node Last caller was removed with commit 078a5b498d6a ("drm/tests: Remove slow tests"). Cc: Maxime Ripard Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240604175438.48125-1-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/drm_mm.c | 35 ----------------------------------- include/drm/drm_mm.h | 1 - 2 files changed, 36 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 8257f9d4f619..5ace481c1901 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -648,41 +648,6 @@ void drm_mm_remove_node(struct drm_mm_node *node) } EXPORT_SYMBOL(drm_mm_remove_node); -/** - * drm_mm_replace_node - move an allocation from @old to @new - * @old: drm_mm_node to remove from the allocator - * @new: drm_mm_node which should inherit @old's allocation - * - * This is useful for when drivers embed the drm_mm_node structure and hence - * can't move allocations by reassigning pointers. It's a combination of remove - * and insert with the guarantee that the allocation start will match. - */ -void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) -{ - struct drm_mm *mm = old->mm; - - DRM_MM_BUG_ON(!drm_mm_node_allocated(old)); - - *new = *old; - - __set_bit(DRM_MM_NODE_ALLOCATED_BIT, &new->flags); - list_replace(&old->node_list, &new->node_list); - rb_replace_node_cached(&old->rb, &new->rb, &mm->interval_tree); - - if (drm_mm_hole_follows(old)) { - list_replace(&old->hole_stack, &new->hole_stack); - rb_replace_node_cached(&old->rb_hole_size, - &new->rb_hole_size, - &mm->holes_size); - rb_replace_node(&old->rb_hole_addr, - &new->rb_hole_addr, - &mm->holes_addr); - } - - clear_bit_unlock(DRM_MM_NODE_ALLOCATED_BIT, &old->flags); -} -EXPORT_SYMBOL(drm_mm_replace_node); - /** * DOC: lru scan roster * diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index ac33ba1b18bc..f654874c4ce6 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -463,7 +463,6 @@ static inline int drm_mm_insert_node(struct drm_mm *mm, } void drm_mm_remove_node(struct drm_mm_node *node); -void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new); void drm_mm_init(struct drm_mm *mm, u64 start, u64 size); void drm_mm_takedown(struct drm_mm *mm); -- cgit v1.2.3 From ababa16fd9bd0e2727a1c31c4fb68d6be053bddc Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 6 Jun 2024 11:42:38 +0200 Subject: irqchip/gic-v3: Enable non-coherent redistributors/ITSes ACPI probing The GIC architecture specification defines a set of registers for redistributors and ITSes that control the sharebility and cacheability attributes of redistributors/ITSes initiator ports on the interconnect (GICR_[V]PROPBASER, GICR_[V]PENDBASER, GITS_BASER). Architecturally the GIC provides a means to drive shareability and cacheability attributes signals but it is not mandatory for designs to wire up the corresponding interconnect signals that control the cacheability/shareability of transactions. Redistributors and ITSes interconnect ports can be connected to non-coherent interconnects that are not able to manage the shareability/cacheability attributes; this implicitly makes the redistributors and ITSes non-coherent observers. To enable non-coherent GIC designs on ACPI based systems, parse the MADT GICC/GICR/ITS subtables non-coherent flags to determine whether the respective components are non-coherent observers and force the shareability attributes to be programmed into the redistributors and ITSes registers. An ACPI global function (acpi_get_madt_revision()) is added to retrieve the MADT revision, in that it is essential to check the MADT revision before checking for flags that were added with MADT revision 7 so that if the kernel is booted with an ACPI MADT table with revision < 7 it skips parsing the newly added flags (that should be zeroed reserved values for MADT versions < 7 but they could turn out to be buggy and should be ignored). Signed-off-by: Lorenzo Pieralisi Signed-off-by: Thomas Gleixner Reviewed-by: Robin Murphy Acked-by: Marc Zyngier Cc: "Rafael J. Wysocki" Link: https://lore.kernel.org/r/20240606094238.757649-2-lpieralisi@kernel.org --- drivers/acpi/processor_core.c | 15 +++++++++++++++ drivers/irqchip/irq-gic-v3-its.c | 4 ++++ drivers/irqchip/irq-gic-v3.c | 9 +++++++++ include/linux/acpi.h | 3 +++ 4 files changed, 31 insertions(+) (limited to 'include') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index b203cfe28550..915713c0e9b7 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -215,6 +215,21 @@ phys_cpuid_t __init acpi_map_madt_entry(u32 acpi_id) return rv; } +int __init acpi_get_madt_revision(void) +{ + struct acpi_table_header *madt = NULL; + int revision; + + if (ACPI_FAILURE(acpi_get_table(ACPI_SIG_MADT, 0, &madt))) + return -EINVAL; + + revision = madt->revision; + + acpi_put_table(madt); + + return revision; +} + static phys_cpuid_t map_mat_entry(acpi_handle handle, int type, u32 acpi_id) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 40ebf1726393..af5297ef2293 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -5600,6 +5600,10 @@ static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header, goto node_err; } + if (acpi_get_madt_revision() >= 7 && + (its_entry->flags & ACPI_MADT_ITS_NON_COHERENT)) + its->flags |= ITS_FLAGS_FORCE_NON_SHAREABLE; + err = its_probe_one(its); if (!err) return 0; diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 6fb276504bcc..e4bc5f04226e 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -2349,6 +2349,11 @@ gic_acpi_parse_madt_redist(union acpi_subtable_headers *header, pr_err("Couldn't map GICR region @%llx\n", redist->base_address); return -ENOMEM; } + + if (acpi_get_madt_revision() >= 7 && + (redist->flags & ACPI_MADT_GICR_NON_COHERENT)) + gic_data.rdists.flags |= RDIST_FLAGS_FORCE_NON_SHAREABLE; + gic_request_region(redist->base_address, redist->length, "GICR"); gic_acpi_register_redist(redist->base_address, redist_base); @@ -2373,6 +2378,10 @@ gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, return -ENOMEM; gic_request_region(gicc->gicr_base_address, size, "GICR"); + if (acpi_get_madt_revision() >= 7 && + (gicc->flags & ACPI_MADT_GICC_NON_COHERENT)) + gic_data.rdists.flags |= RDIST_FLAGS_FORCE_NON_SHAREABLE; + gic_acpi_register_redist(gicc->gicr_base_address, redist_base); return 0; } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..000d339e1596 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -279,6 +279,9 @@ static inline bool invalid_phys_cpuid(phys_cpuid_t phys_id) return phys_id == PHYS_CPUID_INVALID; } + +int __init acpi_get_madt_revision(void); + /* Validate the processor object's proc_id */ bool acpi_duplicate_processor_id(int proc_id); /* Processor _CTS control */ -- cgit v1.2.3 From 0ee14725471cea66e03e3cd4f4c582d759de502c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 6 Jun 2024 15:46:09 +0100 Subject: mm/util: Swap kmemdup_array() arguments GCC 14.1 complains about the argument usage of kmemdup_array(): drivers/soc/tegra/fuse/fuse-tegra.c:130:65: error: 'kmemdup_array' sizes specified with 'sizeof' in the earlier argument and not in the later argument [-Werror=calloc-transposed-args] 130 | fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups), | ^ drivers/soc/tegra/fuse/fuse-tegra.c:130:65: note: earlier argument should specify number of elements, later size of each element The annotation introduced by commit 7d78a7773355 ("string: Add additional __realloc_size() annotations for "dup" helpers") lets the compiler think that kmemdup_array() follows the same format as calloc(), with the number of elements preceding the size of one element. So we could simply swap the arguments to __realloc_size() to get rid of that warning, but it seems cleaner to instead have kmemdup_array() follow the same format as krealloc_array(), memdup_array_user(), calloc() etc. Fixes: 7d78a7773355 ("string: Add additional __realloc_size() annotations for "dup" helpers") Signed-off-by: Jean-Philippe Brucker Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240606144608.97817-2-jean-philippe@linaro.org Signed-off-by: Kees Cook --- drivers/soc/tegra/fuse/fuse-tegra.c | 4 ++-- include/linux/string.h | 2 +- lib/fortify_kunit.c | 2 +- mm/util.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index b6bfd6729df3..d27667283846 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -127,8 +127,8 @@ static void tegra_fuse_print_sku_info(struct tegra_sku_info *tegra_sku_info) static int tegra_fuse_add_lookups(struct tegra_fuse *fuse) { - fuse->lookups = kmemdup_array(fuse->soc->lookups, sizeof(*fuse->lookups), - fuse->soc->num_lookups, GFP_KERNEL); + fuse->lookups = kmemdup_array(fuse->soc->lookups, fuse->soc->num_lookups, + sizeof(*fuse->lookups), GFP_KERNEL); if (!fuse->lookups) return -ENOMEM; diff --git a/include/linux/string.h b/include/linux/string.h index 60168aa2af07..9edace076ddb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -289,7 +289,7 @@ extern void *kmemdup_noprof(const void *src, size_t len, gfp_t gfp) __realloc_si extern void *kvmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2); extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp); -extern void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) +extern void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp) __realloc_size(2, 3); /* lib/argv_split.c */ diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c index f9cc467334ce..e17d520f532c 100644 --- a/lib/fortify_kunit.c +++ b/lib/fortify_kunit.c @@ -374,7 +374,7 @@ static const char * const test_strs[] = { for (i = 0; i < ARRAY_SIZE(test_strs); i++) { \ len = strlen(test_strs[i]); \ KUNIT_EXPECT_EQ(test, __builtin_constant_p(len), 0); \ - checker(len, kmemdup_array(test_strs[i], len, 1, gfp), \ + checker(len, kmemdup_array(test_strs[i], 1, len, gfp), \ kfree(p)); \ checker(len, kmemdup(test_strs[i], len, gfp), \ kfree(p)); \ diff --git a/mm/util.c b/mm/util.c index c9e519e6811f..6682097372ef 100644 --- a/mm/util.c +++ b/mm/util.c @@ -139,14 +139,14 @@ EXPORT_SYMBOL(kmemdup_noprof); * kmemdup_array - duplicate a given array. * * @src: array to duplicate. - * @element_size: size of each element of array. * @count: number of elements to duplicate from array. + * @element_size: size of each element of array. * @gfp: GFP mask to use. * * Return: duplicated array of @src or %NULL in case of error, * result is physically contiguous. Use kfree() to free. */ -void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp) +void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp) { return kmemdup(src, size_mul(element_size, count), gfp); } -- cgit v1.2.3 From 0d11307022978f1f395da587285c06c9cea47288 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 23 May 2024 19:44:29 +0200 Subject: drm/print: Add missing [drm] prefix to drm based WARN All drm_device based logging macros, except those related to WARN, include the [drm] prefix. Fix that. [ ] 0000:00:00.0: this is a warning [ ] 0000:00:00.0: drm_WARN_ON(true) vs [ ] 0000:00:00.0: [drm] this is a warning [ ] 0000:00:00.0: [drm] drm_WARN_ON(true) Signed-off-by: Michal Wajdeczko Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240523174429.800-1-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- include/drm/drm_print.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 089950ad8681..112f8830b372 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -632,12 +632,12 @@ void __drm_err(const char *format, ...); /* Helper for struct drm_device based WARNs */ #define drm_WARN(drm, condition, format, arg...) \ - WARN(condition, "%s %s: " format, \ + WARN(condition, "%s %s: [drm] " format, \ dev_driver_string((drm)->dev), \ dev_name((drm)->dev), ## arg) #define drm_WARN_ONCE(drm, condition, format, arg...) \ - WARN_ONCE(condition, "%s %s: " format, \ + WARN_ONCE(condition, "%s %s: [drm] " format, \ dev_driver_string((drm)->dev), \ dev_name((drm)->dev), ## arg) -- cgit v1.2.3 From 0d5edcc60abe9a02501f01e032bfa2432c1364de Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Thu, 16 May 2024 18:00:15 +0200 Subject: drm/print: Kill ___drm_dbg() There is no point in maintaining a separate print function, while there is __drm_dev_dbg() function that can work with a NULL device. Signed-off-by: Michal Wajdeczko Cc: Jani Nikula Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240516160015.2260-1-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/drm_print.c | 19 ------------------- include/drm/drm_print.h | 8 +++----- 2 files changed, 3 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c index cf2efb44722c..7e16ce7c7c7a 100644 --- a/drivers/gpu/drm/drm_print.c +++ b/drivers/gpu/drm/drm_print.c @@ -323,25 +323,6 @@ void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev, } EXPORT_SYMBOL(__drm_dev_dbg); -void ___drm_dbg(struct _ddebug *desc, enum drm_debug_category category, const char *format, ...) -{ - struct va_format vaf; - va_list args; - - if (!__drm_debug_enabled(category)) - return; - - va_start(args, format); - vaf.fmt = format; - vaf.va = &args; - - printk(KERN_DEBUG "[" DRM_NAME ":%ps] %pV", - __builtin_return_address(0), &vaf); - - va_end(args); -} -EXPORT_SYMBOL(___drm_dbg); - void __drm_err(const char *format, ...) { struct va_format vaf; diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 112f8830b372..679a2086fbea 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -527,17 +527,15 @@ void __drm_dev_dbg(struct _ddebug *desc, const struct device *dev, * Prefer drm_device based logging over device or prink based logging. */ -__printf(3, 4) -void ___drm_dbg(struct _ddebug *desc, enum drm_debug_category category, const char *format, ...); __printf(1, 2) void __drm_err(const char *format, ...); #if !defined(CONFIG_DRM_USE_DYNAMIC_DEBUG) -#define __drm_dbg(cat, fmt, ...) ___drm_dbg(NULL, cat, fmt, ##__VA_ARGS__) +#define __drm_dbg(cat, fmt, ...) __drm_dev_dbg(NULL, NULL, cat, fmt, ##__VA_ARGS__) #else #define __drm_dbg(cat, fmt, ...) \ - _dynamic_func_call_cls(cat, fmt, ___drm_dbg, \ - cat, fmt, ##__VA_ARGS__) + _dynamic_func_call_cls(cat, fmt, __drm_dev_dbg, \ + NULL, cat, fmt, ##__VA_ARGS__) #endif /* Macros to make printk easier */ -- cgit v1.2.3 From c2ef66e9ad882ab4b055a86657c20c61d203f003 Mon Sep 17 00:00:00 2001 From: Michal Wajdeczko Date: Fri, 17 May 2024 18:34:05 +0200 Subject: drm/print: Improve drm_dbg_printer With recent introduction of a generic drm dev printk function, we can now store and use location where drm_dbg_printer was invoked and output it's symbolic name like we do for all drm debug prints. Cc: Jani Nikula Reviewed-by: Jani Nikula Signed-off-by: Michal Wajdeczko Link: https://patchwork.freedesktop.org/patch/msgid/20240517163406.2348-3-michal.wajdeczko@intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/drm_print.c | 3 +-- include/drm/drm_print.h | 2 ++ 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c index 94e8f3542846..cf24dfdeb6b2 100644 --- a/drivers/gpu/drm/drm_print.c +++ b/drivers/gpu/drm/drm_print.c @@ -217,8 +217,7 @@ void __drm_printfn_dbg(struct drm_printer *p, struct va_format *vaf) if (!__drm_debug_enabled(category)) return; - /* Note: __builtin_return_address(0) is useless here. */ - __drm_dev_vprintk(dev, KERN_DEBUG, NULL, p->prefix, vaf); + __drm_dev_vprintk(dev, KERN_DEBUG, p->origin, p->prefix, vaf); } EXPORT_SYMBOL(__drm_printfn_dbg); diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 679a2086fbea..5d9dff5149c9 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -175,6 +175,7 @@ struct drm_printer { void (*printfn)(struct drm_printer *p, struct va_format *vaf); void (*puts)(struct drm_printer *p, const char *str); void *arg; + const void *origin; const char *prefix; enum drm_debug_category category; }; @@ -332,6 +333,7 @@ static inline struct drm_printer drm_dbg_printer(struct drm_device *drm, struct drm_printer p = { .printfn = __drm_printfn_dbg, .arg = drm, + .origin = (const void *)_THIS_IP_, /* it's fine as we will be inlined */ .prefix = prefix, .category = category, }; -- cgit v1.2.3 From 1d5f0222944fe723b9bfaaeb27e368363644ccab Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 5 Jun 2024 16:26:45 -0400 Subject: ftrace: Declare function_trace_op in header to quiet sparse warning Sparse complains that function_trace_op is not static but is not declared in a header file. It is used only in assembly code. But add it to a header so that sparse no longer complains: kernel/trace/ftrace.c:99:19: warning: symbol 'function_trace_op' was not declared. Should it be static? Link: https://lore.kernel.org/linux-trace-kernel/20240605202708.289105647@goodmis.org Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Andrew Morton Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- include/linux/ftrace.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9f61556a9491..4135dc171447 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1131,6 +1131,9 @@ extern void ftrace_graph_init_task(struct task_struct *t); extern void ftrace_graph_exit_task(struct task_struct *t); extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu); +/* Used by assembly, but to quiet sparse warnings */ +extern struct ftrace_ops *function_trace_op; + static inline void pause_graph_tracing(void) { atomic_inc(¤t->tracing_graph_pause); -- cgit v1.2.3 From c76494768761aef7630e7e0db820ba7b375964da Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Thu, 30 May 2024 11:07:19 -0700 Subject: linux/interrupt.h: allow "guard" notation to disable and reenable IRQ Drivers often need to first disable an interrupt, carry out some action, and then reenable the interrupt. Introduce support for the "guard" notation for this so that the following is possible: ... scoped_cond_guard(mutex_intr, return -EINTR, &data->sysfs_mutex) { guard(disable_irq)(&client->irq); error = elan_acquire_baseline(data); if (error) return error; } ... Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/ZljAV6HjkPSEhWSw@google.com Signed-off-by: Dmitry Torokhov --- include/linux/interrupt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 5c9bdd3ffccc..3a36e64119c8 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -235,6 +236,9 @@ extern void enable_percpu_irq(unsigned int irq, unsigned int type); extern bool irq_percpu_is_enabled(unsigned int irq); extern void irq_wake_thread(unsigned int irq, void *dev_id); +DEFINE_LOCK_GUARD_1(disable_irq, int, + disable_irq(*_T->lock), enable_irq(*_T->lock)) + extern void disable_nmi_nosync(unsigned int irq); extern void disable_percpu_nmi(unsigned int irq); extern void enable_nmi(unsigned int irq); -- cgit v1.2.3 From b472b996a43404a912c5cb4f27050022fdbce10c Mon Sep 17 00:00:00 2001 From: Udit Kumar Date: Fri, 31 May 2024 22:27:25 +0530 Subject: dt-bindings: net: dp8386x: Add MIT license along with GPL-2.0 Modify license to include dual licensing as GPL-2.0-only OR MIT license for TI specific phy header files. This allows for Linux kernel files to be used in other Operating System ecosystems such as Zephyr or FreeBSD. While at this, update the GPL-2.0 to be GPL-2.0-only to be in sync with latest SPDX conventions (GPL-2.0 is deprecated). While at this, update the TI copyright year to sync with current year to indicate license change. Cc: Thomas Gleixner Cc: Trent Piepho Cc: Wadim Egorov Cc: Kip Broadhurst Signed-off-by: Udit Kumar Acked-by: Wadim Egorov Acked-by: Rob Herring Signed-off-by: David S. Miller --- include/dt-bindings/net/ti-dp83867.h | 4 ++-- include/dt-bindings/net/ti-dp83869.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/dt-bindings/net/ti-dp83867.h b/include/dt-bindings/net/ti-dp83867.h index 6fc4b445d3a1..b8a4f3ff4a3b 100644 --- a/include/dt-bindings/net/ti-dp83867.h +++ b/include/dt-bindings/net/ti-dp83867.h @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Device Tree constants for the Texas Instruments DP83867 PHY * * Author: Dan Murphy * - * Copyright: (C) 2015 Texas Instruments, Inc. + * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef _DT_BINDINGS_TI_DP83867_H diff --git a/include/dt-bindings/net/ti-dp83869.h b/include/dt-bindings/net/ti-dp83869.h index 218b1a64e975..917114aad7d0 100644 --- a/include/dt-bindings/net/ti-dp83869.h +++ b/include/dt-bindings/net/ti-dp83869.h @@ -1,10 +1,10 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ +/* SPDX-License-Identifier: GPL-2.0-only OR MIT */ /* * Device Tree constants for the Texas Instruments DP83869 PHY * * Author: Dan Murphy * - * Copyright: (C) 2019 Texas Instruments, Inc. + * Copyright (C) 2015-2024 Texas Instruments Incorporated - https://www.ti.com/ */ #ifndef _DT_BINDINGS_TI_DP83869_H -- cgit v1.2.3 From b4100947a8014e1876872546398275968f77e1ad Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 28 May 2024 16:07:10 -0500 Subject: crypto: ccp - align psp_platform_access_msg Align the whitespace so that future messages will also be better aligned. Acked-by: Tom Lendacky Signed-off-by: Mario Limonciello Signed-off-by: Herbert Xu --- include/linux/psp-platform-access.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index c1dc87fc536b..23893b33e48c 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -6,8 +6,8 @@ #include enum psp_platform_access_msg { - PSP_CMD_NONE = 0x0, - PSP_I2C_REQ_BUS_CMD = 0x64, + PSP_CMD_NONE = 0x0, + PSP_I2C_REQ_BUS_CMD = 0x64, PSP_DYNAMIC_BOOST_GET_NONCE, PSP_DYNAMIC_BOOST_SET_UID, PSP_DYNAMIC_BOOST_GET_PARAMETER, -- cgit v1.2.3 From 82f9327f774c6e040ba63a887a85fa2e290a233a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Tue, 28 May 2024 16:07:11 -0500 Subject: crypto: ccp - Add support for getting security attributes on some older systems Older systems will not populate the security attributes in the capabilities register. The PSP on these systems, however, does have a command to get the security attributes. Use this command during ccp startup to populate the attributes if they're missing. Closes: https://github.com/fwupd/fwupd/issues/5284 Closes: https://github.com/fwupd/fwupd/issues/5675 Closes: https://github.com/fwupd/fwupd/issues/6253 Closes: https://github.com/fwupd/fwupd/issues/7280 Closes: https://github.com/fwupd/fwupd/issues/6323 Closes: https://github.com/fwupd/fwupd/discussions/5433 Signed-off-by: Mario Limonciello Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/hsti.c | 55 +++++++++++++++++++++++++++++++++++++ drivers/crypto/ccp/hsti.h | 2 ++ drivers/crypto/ccp/psp-dev.c | 5 ++++ drivers/crypto/ccp/psp-dev.h | 2 -- drivers/crypto/ccp/sp-dev.h | 1 + drivers/crypto/ccp/sp-pci.c | 5 +++- include/linux/psp-platform-access.h | 1 + 7 files changed, 68 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/crypto/ccp/hsti.c b/drivers/crypto/ccp/hsti.c index 076c1d175b2b..8b99bbd4efe2 100644 --- a/drivers/crypto/ccp/hsti.c +++ b/drivers/crypto/ccp/hsti.c @@ -12,6 +12,13 @@ #include "psp-dev.h" #include "hsti.h" +#define PSP_CAPABILITY_PSP_SECURITY_OFFSET 8 + +struct hsti_request { + struct psp_req_buffer_hdr header; + u32 hsti; +} __packed; + #define security_attribute_show(name) \ static ssize_t name##_show(struct device *d, struct device_attribute *attr, \ char *buf) \ @@ -66,3 +73,51 @@ struct attribute_group psp_security_attr_group = { .attrs = psp_security_attrs, .is_visible = psp_security_is_visible, }; + +static int psp_poulate_hsti(struct psp_device *psp) +{ + struct hsti_request *req; + int ret; + + /* Are the security attributes already reported? */ + if (psp->capability.security_reporting) + return 0; + + /* Allocate command-response buffer */ + req = kzalloc(sizeof(*req), GFP_KERNEL | __GFP_ZERO); + if (!req) + return -ENOMEM; + + req->header.payload_size = sizeof(req); + + ret = psp_send_platform_access_msg(PSP_CMD_HSTI_QUERY, (struct psp_request *)req); + if (ret) + goto out; + + if (req->header.status != 0) { + dev_dbg(psp->dev, "failed to populate HSTI state: %d\n", req->header.status); + ret = -EINVAL; + goto out; + } + + psp->capability.security_reporting = 1; + psp->capability.raw |= req->hsti << PSP_CAPABILITY_PSP_SECURITY_OFFSET; + +out: + kfree(req); + + return ret; +} + +int psp_init_hsti(struct psp_device *psp) +{ + int ret; + + if (PSP_FEATURE(psp, HSTI)) { + ret = psp_poulate_hsti(psp); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/hsti.h b/drivers/crypto/ccp/hsti.h index e5c5ceab9973..6a70f922d2c4 100644 --- a/drivers/crypto/ccp/hsti.h +++ b/drivers/crypto/ccp/hsti.h @@ -12,4 +12,6 @@ extern struct attribute_group psp_security_attr_group; +int psp_init_hsti(struct psp_device *psp); + #endif /* __HSTI_H */ diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index 1a7b991c27f7..0a01ad134609 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -220,6 +220,11 @@ static int psp_init(struct psp_device *psp) return ret; } + /* HSTI uses platform access on some systems. */ + ret = psp_init_hsti(psp); + if (ret) + return ret; + return 0; } diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h index 02a7c94c02df..e43ce87ede76 100644 --- a/drivers/crypto/ccp/psp-dev.h +++ b/drivers/crypto/ccp/psp-dev.h @@ -78,8 +78,6 @@ void psp_clear_sev_irq_handler(struct psp_device *psp); struct psp_device *psp_get_master_device(void); -#define PSP_CAPABILITY_PSP_SECURITY_OFFSET 8 - /** * enum psp_cmd - PSP mailbox commands * @PSP_CMD_TEE_RING_INIT: Initialize TEE ring buffer diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h index c4e125efe6c7..0895de823674 100644 --- a/drivers/crypto/ccp/sp-dev.h +++ b/drivers/crypto/ccp/sp-dev.h @@ -29,6 +29,7 @@ #define CACHE_WB_NO_ALLOC 0xb7 #define PLATFORM_FEATURE_DBC 0x1 +#define PLATFORM_FEATURE_HSTI 0x2 #define PSP_FEATURE(psp, feat) (psp->vdata && psp->vdata->platform_features & PLATFORM_FEATURE_##feat) diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index dd31e791156d..248d98fd8c48 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -397,10 +397,12 @@ static const struct psp_vdata pspv1 = { static const struct psp_vdata pspv2 = { .sev = &sevv2, + .platform_access = &pa_v1, .bootloader_info_reg = 0x109ec, /* C2PMSG_59 */ .feature_reg = 0x109fc, /* C2PMSG_63 */ .inten_reg = 0x10690, /* P2CMSG_INTEN */ .intsts_reg = 0x10694, /* P2CMSG_INTSTS */ + .platform_features = PLATFORM_FEATURE_HSTI, }; static const struct psp_vdata pspv3 = { @@ -413,7 +415,8 @@ static const struct psp_vdata pspv3 = { .feature_reg = 0x109fc, /* C2PMSG_63 */ .inten_reg = 0x10690, /* P2CMSG_INTEN */ .intsts_reg = 0x10694, /* P2CMSG_INTSTS */ - .platform_features = PLATFORM_FEATURE_DBC, + .platform_features = PLATFORM_FEATURE_DBC | + PLATFORM_FEATURE_HSTI, }; static const struct psp_vdata pspv4 = { diff --git a/include/linux/psp-platform-access.h b/include/linux/psp-platform-access.h index 23893b33e48c..1504fb012c05 100644 --- a/include/linux/psp-platform-access.h +++ b/include/linux/psp-platform-access.h @@ -7,6 +7,7 @@ enum psp_platform_access_msg { PSP_CMD_NONE = 0x0, + PSP_CMD_HSTI_QUERY = 0x14, PSP_I2C_REQ_BUS_CMD = 0x64, PSP_DYNAMIC_BOOST_GET_NONCE, PSP_DYNAMIC_BOOST_SET_UID, -- cgit v1.2.3 From 46b3ff73afc815f1feb844e6b57e43cc13051544 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 31 May 2024 18:20:03 +0800 Subject: crypto: sm2 - Remove sm2 algorithm The SM2 algorithm has a single user in the kernel. However, it's never been integrated properly with that user: asymmetric_keys. The crux of the issue is that the way it computes its digest with sm3 does not fit into the architecture of asymmetric_keys. As no solution has been proposed, remove this algorithm. It can be resubmitted when it is integrated properly into the asymmetric_keys subsystem. Signed-off-by: Herbert Xu --- crypto/Kconfig | 18 -- crypto/Makefile | 8 - crypto/asymmetric_keys/pkcs7_parser.c | 4 - crypto/asymmetric_keys/public_key.c | 7 - crypto/asymmetric_keys/x509_cert_parser.c | 16 - crypto/asymmetric_keys/x509_public_key.c | 17 +- crypto/sm2.c | 498 ------------------------------ crypto/sm2signature.asn1 | 4 - crypto/testmgr.c | 6 - crypto/testmgr.h | 59 ---- include/crypto/sm2.h | 28 -- security/integrity/digsig_asymmetric.c | 3 +- 12 files changed, 3 insertions(+), 665 deletions(-) delete mode 100644 crypto/sm2.c delete mode 100644 crypto/sm2signature.asn1 delete mode 100644 include/crypto/sm2.h (limited to 'include') diff --git a/crypto/Kconfig b/crypto/Kconfig index 5688d42a59c2..72e2decb8c6a 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -313,24 +313,6 @@ config CRYPTO_ECRDSA One of the Russian cryptographic standard algorithms (called GOST algorithms). Only signature verification is implemented. -config CRYPTO_SM2 - tristate "SM2 (ShangMi 2)" - select CRYPTO_SM3 - select CRYPTO_AKCIPHER - select CRYPTO_MANAGER - select MPILIB - select ASN1 - help - SM2 (ShangMi 2) public key algorithm - - Published by State Encryption Management Bureau, China, - as specified by OSCCA GM/T 0003.1-2012 -- 0003.5-2012. - - References: - https://datatracker.ietf.org/doc/draft-shen-sm2-ecdsa/ - http://www.oscca.gov.cn/sca/xxgk/2010-12/17/content_1002386.shtml - http://www.gmbz.org.cn/main/bzlb.html - config CRYPTO_CURVE25519 tristate "Curve25519" select CRYPTO_KPP diff --git a/crypto/Makefile b/crypto/Makefile index edbbaa3ffef5..4c99e5d376f6 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -50,14 +50,6 @@ rsa_generic-y += rsa_helper.o rsa_generic-y += rsa-pkcs1pad.o obj-$(CONFIG_CRYPTO_RSA) += rsa_generic.o -$(obj)/sm2signature.asn1.o: $(obj)/sm2signature.asn1.c $(obj)/sm2signature.asn1.h -$(obj)/sm2.o: $(obj)/sm2signature.asn1.h - -sm2_generic-y += sm2signature.asn1.o -sm2_generic-y += sm2.o - -obj-$(CONFIG_CRYPTO_SM2) += sm2_generic.o - $(obj)/ecdsasignature.asn1.o: $(obj)/ecdsasignature.asn1.c $(obj)/ecdsasignature.asn1.h $(obj)/ecdsa.o: $(obj)/ecdsasignature.asn1.h ecdsa_generic-y += ecdsa.o diff --git a/crypto/asymmetric_keys/pkcs7_parser.c b/crypto/asymmetric_keys/pkcs7_parser.c index 231ad7b3789d..423d13c47545 100644 --- a/crypto/asymmetric_keys/pkcs7_parser.c +++ b/crypto/asymmetric_keys/pkcs7_parser.c @@ -292,10 +292,6 @@ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen, ctx->sinfo->sig->pkey_algo = "ecdsa"; ctx->sinfo->sig->encoding = "x962"; break; - case OID_SM2_with_SM3: - ctx->sinfo->sig->pkey_algo = "sm2"; - ctx->sinfo->sig->encoding = "raw"; - break; case OID_gost2012PKey256: case OID_gost2012PKey512: ctx->sinfo->sig->pkey_algo = "ecrdsa"; diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 3474fb34ded9..422940a6706a 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -124,13 +124,6 @@ software_key_determine_akcipher(const struct public_key *pkey, strcmp(hash_algo, "sha3-384") != 0 && strcmp(hash_algo, "sha3-512") != 0) return -EINVAL; - } else if (strcmp(pkey->pkey_algo, "sm2") == 0) { - if (strcmp(encoding, "raw") != 0) - return -EINVAL; - if (!hash_algo) - return -EINVAL; - if (strcmp(hash_algo, "sm3") != 0) - return -EINVAL; } else if (strcmp(pkey->pkey_algo, "ecrdsa") == 0) { if (strcmp(encoding, "raw") != 0) return -EINVAL; diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 25cc4273472f..ee2fdab42334 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -257,10 +257,6 @@ int x509_note_sig_algo(void *context, size_t hdrlen, unsigned char tag, case OID_gost2012Signature512: ctx->cert->sig->hash_algo = "streebog512"; goto ecrdsa; - - case OID_SM2_with_SM3: - ctx->cert->sig->hash_algo = "sm3"; - goto sm2; } rsa_pkcs1: @@ -273,11 +269,6 @@ ecrdsa: ctx->cert->sig->encoding = "raw"; ctx->sig_algo = ctx->last_oid; return 0; -sm2: - ctx->cert->sig->pkey_algo = "sm2"; - ctx->cert->sig->encoding = "raw"; - ctx->sig_algo = ctx->last_oid; - return 0; ecdsa: ctx->cert->sig->pkey_algo = "ecdsa"; ctx->cert->sig->encoding = "x962"; @@ -309,7 +300,6 @@ int x509_note_signature(void *context, size_t hdrlen, if (strcmp(ctx->cert->sig->pkey_algo, "rsa") == 0 || strcmp(ctx->cert->sig->pkey_algo, "ecrdsa") == 0 || - strcmp(ctx->cert->sig->pkey_algo, "sm2") == 0 || strcmp(ctx->cert->sig->pkey_algo, "ecdsa") == 0) { /* Discard the BIT STRING metadata */ if (vlen < 1 || *(const u8 *)value != 0) @@ -514,17 +504,11 @@ int x509_extract_key_data(void *context, size_t hdrlen, case OID_gost2012PKey512: ctx->cert->pub->pkey_algo = "ecrdsa"; break; - case OID_sm2: - ctx->cert->pub->pkey_algo = "sm2"; - break; case OID_id_ecPublicKey: if (parse_OID(ctx->params, ctx->params_size, &oid) != 0) return -EBADMSG; switch (oid) { - case OID_sm2: - ctx->cert->pub->pkey_algo = "sm2"; - break; case OID_id_prime192v1: ctx->cert->pub->pkey_algo = "ecdsa-nist-p192"; break; diff --git a/crypto/asymmetric_keys/x509_public_key.c b/crypto/asymmetric_keys/x509_public_key.c index 00ac7159fba2..8409d7d36cb4 100644 --- a/crypto/asymmetric_keys/x509_public_key.c +++ b/crypto/asymmetric_keys/x509_public_key.c @@ -7,7 +7,6 @@ #define pr_fmt(fmt) "X.509: "fmt #include -#include #include #include #include @@ -64,20 +63,8 @@ int x509_get_sig_params(struct x509_certificate *cert) desc->tfm = tfm; - if (strcmp(cert->pub->pkey_algo, "sm2") == 0) { - ret = strcmp(sig->hash_algo, "sm3") != 0 ? -EINVAL : - crypto_shash_init(desc) ?: - sm2_compute_z_digest(desc, cert->pub->key, - cert->pub->keylen, sig->digest) ?: - crypto_shash_init(desc) ?: - crypto_shash_update(desc, sig->digest, - sig->digest_size) ?: - crypto_shash_finup(desc, cert->tbs, cert->tbs_size, - sig->digest); - } else { - ret = crypto_shash_digest(desc, cert->tbs, cert->tbs_size, - sig->digest); - } + ret = crypto_shash_digest(desc, cert->tbs, cert->tbs_size, + sig->digest); if (ret < 0) goto error_2; diff --git a/crypto/sm2.c b/crypto/sm2.c deleted file mode 100644 index 5ab120d74c59..000000000000 --- a/crypto/sm2.c +++ /dev/null @@ -1,498 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * SM2 asymmetric public-key algorithm - * as specified by OSCCA GM/T 0003.1-2012 -- 0003.5-2012 SM2 and - * described at https://tools.ietf.org/html/draft-shen-sm2-ecdsa-02 - * - * Copyright (c) 2020, Alibaba Group. - * Authors: Tianjia Zhang - */ - -#include -#include -#include -#include -#include -#include -#include -#include "sm2signature.asn1.h" - -/* The default user id as specified in GM/T 0009-2012 */ -#define SM2_DEFAULT_USERID "1234567812345678" -#define SM2_DEFAULT_USERID_LEN 16 - -#define MPI_NBYTES(m) ((mpi_get_nbits(m) + 7) / 8) - -struct ecc_domain_parms { - const char *desc; /* Description of the curve. */ - unsigned int nbits; /* Number of bits. */ - unsigned int fips:1; /* True if this is a FIPS140-2 approved curve */ - - /* The model describing this curve. This is mainly used to select - * the group equation. - */ - enum gcry_mpi_ec_models model; - - /* The actual ECC dialect used. This is used for curve specific - * optimizations and to select encodings etc. - */ - enum ecc_dialects dialect; - - const char *p; /* The prime defining the field. */ - const char *a, *b; /* The coefficients. For Twisted Edwards - * Curves b is used for d. For Montgomery - * Curves (a,b) has ((A-2)/4,B^-1). - */ - const char *n; /* The order of the base point. */ - const char *g_x, *g_y; /* Base point. */ - unsigned int h; /* Cofactor. */ -}; - -static const struct ecc_domain_parms sm2_ecp = { - .desc = "sm2p256v1", - .nbits = 256, - .fips = 0, - .model = MPI_EC_WEIERSTRASS, - .dialect = ECC_DIALECT_STANDARD, - .p = "0xfffffffeffffffffffffffffffffffffffffffff00000000ffffffffffffffff", - .a = "0xfffffffeffffffffffffffffffffffffffffffff00000000fffffffffffffffc", - .b = "0x28e9fa9e9d9f5e344d5a9e4bcf6509a7f39789f515ab8f92ddbcbd414d940e93", - .n = "0xfffffffeffffffffffffffffffffffff7203df6b21c6052b53bbf40939d54123", - .g_x = "0x32c4ae2c1f1981195f9904466a39c9948fe30bbff2660be1715a4589334c74c7", - .g_y = "0xbc3736a2f4f6779c59bdcee36b692153d0a9877cc62a474002df32e52139f0a0", - .h = 1 -}; - -static int __sm2_set_pub_key(struct mpi_ec_ctx *ec, - const void *key, unsigned int keylen); - -static int sm2_ec_ctx_init(struct mpi_ec_ctx *ec) -{ - const struct ecc_domain_parms *ecp = &sm2_ecp; - MPI p, a, b; - MPI x, y; - int rc = -EINVAL; - - p = mpi_scanval(ecp->p); - a = mpi_scanval(ecp->a); - b = mpi_scanval(ecp->b); - if (!p || !a || !b) - goto free_p; - - x = mpi_scanval(ecp->g_x); - y = mpi_scanval(ecp->g_y); - if (!x || !y) - goto free; - - rc = -ENOMEM; - - ec->Q = mpi_point_new(0); - if (!ec->Q) - goto free; - - /* mpi_ec_setup_elliptic_curve */ - ec->G = mpi_point_new(0); - if (!ec->G) { - mpi_point_release(ec->Q); - goto free; - } - - mpi_set(ec->G->x, x); - mpi_set(ec->G->y, y); - mpi_set_ui(ec->G->z, 1); - - rc = -EINVAL; - ec->n = mpi_scanval(ecp->n); - if (!ec->n) { - mpi_point_release(ec->Q); - mpi_point_release(ec->G); - goto free; - } - - ec->h = ecp->h; - ec->name = ecp->desc; - mpi_ec_init(ec, ecp->model, ecp->dialect, 0, p, a, b); - - rc = 0; - -free: - mpi_free(x); - mpi_free(y); -free_p: - mpi_free(p); - mpi_free(a); - mpi_free(b); - - return rc; -} - -static void sm2_ec_ctx_deinit(struct mpi_ec_ctx *ec) -{ - mpi_ec_deinit(ec); - - memset(ec, 0, sizeof(*ec)); -} - -/* RESULT must have been initialized and is set on success to the - * point given by VALUE. - */ -static int sm2_ecc_os2ec(MPI_POINT result, MPI value) -{ - int rc; - size_t n; - unsigned char *buf; - MPI x, y; - - n = MPI_NBYTES(value); - buf = kmalloc(n, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - rc = mpi_print(GCRYMPI_FMT_USG, buf, n, &n, value); - if (rc) - goto err_freebuf; - - rc = -EINVAL; - if (n < 1 || ((n - 1) % 2)) - goto err_freebuf; - /* No support for point compression */ - if (*buf != 0x4) - goto err_freebuf; - - rc = -ENOMEM; - n = (n - 1) / 2; - x = mpi_read_raw_data(buf + 1, n); - if (!x) - goto err_freebuf; - y = mpi_read_raw_data(buf + 1 + n, n); - if (!y) - goto err_freex; - - mpi_normalize(x); - mpi_normalize(y); - mpi_set(result->x, x); - mpi_set(result->y, y); - mpi_set_ui(result->z, 1); - - rc = 0; - - mpi_free(y); -err_freex: - mpi_free(x); -err_freebuf: - kfree(buf); - return rc; -} - -struct sm2_signature_ctx { - MPI sig_r; - MPI sig_s; -}; - -int sm2_get_signature_r(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) -{ - struct sm2_signature_ctx *sig = context; - - if (!value || !vlen) - return -EINVAL; - - sig->sig_r = mpi_read_raw_data(value, vlen); - if (!sig->sig_r) - return -ENOMEM; - - return 0; -} - -int sm2_get_signature_s(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) -{ - struct sm2_signature_ctx *sig = context; - - if (!value || !vlen) - return -EINVAL; - - sig->sig_s = mpi_read_raw_data(value, vlen); - if (!sig->sig_s) - return -ENOMEM; - - return 0; -} - -static int sm2_z_digest_update(struct shash_desc *desc, - MPI m, unsigned int pbytes) -{ - static const unsigned char zero[32]; - unsigned char *in; - unsigned int inlen; - int err; - - in = mpi_get_buffer(m, &inlen, NULL); - if (!in) - return -EINVAL; - - if (inlen < pbytes) { - /* padding with zero */ - err = crypto_shash_update(desc, zero, pbytes - inlen) ?: - crypto_shash_update(desc, in, inlen); - } else if (inlen > pbytes) { - /* skip the starting zero */ - err = crypto_shash_update(desc, in + inlen - pbytes, pbytes); - } else { - err = crypto_shash_update(desc, in, inlen); - } - - kfree(in); - return err; -} - -static int sm2_z_digest_update_point(struct shash_desc *desc, - MPI_POINT point, struct mpi_ec_ctx *ec, - unsigned int pbytes) -{ - MPI x, y; - int ret = -EINVAL; - - x = mpi_new(0); - y = mpi_new(0); - - ret = mpi_ec_get_affine(x, y, point, ec) ? -EINVAL : - sm2_z_digest_update(desc, x, pbytes) ?: - sm2_z_digest_update(desc, y, pbytes); - - mpi_free(x); - mpi_free(y); - return ret; -} - -int sm2_compute_z_digest(struct shash_desc *desc, - const void *key, unsigned int keylen, void *dgst) -{ - struct mpi_ec_ctx *ec; - unsigned int bits_len; - unsigned int pbytes; - u8 entl[2]; - int err; - - ec = kmalloc(sizeof(*ec), GFP_KERNEL); - if (!ec) - return -ENOMEM; - - err = sm2_ec_ctx_init(ec); - if (err) - goto out_free_ec; - - err = __sm2_set_pub_key(ec, key, keylen); - if (err) - goto out_deinit_ec; - - bits_len = SM2_DEFAULT_USERID_LEN * 8; - entl[0] = bits_len >> 8; - entl[1] = bits_len & 0xff; - - pbytes = MPI_NBYTES(ec->p); - - /* ZA = H256(ENTLA | IDA | a | b | xG | yG | xA | yA) */ - err = crypto_shash_init(desc); - if (err) - goto out_deinit_ec; - - err = crypto_shash_update(desc, entl, 2); - if (err) - goto out_deinit_ec; - - err = crypto_shash_update(desc, SM2_DEFAULT_USERID, - SM2_DEFAULT_USERID_LEN); - if (err) - goto out_deinit_ec; - - err = sm2_z_digest_update(desc, ec->a, pbytes) ?: - sm2_z_digest_update(desc, ec->b, pbytes) ?: - sm2_z_digest_update_point(desc, ec->G, ec, pbytes) ?: - sm2_z_digest_update_point(desc, ec->Q, ec, pbytes); - if (err) - goto out_deinit_ec; - - err = crypto_shash_final(desc, dgst); - -out_deinit_ec: - sm2_ec_ctx_deinit(ec); -out_free_ec: - kfree(ec); - return err; -} -EXPORT_SYMBOL_GPL(sm2_compute_z_digest); - -static int _sm2_verify(struct mpi_ec_ctx *ec, MPI hash, MPI sig_r, MPI sig_s) -{ - int rc = -EINVAL; - struct gcry_mpi_point sG, tP; - MPI t = NULL; - MPI x1 = NULL, y1 = NULL; - - mpi_point_init(&sG); - mpi_point_init(&tP); - x1 = mpi_new(0); - y1 = mpi_new(0); - t = mpi_new(0); - - /* r, s in [1, n-1] */ - if (mpi_cmp_ui(sig_r, 1) < 0 || mpi_cmp(sig_r, ec->n) > 0 || - mpi_cmp_ui(sig_s, 1) < 0 || mpi_cmp(sig_s, ec->n) > 0) { - goto leave; - } - - /* t = (r + s) % n, t == 0 */ - mpi_addm(t, sig_r, sig_s, ec->n); - if (mpi_cmp_ui(t, 0) == 0) - goto leave; - - /* sG + tP = (x1, y1) */ - rc = -EBADMSG; - mpi_ec_mul_point(&sG, sig_s, ec->G, ec); - mpi_ec_mul_point(&tP, t, ec->Q, ec); - mpi_ec_add_points(&sG, &sG, &tP, ec); - if (mpi_ec_get_affine(x1, y1, &sG, ec)) - goto leave; - - /* R = (e + x1) % n */ - mpi_addm(t, hash, x1, ec->n); - - /* check R == r */ - rc = -EKEYREJECTED; - if (mpi_cmp(t, sig_r)) - goto leave; - - rc = 0; - -leave: - mpi_point_free_parts(&sG); - mpi_point_free_parts(&tP); - mpi_free(x1); - mpi_free(y1); - mpi_free(t); - - return rc; -} - -static int sm2_verify(struct akcipher_request *req) -{ - struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct mpi_ec_ctx *ec = akcipher_tfm_ctx(tfm); - unsigned char *buffer; - struct sm2_signature_ctx sig; - MPI hash; - int ret; - - if (unlikely(!ec->Q)) - return -EINVAL; - - buffer = kmalloc(req->src_len + req->dst_len, GFP_KERNEL); - if (!buffer) - return -ENOMEM; - - sg_pcopy_to_buffer(req->src, - sg_nents_for_len(req->src, req->src_len + req->dst_len), - buffer, req->src_len + req->dst_len, 0); - - sig.sig_r = NULL; - sig.sig_s = NULL; - ret = asn1_ber_decoder(&sm2signature_decoder, &sig, - buffer, req->src_len); - if (ret) - goto error; - - ret = -ENOMEM; - hash = mpi_read_raw_data(buffer + req->src_len, req->dst_len); - if (!hash) - goto error; - - ret = _sm2_verify(ec, hash, sig.sig_r, sig.sig_s); - - mpi_free(hash); -error: - mpi_free(sig.sig_r); - mpi_free(sig.sig_s); - kfree(buffer); - return ret; -} - -static int sm2_set_pub_key(struct crypto_akcipher *tfm, - const void *key, unsigned int keylen) -{ - struct mpi_ec_ctx *ec = akcipher_tfm_ctx(tfm); - - return __sm2_set_pub_key(ec, key, keylen); - -} - -static int __sm2_set_pub_key(struct mpi_ec_ctx *ec, - const void *key, unsigned int keylen) -{ - MPI a; - int rc; - - /* include the uncompressed flag '0x04' */ - a = mpi_read_raw_data(key, keylen); - if (!a) - return -ENOMEM; - - mpi_normalize(a); - rc = sm2_ecc_os2ec(ec->Q, a); - mpi_free(a); - - return rc; -} - -static unsigned int sm2_max_size(struct crypto_akcipher *tfm) -{ - /* Unlimited max size */ - return PAGE_SIZE; -} - -static int sm2_init_tfm(struct crypto_akcipher *tfm) -{ - struct mpi_ec_ctx *ec = akcipher_tfm_ctx(tfm); - - return sm2_ec_ctx_init(ec); -} - -static void sm2_exit_tfm(struct crypto_akcipher *tfm) -{ - struct mpi_ec_ctx *ec = akcipher_tfm_ctx(tfm); - - sm2_ec_ctx_deinit(ec); -} - -static struct akcipher_alg sm2 = { - .verify = sm2_verify, - .set_pub_key = sm2_set_pub_key, - .max_size = sm2_max_size, - .init = sm2_init_tfm, - .exit = sm2_exit_tfm, - .base = { - .cra_name = "sm2", - .cra_driver_name = "sm2-generic", - .cra_priority = 100, - .cra_module = THIS_MODULE, - .cra_ctxsize = sizeof(struct mpi_ec_ctx), - }, -}; - -static int __init sm2_init(void) -{ - return crypto_register_akcipher(&sm2); -} - -static void __exit sm2_exit(void) -{ - crypto_unregister_akcipher(&sm2); -} - -subsys_initcall(sm2_init); -module_exit(sm2_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Tianjia Zhang "); -MODULE_DESCRIPTION("SM2 generic algorithm"); -MODULE_ALIAS_CRYPTO("sm2-generic"); diff --git a/crypto/sm2signature.asn1 b/crypto/sm2signature.asn1 deleted file mode 100644 index ab8c0b754d21..000000000000 --- a/crypto/sm2signature.asn1 +++ /dev/null @@ -1,4 +0,0 @@ -Sm2Signature ::= SEQUENCE { - sig_r INTEGER ({ sm2_get_signature_r }), - sig_s INTEGER ({ sm2_get_signature_s }) -} diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 1b30e3565e80..a780b615f8c6 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -5608,12 +5608,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(sha512_tv_template) } - }, { - .alg = "sm2", - .test = alg_test_akcipher, - .suite = { - .akcipher = __VECS(sm2_tv_template) - } }, { .alg = "sm3", .test = alg_test_hash, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 5350cfd9d325..9b38501a17b2 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -5774,65 +5774,6 @@ static const struct hash_testvec hmac_streebog512_tv_template[] = { }, }; -/* - * SM2 test vectors. - */ -static const struct akcipher_testvec sm2_tv_template[] = { - { /* Generated from openssl */ - .key = - "\x04" - "\x8e\xa0\x33\x69\x91\x7e\x3d\xec\xad\x8e\xf0\x45\x5e\x13\x3e\x68" - "\x5b\x8c\xab\x5c\xc6\xc8\x50\xdf\x91\x00\xe0\x24\x73\x4d\x31\xf2" - "\x2e\xc0\xd5\x6b\xee\xda\x98\x93\xec\xd8\x36\xaa\xb9\xcf\x63\x82" - "\xef\xa7\x1a\x03\xed\x16\xba\x74\xb8\x8b\xf9\xe5\x70\x39\xa4\x70", - .key_len = 65, - .param_len = 0, - .c = - "\x30\x45" - "\x02\x20" - "\x70\xab\xb6\x7d\xd6\x54\x80\x64\x42\x7e\x2d\x05\x08\x36\xc9\x96" - "\x25\xc2\xbb\xff\x08\xe5\x43\x15\x5e\xf3\x06\xd9\x2b\x2f\x0a\x9f" - "\x02\x21" - "\x00" - "\xbf\x21\x5f\x7e\x5d\x3f\x1a\x4d\x8f\x84\xc2\xe9\xa6\x4c\xa4\x18" - "\xb2\xb8\x46\xf4\x32\x96\xfa\x57\xc6\x29\xd4\x89\xae\xcc\xda\xdb", - .c_size = 71, - .algo = OID_SM2_with_SM3, - .m = - "\x47\xa7\xbf\xd3\xda\xc4\x79\xee\xda\x8b\x4f\xe8\x40\x94\xd4\x32" - "\x8f\xf1\xcd\x68\x4d\xbd\x9b\x1d\xe0\xd8\x9a\x5d\xad\x85\x47\x5c", - .m_size = 32, - .public_key_vec = true, - .siggen_sigver_test = true, - }, - { /* From libgcrypt */ - .key = - "\x04" - "\x87\x59\x38\x9a\x34\xaa\xad\x07\xec\xf4\xe0\xc8\xc2\x65\x0a\x44" - "\x59\xc8\xd9\x26\xee\x23\x78\x32\x4e\x02\x61\xc5\x25\x38\xcb\x47" - "\x75\x28\x10\x6b\x1e\x0b\x7c\x8d\xd5\xff\x29\xa9\xc8\x6a\x89\x06" - "\x56\x56\xeb\x33\x15\x4b\xc0\x55\x60\x91\xef\x8a\xc9\xd1\x7d\x78", - .key_len = 65, - .param_len = 0, - .c = - "\x30\x44" - "\x02\x20" - "\xd9\xec\xef\xe8\x5f\xee\x3c\x59\x57\x8e\x5b\xab\xb3\x02\xe1\x42" - "\x4b\x67\x2c\x0b\x26\xb6\x51\x2c\x3e\xfc\xc6\x49\xec\xfe\x89\xe5" - "\x02\x20" - "\x43\x45\xd0\xa5\xff\xe5\x13\x27\x26\xd0\xec\x37\xad\x24\x1e\x9a" - "\x71\x9a\xa4\x89\xb0\x7e\x0f\xc4\xbb\x2d\x50\xd0\xe5\x7f\x7a\x68", - .c_size = 70, - .algo = OID_SM2_with_SM3, - .m = - "\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa\xbb\xcc\xdd\xee\xff\x00" - "\x12\x34\x56\x78\x9a\xbc\xde\xf0\x12\x34\x56\x78\x9a\xbc\xde\xf0", - .m_size = 32, - .public_key_vec = true, - .siggen_sigver_test = true, - }, -}; - /* Example vectors below taken from * http://www.oscca.gov.cn/UpFile/20101222141857786.pdf * diff --git a/include/crypto/sm2.h b/include/crypto/sm2.h deleted file mode 100644 index 04a92c1013c8..000000000000 --- a/include/crypto/sm2.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * sm2.h - SM2 asymmetric public-key algorithm - * as specified by OSCCA GM/T 0003.1-2012 -- 0003.5-2012 SM2 and - * described at https://tools.ietf.org/html/draft-shen-sm2-ecdsa-02 - * - * Copyright (c) 2020, Alibaba Group. - * Written by Tianjia Zhang - */ - -#ifndef _CRYPTO_SM2_H -#define _CRYPTO_SM2_H - -struct shash_desc; - -#if IS_REACHABLE(CONFIG_CRYPTO_SM2) -int sm2_compute_z_digest(struct shash_desc *desc, - const void *key, unsigned int keylen, void *dgst); -#else -static inline int sm2_compute_z_digest(struct shash_desc *desc, - const void *key, unsigned int keylen, - void *dgst) -{ - return -ENOTSUPP; -} -#endif - -#endif /* _CRYPTO_SM2_H */ diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c index de603cf42ac7..457c0a396caf 100644 --- a/security/integrity/digsig_asymmetric.c +++ b/security/integrity/digsig_asymmetric.c @@ -114,8 +114,7 @@ int asymmetric_verify(struct key *keyring, const char *sig, } else if (!strncmp(pk->pkey_algo, "ecdsa-", 6)) { /* edcsa-nist-p192 etc. */ pks.encoding = "x962"; - } else if (!strcmp(pk->pkey_algo, "ecrdsa") || - !strcmp(pk->pkey_algo, "sm2")) { + } else if (!strcmp(pk->pkey_algo, "ecrdsa")) { pks.encoding = "raw"; } else { ret = -ENOPKG; -- cgit v1.2.3 From df3fb27a74a4eeb1436129024a7e957c2e83a95e Mon Sep 17 00:00:00 2001 From: Noralf Trønnes Date: Tue, 4 Jun 2024 15:20:30 +0200 Subject: drm/mipi-dbi: Make bits per word configurable for pixel transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MIPI DCS write/set commands have 8 bit parameters except for the write_memory commands where it depends on the pixel format. drm_mipi_dbi does currently only support RGB565 which is 16-bit and it has to make sure that the pixels enters the SPI bus in big endian format since the MIPI DBI spec doesn't have support for little endian. drm_mipi_dbi is optimized for DBI interface option 3 which means that the 16-bit bytes are swapped by the upper layer if the SPI bus does not support 16 bits per word, signified by the swap_bytes member. In order to support both 16-bit and 24-bit pixel transfers we need a way to tell the DBI command layer the format of the buffer. Add a write_memory_bpw member that the upper layer can use to tell how many bits per word to use for the SPI transfer. v4: - Expand the commit message (Dmitry) Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240604-panel-mipi-dbi-rgb666-v4-3-d7c2bcb9b78d@tronnes.org Signed-off-by: Noralf Trønnes --- drivers/gpu/drm/drm_mipi_dbi.c | 14 ++++++++++---- include/drm/drm_mipi_dbi.h | 5 +++++ 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index fa8aba6dc81c..77f8a828d6e0 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -1079,7 +1079,7 @@ static int mipi_dbi_typec1_command_read(struct mipi_dbi *dbi, u8 *cmd, static int mipi_dbi_typec1_command(struct mipi_dbi *dbi, u8 *cmd, u8 *parameters, size_t num) { - unsigned int bpw = (*cmd == MIPI_DCS_WRITE_MEMORY_START) ? 16 : 8; + unsigned int bpw = 8; int ret; if (mipi_dbi_command_is_read(dbi, *cmd)) @@ -1091,6 +1091,9 @@ static int mipi_dbi_typec1_command(struct mipi_dbi *dbi, u8 *cmd, if (ret || !num) return ret; + if (*cmd == MIPI_DCS_WRITE_MEMORY_START) + bpw = dbi->write_memory_bpw; + return mipi_dbi_spi1_transfer(dbi, 1, parameters, num, bpw); } @@ -1184,8 +1187,8 @@ static int mipi_dbi_typec3_command(struct mipi_dbi *dbi, u8 *cmd, if (ret || !num) return ret; - if (*cmd == MIPI_DCS_WRITE_MEMORY_START && !dbi->swap_bytes) - bpw = 16; + if (*cmd == MIPI_DCS_WRITE_MEMORY_START) + bpw = dbi->write_memory_bpw; spi_bus_lock(spi->controller); gpiod_set_value_cansleep(dbi->dc, 1); @@ -1256,12 +1259,15 @@ int mipi_dbi_spi_init(struct spi_device *spi, struct mipi_dbi *dbi, dbi->spi = spi; dbi->read_commands = mipi_dbi_dcs_read_commands; + dbi->write_memory_bpw = 16; if (dc) { dbi->command = mipi_dbi_typec3_command; dbi->dc = dc; - if (!spi_is_bpw_supported(spi, 16)) + if (!spi_is_bpw_supported(spi, 16)) { + dbi->write_memory_bpw = 8; dbi->swap_bytes = true; + } } else { dbi->command = mipi_dbi_typec1_command; dbi->tx_buf9_len = SZ_16K; diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h index e8e0f8d39f3a..b36596efdcc3 100644 --- a/include/drm/drm_mipi_dbi.h +++ b/include/drm/drm_mipi_dbi.h @@ -56,6 +56,11 @@ struct mipi_dbi { */ struct spi_device *spi; + /** + * @write_memory_bpw: Bits per word used on a MIPI_DCS_WRITE_MEMORY_START transfer + */ + unsigned int write_memory_bpw; + /** * @dc: Optional D/C gpio. */ -- cgit v1.2.3 From 4aebb79021f3e6c2b6fbb92a7d9c5d1e6ad0324a Mon Sep 17 00:00:00 2001 From: Noralf Trønnes Date: Tue, 4 Jun 2024 15:20:31 +0200 Subject: drm/mipi-dbi: Add support for DRM_FORMAT_RGB888 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DRM_FORMAT_RGB888 is 24 bits per pixel and it would be natural to send it on the SPI bus using a 24 bits per word transfer. The problem with this is that not all SPI controllers support 24 bpw. Since DRM_FORMAT_RGB888 is stored in memory as little endian and the SPI bus is big endian we use 8 bpw to always get the same pixel format on the bus: b8g8r8. The MIPI DCS specification lists the standard commands that can be sent over the MIPI DBI interface. The set_address_mode (36h) command has one bit in the parameter that controls RGB/BGR order. This means that the controller can be configured to receive the pixel as BGR. RGB888 is rarely supported on these controllers but RGB666 is very common. All datasheets I have seen do at least support the pixel format option where each color is sent as one byte and the 6 MSB's are used. All this put together means that we can send each pixel as b8g8r8 and an RGB666 capable controller sees this as b6x2g6x2r6x2. v4: - s/emulation_format/pixel_format/ (Dmitry) Reviewed-by: Dmitry Baryshkov Link: https://patchwork.freedesktop.org/patch/msgid/20240604-panel-mipi-dbi-rgb666-v4-4-d7c2bcb9b78d@tronnes.org Signed-off-by: Noralf Trønnes --- drivers/gpu/drm/drm_mipi_dbi.c | 29 +++++++++++++++++++++++++---- include/drm/drm_mipi_dbi.h | 5 +++++ 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 77f8a828d6e0..1661190c29a2 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -206,6 +206,7 @@ int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer * struct drm_rect *clip, bool swap, struct drm_format_conv_state *fmtcnv_state) { + struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev); struct drm_gem_object *gem = drm_gem_fb_get_obj(fb, 0); struct iosys_map dst_map = IOSYS_MAP_INIT_VADDR(dst); int ret; @@ -222,8 +223,18 @@ int mipi_dbi_buf_copy(void *dst, struct iosys_map *src, struct drm_framebuffer * else drm_fb_memcpy(&dst_map, NULL, src, fb, clip); break; + case DRM_FORMAT_RGB888: + drm_fb_memcpy(&dst_map, NULL, src, fb, clip); + break; case DRM_FORMAT_XRGB8888: - drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, fmtcnv_state, swap); + switch (dbidev->pixel_format) { + case DRM_FORMAT_RGB565: + drm_fb_xrgb8888_to_rgb565(&dst_map, NULL, src, fb, clip, fmtcnv_state, swap); + break; + case DRM_FORMAT_RGB888: + drm_fb_xrgb8888_to_rgb888(&dst_map, NULL, src, fb, clip, fmtcnv_state); + break; + } break; default: drm_err_once(fb->dev, "Format is not supported: %p4cc\n", @@ -260,9 +271,11 @@ static void mipi_dbi_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, struct mipi_dbi_dev *dbidev = drm_to_mipi_dbi_dev(fb->dev); unsigned int height = rect->y2 - rect->y1; unsigned int width = rect->x2 - rect->x1; + const struct drm_format_info *dst_format; struct mipi_dbi *dbi = &dbidev->dbi; bool swap = dbi->swap_bytes; int ret = 0; + size_t len; bool full; void *tr; @@ -283,8 +296,13 @@ static void mipi_dbi_fb_dirty(struct iosys_map *src, struct drm_framebuffer *fb, mipi_dbi_set_window_address(dbidev, rect->x1, rect->x2 - 1, rect->y1, rect->y2 - 1); - ret = mipi_dbi_command_buf(dbi, MIPI_DCS_WRITE_MEMORY_START, tr, - width * height * 2); + if (fb->format->format == DRM_FORMAT_XRGB8888) + dst_format = drm_format_info(dbidev->pixel_format); + else + dst_format = fb->format; + len = drm_format_info_min_pitch(dst_format, 0, width) * height; + + ret = mipi_dbi_command_buf(dbi, MIPI_DCS_WRITE_MEMORY_START, tr, len); err_msg: if (ret) drm_err_once(fb->dev, "Failed to update display %d\n", ret); @@ -572,7 +590,7 @@ static const uint32_t mipi_dbi_formats[] = { * has one fixed &drm_display_mode which is rotated according to @rotation. * This mode is used to set the mode config min/max width/height properties. * - * Use mipi_dbi_dev_init() if you don't need custom formats. + * Use mipi_dbi_dev_init() if you want native RGB565 and emulated XRGB8888 format. * * Note: * Some of the helper functions expects RGB565 to be the default format and the @@ -631,6 +649,9 @@ int mipi_dbi_dev_init_with_formats(struct mipi_dbi_dev *dbidev, drm->mode_config.min_height = dbidev->mode.vdisplay; drm->mode_config.max_height = dbidev->mode.vdisplay; dbidev->rotation = rotation; + dbidev->pixel_format = formats[0]; + if (formats[0] == DRM_FORMAT_RGB888) + dbidev->dbi.write_memory_bpw = 8; DRM_DEBUG_KMS("rotation = %u\n", rotation); diff --git a/include/drm/drm_mipi_dbi.h b/include/drm/drm_mipi_dbi.h index b36596efdcc3..f45f9612c0bc 100644 --- a/include/drm/drm_mipi_dbi.h +++ b/include/drm/drm_mipi_dbi.h @@ -101,6 +101,11 @@ struct mipi_dbi_dev { */ struct drm_display_mode mode; + /** + * @pixel_format: Native pixel format (DRM_FORMAT\_\*) + */ + u32 pixel_format; + /** * @tx_buf: Buffer used for transfer (copy clip rect area) */ -- cgit v1.2.3 From f05eda16037f9363297561bd28f318a6d7833d35 Mon Sep 17 00:00:00 2001 From: Yan Zhao Date: Fri, 8 Mar 2024 17:09:27 -0800 Subject: srcu: Add an API for a memory barrier after SRCU read lock To avoid redundant memory barriers, add smp_mb__after_srcu_read_lock() to pair with smp_mb__after_srcu_read_unlock() for use in paths that need to emit a memory barrier, but already do srcu_read_lock(), which includes a full memory barrier. Provide an API, e.g. as opposed to having callers document the behavior via a comment, as the full memory barrier provided by srcu_read_lock() is an implementation detail that shouldn't bleed into random subsystems. KVM will use smp_mb__after_srcu_read_lock() in it's VM-Exit path to ensure a memory barrier is emitted, which is necessary to ensure correctness of mixed memory types on CPUs that support self-snoop. Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Kevin Tian Signed-off-by: Yan Zhao [sean: massage changelog] Tested-by: Xiangfei Ma Tested-by: Yongwei Ma Reviewed-by: Paul E. McKenney --- include/linux/srcu.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 236610e4a8fa..1cb4527076de 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -343,6 +343,20 @@ static inline void smp_mb__after_srcu_read_unlock(void) /* __srcu_read_unlock has smp_mb() internally so nothing to do here. */ } +/** + * smp_mb__after_srcu_read_lock - ensure full ordering after srcu_read_lock + * + * Converts the preceding srcu_read_lock into a two-way memory barrier. + * + * Call this after srcu_read_lock, to guarantee that all memory operations + * that occur after smp_mb__after_srcu_read_lock will appear to happen after + * the preceding srcu_read_lock. + */ +static inline void smp_mb__after_srcu_read_lock(void) +{ + /* __srcu_read_lock has smp_mb() internally so nothing to do here. */ +} + DEFINE_LOCK_GUARD_1(srcu, struct srcu_struct, _T->idx = srcu_read_lock(_T->lock), srcu_read_unlock(_T->lock, _T->idx), -- cgit v1.2.3 From 6a79a4e187bdd17959ff3e811d5de65c066f89e6 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 6 Jun 2024 10:33:49 +0300 Subject: libfs: Introduce case-insensitive string comparison helper generic_ci_match can be used by case-insensitive filesystems to compare strings under lookup with dirents in a case-insensitive way. This function is currently reimplemented by each filesystem supporting casefolding, so this reduces code duplication in filesystem-specific code. [eugen.hristev@collabora.com: rework to first test the exact match, cleanup and add error message] Signed-off-by: Gabriel Krisman Bertazi Signed-off-by: Eugen Hristev Link: https://lore.kernel.org/r/20240606073353.47130-4-eugen.hristev@collabora.com Reviewed-by: Gabriel Krisman Bertazi Signed-off-by: Christian Brauner --- fs/libfs.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 4 +++ 2 files changed, 78 insertions(+) (limited to 'include') diff --git a/fs/libfs.c b/fs/libfs.c index b635ee5adbcc..8aa34870449f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1854,6 +1854,80 @@ static const struct dentry_operations generic_ci_dentry_ops = { .d_revalidate = fscrypt_d_revalidate, #endif }; + +/** + * generic_ci_match() - Match a name (case-insensitively) with a dirent. + * This is a filesystem helper for comparison with directory entries. + * generic_ci_d_compare should be used in VFS' ->d_compare instead. + * + * @parent: Inode of the parent of the dirent under comparison + * @name: name under lookup. + * @folded_name: Optional pre-folded name under lookup + * @de_name: Dirent name. + * @de_name_len: dirent name length. + * + * Test whether a case-insensitive directory entry matches the filename + * being searched. If @folded_name is provided, it is used instead of + * recalculating the casefold of @name. + * + * Return: > 0 if the directory entry matches, 0 if it doesn't match, or + * < 0 on error. + */ +int generic_ci_match(const struct inode *parent, + const struct qstr *name, + const struct qstr *folded_name, + const u8 *de_name, u32 de_name_len) +{ + const struct super_block *sb = parent->i_sb; + const struct unicode_map *um = sb->s_encoding; + struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len); + struct qstr dirent = QSTR_INIT(de_name, de_name_len); + int res = 0; + + if (IS_ENCRYPTED(parent)) { + const struct fscrypt_str encrypted_name = + FSTR_INIT((u8 *) de_name, de_name_len); + + if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent))) + return -EINVAL; + + decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL); + if (!decrypted_name.name) + return -ENOMEM; + res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name, + &decrypted_name); + if (res < 0) { + kfree(decrypted_name.name); + return res; + } + dirent.name = decrypted_name.name; + dirent.len = decrypted_name.len; + } + + /* + * Attempt a case-sensitive match first. It is cheaper and + * should cover most lookups, including all the sane + * applications that expect a case-sensitive filesystem. + */ + + if (dirent.len == name->len && + !memcmp(name->name, dirent.name, dirent.len)) + goto out; + + if (folded_name->name) + res = utf8_strncasecmp_folded(um, folded_name, &dirent); + else + res = utf8_strncasecmp(um, name, &dirent); + +out: + kfree(decrypted_name.name); + if (res < 0 && sb_has_strict_encoding(sb)) { + pr_err_ratelimited("Directory contains filename that is invalid UTF-8"); + return 0; + } + return !res; +} +EXPORT_SYMBOL(generic_ci_match); #endif #ifdef CONFIG_FS_ENCRYPTION diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..2b1eeca426a0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3351,6 +3351,10 @@ extern int generic_file_fsync(struct file *, loff_t, loff_t, int); extern int generic_check_addressable(unsigned, u64); extern void generic_set_sb_d_ops(struct super_block *sb); +extern int generic_ci_match(const struct inode *parent, + const struct qstr *name, + const struct qstr *folded_name, + const u8 *de_name, u32 de_name_len); static inline bool sb_has_encoding(const struct super_block *sb) { -- cgit v1.2.3 From 231035f18d6b80e5c28732a20872398116a54ecd Mon Sep 17 00:00:00 2001 From: Wenchao Hao Date: Thu, 6 Jun 2024 16:52:15 +0800 Subject: workqueue: Increase worker desc's length to 32 Commit 31c89007285d ("workqueue.c: Increase workqueue name length") increased WQ_NAME_LEN from 24 to 32, but forget to increase WORKER_DESC_LEN, which would cause truncation when setting kworker's desc from workqueue_struct's name, process_one_work() for example. Fixes: 31c89007285d ("workqueue.c: Increase workqueue name length") Signed-off-by: Wenchao Hao CC: Audra Mitchell Signed-off-by: Tejun Heo --- include/linux/workqueue.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index fb3993894536..d9968bfc8eac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -95,7 +95,7 @@ enum wq_misc_consts { WORK_BUSY_RUNNING = 1 << 1, /* maximum string length for set_worker_desc() */ - WORKER_DESC_LEN = 24, + WORKER_DESC_LEN = 32, }; /* Convenience constants - of type 'unsigned long', not 'enum'! */ -- cgit v1.2.3 From 11c63e57404e538c5df91f732e5d505860edb660 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Thu, 16 May 2024 12:25:31 +0200 Subject: firmware: add nowarn variant of request_firmware_nowait() Device drivers with optional firmware may still want to use the asynchronous firmware loading interface. To avoid printing a warning into the kernel log when the optional firmware is absent, add a nowarn variant of this interface. Signed-off-by: Lucas Stach Reviewed-by: Greg Kroah-Hartman Reviewed-by: Luis Chamberlain Link: https://lore.kernel.org/r/20240516102532.213874-1-l.stach@pengutronix.de Signed-off-by: Vinod Koul --- drivers/base/firmware_loader/main.c | 90 ++++++++++++++++++++++++++----------- include/linux/firmware.h | 12 +++++ 2 files changed, 75 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index da8ca01d011c..a03ee4b11134 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -1172,34 +1172,11 @@ static void request_firmware_work_func(struct work_struct *work) kfree(fw_work); } -/** - * request_firmware_nowait() - asynchronous version of request_firmware - * @module: module requesting the firmware - * @uevent: sends uevent to copy the firmware image if this flag - * is non-zero else the firmware copy must be done manually. - * @name: name of firmware file - * @device: device for which firmware is being loaded - * @gfp: allocation flags - * @context: will be passed over to @cont, and - * @fw may be %NULL if firmware request fails. - * @cont: function will be called asynchronously when the firmware - * request is over. - * - * Caller must hold the reference count of @device. - * - * Asynchronous variant of request_firmware() for user contexts: - * - sleep for as small periods as possible since it may - * increase kernel boot time of built-in device drivers - * requesting firmware in their ->probe() methods, if - * @gfp is GFP_KERNEL. - * - * - can't sleep at all if @gfp is GFP_ATOMIC. - **/ -int -request_firmware_nowait( + +static int _request_firmware_nowait( struct module *module, bool uevent, const char *name, struct device *device, gfp_t gfp, void *context, - void (*cont)(const struct firmware *fw, void *context)) + void (*cont)(const struct firmware *fw, void *context), bool nowarn) { struct firmware_work *fw_work; @@ -1217,7 +1194,8 @@ request_firmware_nowait( fw_work->context = context; fw_work->cont = cont; fw_work->opt_flags = FW_OPT_NOWAIT | - (uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER); + (uevent ? FW_OPT_UEVENT : FW_OPT_USERHELPER) | + (nowarn ? FW_OPT_NO_WARN : 0); if (!uevent && fw_cache_is_setup(device, name)) { kfree_const(fw_work->name); @@ -1236,8 +1214,66 @@ request_firmware_nowait( schedule_work(&fw_work->work); return 0; } + +/** + * request_firmware_nowait() - asynchronous version of request_firmware + * @module: module requesting the firmware + * @uevent: sends uevent to copy the firmware image if this flag + * is non-zero else the firmware copy must be done manually. + * @name: name of firmware file + * @device: device for which firmware is being loaded + * @gfp: allocation flags + * @context: will be passed over to @cont, and + * @fw may be %NULL if firmware request fails. + * @cont: function will be called asynchronously when the firmware + * request is over. + * + * Caller must hold the reference count of @device. + * + * Asynchronous variant of request_firmware() for user contexts: + * - sleep for as small periods as possible since it may + * increase kernel boot time of built-in device drivers + * requesting firmware in their ->probe() methods, if + * @gfp is GFP_KERNEL. + * + * - can't sleep at all if @gfp is GFP_ATOMIC. + **/ +int request_firmware_nowait( + struct module *module, bool uevent, + const char *name, struct device *device, gfp_t gfp, void *context, + void (*cont)(const struct firmware *fw, void *context)) +{ + return _request_firmware_nowait(module, uevent, name, device, gfp, + context, cont, false); + +} EXPORT_SYMBOL(request_firmware_nowait); +/** + * firmware_request_nowait_nowarn() - async version of request_firmware_nowarn + * @module: module requesting the firmware + * @name: name of firmware file + * @device: device for which firmware is being loaded + * @gfp: allocation flags + * @context: will be passed over to @cont, and + * @fw may be %NULL if firmware request fails. + * @cont: function will be called asynchronously when the firmware + * request is over. + * + * Similar in function to request_firmware_nowait(), but doesn't print a warning + * when the firmware file could not be found and always sends a uevent to copy + * the firmware image. + */ +int firmware_request_nowait_nowarn( + struct module *module, const char *name, + struct device *device, gfp_t gfp, void *context, + void (*cont)(const struct firmware *fw, void *context)) +{ + return _request_firmware_nowait(module, FW_ACTION_UEVENT, name, device, + gfp, context, cont, true); +} +EXPORT_SYMBOL_GPL(firmware_request_nowait_nowarn); + #ifdef CONFIG_FW_CACHE static ASYNC_DOMAIN_EXCLUSIVE(fw_cache_domain); diff --git a/include/linux/firmware.h b/include/linux/firmware.h index f026f8926d79..aae1b85ffc10 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -98,6 +98,10 @@ static inline bool firmware_request_builtin(struct firmware *fw, #if IS_REACHABLE(CONFIG_FW_LOADER) int request_firmware(const struct firmware **fw, const char *name, struct device *device); +int firmware_request_nowait_nowarn( + struct module *module, const char *name, + struct device *device, gfp_t gfp, void *context, + void (*cont)(const struct firmware *fw, void *context)); int firmware_request_nowarn(const struct firmware **fw, const char *name, struct device *device); int firmware_request_platform(const struct firmware **fw, const char *name, @@ -123,6 +127,14 @@ static inline int request_firmware(const struct firmware **fw, return -EINVAL; } +static inline int firmware_request_nowait_nowarn( + struct module *module, const char *name, + struct device *device, gfp_t gfp, void *context, + void (*cont)(const struct firmware *fw, void *context)) +{ + return -EINVAL; +} + static inline int firmware_request_nowarn(const struct firmware **fw, const char *name, struct device *device) -- cgit v1.2.3 From a31a0a3e90b442c1a414be24b062e27624a40a8e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 28 May 2024 11:47:16 -0700 Subject: thermal: intel: intel_soc_dts_thermal: Switch to new Intel CPU model defines New CPU #defines encode vendor and family as well as model. Signed-off-by: Tony Luck Acked-by: Rafael J. Wysocki Signed-off-by: Rafael J. Wysocki --- drivers/thermal/intel/intel_soc_dts_thermal.c | 2 +- include/linux/platform_data/x86/soc.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/thermal/intel/intel_soc_dts_thermal.c b/drivers/thermal/intel/intel_soc_dts_thermal.c index 9c825c6e1f38..718c6326eaf4 100644 --- a/drivers/thermal/intel/intel_soc_dts_thermal.c +++ b/drivers/thermal/intel/intel_soc_dts_thermal.c @@ -36,7 +36,7 @@ static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data) } static const struct x86_cpu_id soc_thermal_ids[] = { - X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, BYT_SOC_DTS_APIC_IRQ), + X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, BYT_SOC_DTS_APIC_IRQ), {} }; MODULE_DEVICE_TABLE(x86cpu, soc_thermal_ids); diff --git a/include/linux/platform_data/x86/soc.h b/include/linux/platform_data/x86/soc.h index a5705189e2ac..f981907a5cb0 100644 --- a/include/linux/platform_data/x86/soc.h +++ b/include/linux/platform_data/x86/soc.h @@ -20,7 +20,7 @@ static inline bool soc_intel_is_##soc(void) \ { \ static const struct x86_cpu_id soc##_cpu_ids[] = { \ - X86_MATCH_INTEL_FAM6_MODEL(type, NULL), \ + X86_MATCH_VFM(type, NULL), \ {} \ }; \ const struct x86_cpu_id *id; \ @@ -31,11 +31,11 @@ static inline bool soc_intel_is_##soc(void) \ return false; \ } -SOC_INTEL_IS_CPU(byt, ATOM_SILVERMONT); -SOC_INTEL_IS_CPU(cht, ATOM_AIRMONT); -SOC_INTEL_IS_CPU(apl, ATOM_GOLDMONT); -SOC_INTEL_IS_CPU(glk, ATOM_GOLDMONT_PLUS); -SOC_INTEL_IS_CPU(cml, KABYLAKE_L); +SOC_INTEL_IS_CPU(byt, INTEL_ATOM_SILVERMONT); +SOC_INTEL_IS_CPU(cht, INTEL_ATOM_AIRMONT); +SOC_INTEL_IS_CPU(apl, INTEL_ATOM_GOLDMONT); +SOC_INTEL_IS_CPU(glk, INTEL_ATOM_GOLDMONT_PLUS); +SOC_INTEL_IS_CPU(cml, INTEL_KABYLAKE_L); #undef SOC_INTEL_IS_CPU -- cgit v1.2.3 From 2a1251e3dbb2995100b6f351c2452228895386a5 Mon Sep 17 00:00:00 2001 From: Konstantin Taranov Date: Fri, 7 Jun 2024 03:08:17 -0700 Subject: RDMA/mana_ib: Process QP error events in mana_ib Process QP fatal events from the error event queue. For that, find the QP, using QPN from the event, and then call its event_handler. To find the QPs, store created RC QPs in an xarray. Signed-off-by: Konstantin Taranov Link: https://lore.kernel.org/r/1717754897-19858-1-git-send-email-kotaranov@linux.microsoft.com Reviewed-by: Wei Hu Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/device.c | 3 +++ drivers/infiniband/hw/mana/main.c | 31 +++++++++++++++++++++++-- drivers/infiniband/hw/mana/mana_ib.h | 24 +++++++++++++++++++ drivers/infiniband/hw/mana/qp.c | 22 ++++++++++++++++++ drivers/net/ethernet/microsoft/mana/gdma_main.c | 1 + include/net/mana/gdma.h | 1 + 6 files changed, 80 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c index 9a7da2ec9cdb..b07a8e2e838f 100644 --- a/drivers/infiniband/hw/mana/device.c +++ b/drivers/infiniband/hw/mana/device.c @@ -126,6 +126,7 @@ static int mana_ib_probe(struct auxiliary_device *adev, if (ret) goto destroy_eqs; + xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ); ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr); if (ret) { ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d", @@ -143,6 +144,7 @@ static int mana_ib_probe(struct auxiliary_device *adev, return 0; destroy_rnic: + xa_destroy(&dev->qp_table_wq); mana_ib_gd_destroy_rnic_adapter(dev); destroy_eqs: mana_ib_destroy_eqs(dev); @@ -158,6 +160,7 @@ static void mana_ib_remove(struct auxiliary_device *adev) struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); ib_unregister_device(&dev->ib_dev); + xa_destroy(&dev->qp_table_wq); mana_ib_gd_destroy_rnic_adapter(dev); mana_ib_destroy_eqs(dev); mana_gd_deregister_device(dev->gdma_dev); diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c index 365b4f174a83..d13abc954d2a 100644 --- a/drivers/infiniband/hw/mana/main.c +++ b/drivers/infiniband/hw/mana/main.c @@ -667,6 +667,33 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev) return 0; } +static void +mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event) +{ + struct mana_ib_dev *mdev = (struct mana_ib_dev *)ctx; + struct mana_ib_qp *qp; + struct ib_event ev; + u32 qpn; + + switch (event->type) { + case GDMA_EQE_RNIC_QP_FATAL: + qpn = event->details[0]; + qp = mana_get_qp_ref(mdev, qpn); + if (!qp) + break; + if (qp->ibqp.event_handler) { + ev.device = qp->ibqp.device; + ev.element.qp = &qp->ibqp; + ev.event = IB_EVENT_QP_FATAL; + qp->ibqp.event_handler(&ev, qp->ibqp.qp_context); + } + mana_put_qp_ref(qp); + break; + default: + break; + } +} + int mana_ib_create_eqs(struct mana_ib_dev *mdev) { struct gdma_context *gc = mdev_to_gc(mdev); @@ -676,7 +703,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev) spec.type = GDMA_EQ; spec.monitor_avl_buf = false; spec.queue_size = EQ_SIZE; - spec.eq.callback = NULL; + spec.eq.callback = mana_ib_event_handler; spec.eq.context = mdev; spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE; spec.eq.msix_index = 0; @@ -691,7 +718,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev) err = -ENOMEM; goto destroy_fatal_eq; } - + spec.eq.callback = NULL; for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) { spec.eq.msix_index = (i + 1) % gc->num_msix_usable; err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->eqs[i]); diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h index 60bc548ba4be..977da9569701 100644 --- a/drivers/infiniband/hw/mana/mana_ib.h +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -62,6 +62,7 @@ struct mana_ib_dev { mana_handle_t adapter_handle; struct gdma_queue *fatal_err_eq; struct gdma_queue **eqs; + struct xarray qp_table_wq; struct mana_ib_adapter_caps adapter_caps; }; @@ -124,6 +125,9 @@ struct mana_ib_qp { /* The port on the IB device, starting with 1 */ u32 port; + + refcount_t refcount; + struct completion free; }; struct mana_ib_ucontext { @@ -333,6 +337,26 @@ static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev) return mdev->gdma_dev->gdma_context; } +static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev, + uint32_t qid) +{ + struct mana_ib_qp *qp; + unsigned long flag; + + xa_lock_irqsave(&mdev->qp_table_wq, flag); + qp = xa_load(&mdev->qp_table_wq, qid); + if (qp) + refcount_inc(&qp->refcount); + xa_unlock_irqrestore(&mdev->qp_table_wq, flag); + return qp; +} + +static inline void mana_put_qp_ref(struct mana_ib_qp *qp) +{ + if (refcount_dec_and_test(&qp->refcount)) + complete(&qp->free); +} + static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port) { struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index 34a93729db52..10cf73b569fa 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -398,6 +398,22 @@ err_free_vport: return err; } +static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp) +{ + refcount_set(&qp->refcount, 1); + init_completion(&qp->free); + return xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp, + GFP_KERNEL); +} + +static void mana_table_remove_qp(struct mana_ib_dev *mdev, + struct mana_ib_qp *qp) +{ + xa_erase_irq(&mdev->qp_table_wq, qp->ibqp.qp_num); + mana_put_qp_ref(qp); + wait_for_completion(&qp->free); +} + static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd, struct ib_qp_init_attr *attr, struct ib_udata *udata) { @@ -460,6 +476,10 @@ static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd, } } + err = mana_table_store_qp(mdev, qp); + if (err) + goto destroy_qp; + return 0; destroy_qp: @@ -620,6 +640,8 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata) container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); int i; + mana_table_remove_qp(mdev, qp); + /* Ignore return code as there is not much we can do about it. * The error message is printed inside. */ diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 1332db9a08eb..a489297f26a0 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -380,6 +380,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq) case GDMA_EQE_HWC_INIT_EQ_ID_DB: case GDMA_EQE_HWC_INIT_DATA: case GDMA_EQE_HWC_INIT_DONE: + case GDMA_EQE_RNIC_QP_FATAL: if (!eq->eq.callback) break; diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 27684135bb4d..44c797d5f899 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -60,6 +60,7 @@ enum gdma_eqe_type { GDMA_EQE_HWC_INIT_DONE = 131, GDMA_EQE_HWC_SOC_RECONFIG = 132, GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133, + GDMA_EQE_RNIC_QP_FATAL = 176, }; enum { -- cgit v1.2.3 From 195fb517ee25bfefde9c74ecd86348eccbd6d2e4 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 9 Jun 2024 17:39:24 -0700 Subject: cpu: Move CPU hotplug function declarations into their own header Avoid upcoming #include hell when wants to use lockdep_assert_cpus_held() and creates a #include loop that would break the build for arch/riscv. [ bp: s/cpu/CPU/g ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240610003927.341707-2-tony.luck@intel.com --- include/linux/cpu.h | 33 +-------------------------------- include/linux/cpuhplock.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 32 deletions(-) create mode 100644 include/linux/cpuhplock.h (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 861c3bfc5f17..a8926d0a28cd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -18,6 +18,7 @@ #include #include #include +#include #include struct device; @@ -132,38 +133,6 @@ static inline int add_cpu(unsigned int cpu) { return 0;} #endif /* CONFIG_SMP */ extern const struct bus_type cpu_subsys; -extern int lockdep_is_cpus_held(void); - -#ifdef CONFIG_HOTPLUG_CPU -extern void cpus_write_lock(void); -extern void cpus_write_unlock(void); -extern void cpus_read_lock(void); -extern void cpus_read_unlock(void); -extern int cpus_read_trylock(void); -extern void lockdep_assert_cpus_held(void); -extern void cpu_hotplug_disable(void); -extern void cpu_hotplug_enable(void); -void clear_tasks_mm_cpumask(int cpu); -int remove_cpu(unsigned int cpu); -int cpu_device_down(struct device *dev); -extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); - -#else /* CONFIG_HOTPLUG_CPU */ - -static inline void cpus_write_lock(void) { } -static inline void cpus_write_unlock(void) { } -static inline void cpus_read_lock(void) { } -static inline void cpus_read_unlock(void) { } -static inline int cpus_read_trylock(void) { return true; } -static inline void lockdep_assert_cpus_held(void) { } -static inline void cpu_hotplug_disable(void) { } -static inline void cpu_hotplug_enable(void) { } -static inline int remove_cpu(unsigned int cpu) { return -EPERM; } -static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } -#endif /* !CONFIG_HOTPLUG_CPU */ - -DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) - #ifdef CONFIG_PM_SLEEP_SMP extern int freeze_secondary_cpus(int primary); extern void thaw_secondary_cpus(void); diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h new file mode 100644 index 000000000000..386abc482264 --- /dev/null +++ b/include/linux/cpuhplock.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * include/linux/cpuhplock.h - CPU hotplug locking + * + * Locking functions for CPU hotplug. + */ +#ifndef _LINUX_CPUHPLOCK_H_ +#define _LINUX_CPUHPLOCK_H_ + +#include +#include + +struct device; + +extern int lockdep_is_cpus_held(void); + +#ifdef CONFIG_HOTPLUG_CPU +extern void cpus_write_lock(void); +extern void cpus_write_unlock(void); +extern void cpus_read_lock(void); +extern void cpus_read_unlock(void); +extern int cpus_read_trylock(void); +extern void lockdep_assert_cpus_held(void); +extern void cpu_hotplug_disable(void); +extern void cpu_hotplug_enable(void); +void clear_tasks_mm_cpumask(int cpu); +int remove_cpu(unsigned int cpu); +int cpu_device_down(struct device *dev); +extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); + +#else /* CONFIG_HOTPLUG_CPU */ + +static inline void cpus_write_lock(void) { } +static inline void cpus_write_unlock(void) { } +static inline void cpus_read_lock(void) { } +static inline void cpus_read_unlock(void) { } +static inline int cpus_read_trylock(void) { return true; } +static inline void lockdep_assert_cpus_held(void) { } +static inline void cpu_hotplug_disable(void) { } +static inline void cpu_hotplug_enable(void) { } +static inline int remove_cpu(unsigned int cpu) { return -EPERM; } +static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } +#endif /* !CONFIG_HOTPLUG_CPU */ + +DEFINE_LOCK_GUARD_0(cpus_read_lock, cpus_read_lock(), cpus_read_unlock()) + +#endif /* _LINUX_CPUHPLOCK_H_ */ -- cgit v1.2.3 From ddefcfdeb5a2238cbcb07b80dda9ac3136735b1e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 9 Jun 2024 17:39:25 -0700 Subject: cpu: Drop "extern" from function declarations in cpuhplock.h This file was created with a direct cut and paste from cpu.h so kept the legacy declaration style. But the Linux coding standard for function declarations in header files is to avoid use of "extern". Drop "extern" from all function declarations. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20240610003927.341707-3-tony.luck@intel.com --- include/linux/cpuhplock.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h index 386abc482264..431560bbd045 100644 --- a/include/linux/cpuhplock.h +++ b/include/linux/cpuhplock.h @@ -15,18 +15,18 @@ struct device; extern int lockdep_is_cpus_held(void); #ifdef CONFIG_HOTPLUG_CPU -extern void cpus_write_lock(void); -extern void cpus_write_unlock(void); -extern void cpus_read_lock(void); -extern void cpus_read_unlock(void); -extern int cpus_read_trylock(void); -extern void lockdep_assert_cpus_held(void); -extern void cpu_hotplug_disable(void); -extern void cpu_hotplug_enable(void); +void cpus_write_lock(void); +void cpus_write_unlock(void); +void cpus_read_lock(void); +void cpus_read_unlock(void); +int cpus_read_trylock(void); +void lockdep_assert_cpus_held(void); +void cpu_hotplug_disable(void); +void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); int remove_cpu(unsigned int cpu); int cpu_device_down(struct device *dev); -extern void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); +void smp_shutdown_nonboot_cpus(unsigned int primary_cpu); #else /* CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From 685cb1674060c2cb1b9da051a12933c082b8e874 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Sun, 9 Jun 2024 17:39:26 -0700 Subject: cacheinfo: Add function to get cacheinfo for a given CPU and cache level Resctrl open codes a search for information about a given cache level in a couple of places (and more are on the way). Provide a new inline function get_cpu_cacheinfo_level() in to do the search and return a pointer to the cacheinfo structure. Add lockdep_assert_cpus_held() to enforce the comment that cpuhp lock must be held. Simplify the existing get_cpu_cacheinfo_id() by using this new function to do the search. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Reinette Chatre Link: https://lore.kernel.org/r/20240610003927.341707-4-tony.luck@intel.com --- include/linux/cacheinfo.h | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 2cb15fe4fe12..3dde175f4108 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -3,6 +3,7 @@ #define _LINUX_CACHEINFO_H #include +#include #include #include @@ -113,23 +114,37 @@ int acpi_get_cache_info(unsigned int cpu, const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); /* - * Get the id of the cache associated with @cpu at level @level. + * Get the cacheinfo structure for the cache associated with @cpu at + * level @level. * cpuhp lock must be held. */ -static inline int get_cpu_cacheinfo_id(int cpu, int level) +static inline struct cacheinfo *get_cpu_cacheinfo_level(int cpu, int level) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu); int i; + lockdep_assert_cpus_held(); + for (i = 0; i < ci->num_leaves; i++) { if (ci->info_list[i].level == level) { if (ci->info_list[i].attributes & CACHE_ID) - return ci->info_list[i].id; - return -1; + return &ci->info_list[i]; + return NULL; } } - return -1; + return NULL; +} + +/* + * Get the id of the cache associated with @cpu at level @level. + * cpuhp lock must be held. + */ +static inline int get_cpu_cacheinfo_id(int cpu, int level) +{ + struct cacheinfo *ci = get_cpu_cacheinfo_level(cpu, level); + + return ci ? ci->id : -1; } #ifdef CONFIG_ARM64 -- cgit v1.2.3 From 62096c48394b49e326056a62780fb67b60edde91 Mon Sep 17 00:00:00 2001 From: Ming Qian Date: Mon, 6 May 2024 17:49:16 +0900 Subject: media: v4l2-ctrls: Add average QP control Add a control V4L2_CID_MPEG_VIDEO_AVERAGE_QP to report the average QP value of the current encoded frame. The value applies to the last dequeued capture buffer. Signed-off-by: Ming Qian Reviewed-by: Hans Verkuil Signed-off-by: Sebastian Fricke Signed-off-by: Hans Verkuil --- Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst | 14 ++++++++++++++ drivers/media/v4l2-core/v4l2-ctrls-defs.c | 5 +++++ include/uapi/linux/v4l2-controls.h | 2 ++ 3 files changed, 21 insertions(+) (limited to 'include') diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst index 2a165ae063fb..4a379bd9e3fb 100644 --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst @@ -1653,6 +1653,20 @@ enum v4l2_mpeg_video_h264_hierarchical_coding_type - Quantization parameter for a P frame for FWHT. Valid range: from 1 to 31. +``V4L2_CID_MPEG_VIDEO_AVERAGE_QP (integer)`` + This read-only control returns the average QP value of the currently + encoded frame. The value applies to the last dequeued capture buffer + (VIDIOC_DQBUF). Its valid range depends on the encoding format and parameters. + For H264, its valid range is from 0 to 51. + For HEVC, its valid range is from 0 to 51 for 8 bit and + from 0 to 63 for 10 bit. + For H263 and MPEG4, its valid range is from 1 to 31. + For VP8, its valid range is from 0 to 127. + For VP9, its valid range is from 0 to 255. + If the codec's MIN_QP and MAX_QP are set, then the QP will meet both requirements. + Codecs need to always use the specified range, rather then a HW custom range. + Applicable to encoders + .. raw:: latex \normalsize diff --git a/drivers/media/v4l2-core/v4l2-ctrls-defs.c b/drivers/media/v4l2-core/v4l2-ctrls-defs.c index 8696eb1cdd61..1ea52011247a 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls-defs.c +++ b/drivers/media/v4l2-core/v4l2-ctrls-defs.c @@ -970,6 +970,7 @@ const char *v4l2_ctrl_get_name(u32 id) case V4L2_CID_MPEG_VIDEO_LTR_COUNT: return "LTR Count"; case V4L2_CID_MPEG_VIDEO_FRAME_LTR_INDEX: return "Frame LTR Index"; case V4L2_CID_MPEG_VIDEO_USE_LTR_FRAMES: return "Use LTR Frames"; + case V4L2_CID_MPEG_VIDEO_AVERAGE_QP: return "Average QP Value"; case V4L2_CID_FWHT_I_FRAME_QP: return "FWHT I-Frame QP Value"; case V4L2_CID_FWHT_P_FRAME_QP: return "FWHT P-Frame QP Value"; @@ -1507,6 +1508,10 @@ void v4l2_ctrl_fill(u32 id, const char **name, enum v4l2_ctrl_type *type, *max = 0xffffffffffffLL; *step = 1; break; + case V4L2_CID_MPEG_VIDEO_AVERAGE_QP: + *type = V4L2_CTRL_TYPE_INTEGER; + *flags |= V4L2_CTRL_FLAG_READ_ONLY; + break; case V4L2_CID_PIXEL_RATE: *type = V4L2_CTRL_TYPE_INTEGER64; *flags |= V4L2_CTRL_FLAG_READ_ONLY; diff --git a/include/uapi/linux/v4l2-controls.h b/include/uapi/linux/v4l2-controls.h index 99c3f5e99da7..974fd254e573 100644 --- a/include/uapi/linux/v4l2-controls.h +++ b/include/uapi/linux/v4l2-controls.h @@ -898,6 +898,8 @@ enum v4l2_mpeg_video_av1_level { V4L2_MPEG_VIDEO_AV1_LEVEL_7_3 = 23 }; +#define V4L2_CID_MPEG_VIDEO_AVERAGE_QP (V4L2_CID_CODEC_BASE + 657) + /* MPEG-class control IDs specific to the CX2341x driver as defined by V4L2 */ #define V4L2_CID_CODEC_CX2341X_BASE (V4L2_CTRL_CLASS_CODEC | 0x1000) #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE (V4L2_CID_CODEC_CX2341X_BASE+0) -- cgit v1.2.3 From 97d833ceb27dc19f8777d63f90be4a27b5daeedf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 6 Jun 2024 16:49:42 +0200 Subject: mlxsw: spectrum_acl_erp: Fix object nesting warning ACLs in Spectrum-2 and newer ASICs can reside in the algorithmic TCAM (A-TCAM) or in the ordinary circuit TCAM (C-TCAM). The former can contain more ACLs (i.e., tc filters), but the number of masks in each region (i.e., tc chain) is limited. In order to mitigate the effects of the above limitation, the device allows filters to share a single mask if their masks only differ in up to 8 consecutive bits. For example, dst_ip/25 can be represented using dst_ip/24 with a delta of 1 bit. The C-TCAM does not have a limit on the number of masks being used (and therefore does not support mask aggregation), but can contain a limited number of filters. The driver uses the "objagg" library to perform the mask aggregation by passing it objects that consist of the filter's mask and whether the filter is to be inserted into the A-TCAM or the C-TCAM since filters in different TCAMs cannot share a mask. The set of created objects is dependent on the insertion order of the filters and is not necessarily optimal. Therefore, the driver will periodically ask the library to compute a more optimal set ("hints") by looking at all the existing objects. When the library asks the driver whether two objects can be aggregated the driver only compares the provided masks and ignores the A-TCAM / C-TCAM indication. This is the right thing to do since the goal is to move as many filters as possible to the A-TCAM. The driver also forbids two identical masks from being aggregated since this can only happen if one was intentionally put in the C-TCAM to avoid a conflict in the A-TCAM. The above can result in the following set of hints: H1: {mask X, A-TCAM} -> H2: {mask Y, A-TCAM} // X is Y + delta H3: {mask Y, C-TCAM} -> H4: {mask Z, A-TCAM} // Y is Z + delta After getting the hints from the library the driver will start migrating filters from one region to another while consulting the computed hints and instructing the device to perform a lookup in both regions during the transition. Assuming a filter with mask X is being migrated into the A-TCAM in the new region, the hints lookup will return H1. Since H2 is the parent of H1, the library will try to find the object associated with it and create it if necessary in which case another hints lookup (recursive) will be performed. This hints lookup for {mask Y, A-TCAM} will either return H2 or H3 since the driver passes the library an object comparison function that ignores the A-TCAM / C-TCAM indication. This can eventually lead to nested objects which are not supported by the library [1]. Fix by removing the object comparison function from both the driver and the library as the driver was the only user. That way the lookup will only return exact matches. I do not have a reliable reproducer that can reproduce the issue in a timely manner, but before the fix the issue would reproduce in several minutes and with the fix it does not reproduce in over an hour. Note that the current usefulness of the hints is limited because they include the C-TCAM indication and represent aggregation that cannot actually happen. This will be addressed in net-next. [1] WARNING: CPU: 0 PID: 153 at lib/objagg.c:170 objagg_obj_parent_assign+0xb5/0xd0 Modules linked in: CPU: 0 PID: 153 Comm: kworker/0:18 Not tainted 6.9.0-rc6-custom-g70fbc2c1c38b #42 Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 Workqueue: mlxsw_core mlxsw_sp_acl_tcam_vregion_rehash_work RIP: 0010:objagg_obj_parent_assign+0xb5/0xd0 [...] Call Trace: __objagg_obj_get+0x2bb/0x580 objagg_obj_get+0xe/0x80 mlxsw_sp_acl_erp_mask_get+0xb5/0xf0 mlxsw_sp_acl_atcam_entry_add+0xe8/0x3c0 mlxsw_sp_acl_tcam_entry_create+0x5e/0xa0 mlxsw_sp_acl_tcam_vchunk_migrate_one+0x16b/0x270 mlxsw_sp_acl_tcam_vregion_rehash_work+0xbe/0x510 process_one_work+0x151/0x370 Fixes: 9069a3817d82 ("lib: objagg: implement optimization hints assembly and use hints for object creation") Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Tested-by: Alexander Zubkov Signed-off-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c | 13 ------------- include/linux/objagg.h | 1 - lib/objagg.c | 15 --------------- 3 files changed, 29 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c index d231f4d2888b..9eee229303cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_erp.c @@ -1217,18 +1217,6 @@ static bool mlxsw_sp_acl_erp_delta_check(void *priv, const void *parent_obj, return err ? false : true; } -static int mlxsw_sp_acl_erp_hints_obj_cmp(const void *obj1, const void *obj2) -{ - const struct mlxsw_sp_acl_erp_key *key1 = obj1; - const struct mlxsw_sp_acl_erp_key *key2 = obj2; - - /* For hints purposes, two objects are considered equal - * in case the masks are the same. Does not matter what - * the "ctcam" value is. - */ - return memcmp(key1->mask, key2->mask, sizeof(key1->mask)); -} - static void *mlxsw_sp_acl_erp_delta_create(void *priv, void *parent_obj, void *obj) { @@ -1308,7 +1296,6 @@ static void mlxsw_sp_acl_erp_root_destroy(void *priv, void *root_priv) static const struct objagg_ops mlxsw_sp_acl_erp_objagg_ops = { .obj_size = sizeof(struct mlxsw_sp_acl_erp_key), .delta_check = mlxsw_sp_acl_erp_delta_check, - .hints_obj_cmp = mlxsw_sp_acl_erp_hints_obj_cmp, .delta_create = mlxsw_sp_acl_erp_delta_create, .delta_destroy = mlxsw_sp_acl_erp_delta_destroy, .root_create = mlxsw_sp_acl_erp_root_create, diff --git a/include/linux/objagg.h b/include/linux/objagg.h index 78021777df46..6df5b887dc54 100644 --- a/include/linux/objagg.h +++ b/include/linux/objagg.h @@ -8,7 +8,6 @@ struct objagg_ops { size_t obj_size; bool (*delta_check)(void *priv, const void *parent_obj, const void *obj); - int (*hints_obj_cmp)(const void *obj1, const void *obj2); void * (*delta_create)(void *priv, void *parent_obj, void *obj); void (*delta_destroy)(void *priv, void *delta_priv); void * (*root_create)(void *priv, void *obj, unsigned int root_id); diff --git a/lib/objagg.c b/lib/objagg.c index 0f99ea5f5371..363e43e849ac 100644 --- a/lib/objagg.c +++ b/lib/objagg.c @@ -906,20 +906,6 @@ static const struct objagg_opt_algo *objagg_opt_algos[] = { [OBJAGG_OPT_ALGO_SIMPLE_GREEDY] = &objagg_opt_simple_greedy, }; -static int objagg_hints_obj_cmp(struct rhashtable_compare_arg *arg, - const void *obj) -{ - struct rhashtable *ht = arg->ht; - struct objagg_hints *objagg_hints = - container_of(ht, struct objagg_hints, node_ht); - const struct objagg_ops *ops = objagg_hints->ops; - const char *ptr = obj; - - ptr += ht->p.key_offset; - return ops->hints_obj_cmp ? ops->hints_obj_cmp(ptr, arg->key) : - memcmp(ptr, arg->key, ht->p.key_len); -} - /** * objagg_hints_get - obtains hints instance * @objagg: objagg instance @@ -958,7 +944,6 @@ struct objagg_hints *objagg_hints_get(struct objagg *objagg, offsetof(struct objagg_hints_node, obj); objagg_hints->ht_params.head_offset = offsetof(struct objagg_hints_node, ht_node); - objagg_hints->ht_params.obj_cmpfn = objagg_hints_obj_cmp; err = rhashtable_init(&objagg_hints->node_ht, &objagg_hints->ht_params); if (err) -- cgit v1.2.3 From 96f3b978736356ba0e5a7d923681765c7ea9b12b Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Wed, 15 May 2024 21:47:25 +0300 Subject: dt-bindings: clock: meson: a1: pll: introduce new syspll bindings The 'syspll' PLL is a general-purpose PLL designed specifically for the CPU clock. It is capable of producing output frequencies within the range of 768MHz to 1536MHz. The 'syspll_in' source clock is an optional parent connection from the peripherals clock controller. Signed-off-by: Dmitry Rokosov Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240515185103.20256-3-ddrokosov@salutedevices.com Signed-off-by: Jerome Brunet --- Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml | 9 +++++++-- include/dt-bindings/clock/amlogic,a1-pll-clkc.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml index a59b188a8bf5..c99274d2a9bd 100644 --- a/Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml +++ b/Documentation/devicetree/bindings/clock/amlogic,a1-pll-clkc.yaml @@ -26,11 +26,15 @@ properties: items: - description: input fixpll_in - description: input hifipll_in + - description: input syspll_in + minItems: 2 # syspll_in is optional clock-names: items: - const: fixpll_in - const: hifipll_in + - const: syspll_in + minItems: 2 # syspll_in is optional required: - compatible @@ -53,7 +57,8 @@ examples: reg = <0 0x7c80 0 0x18c>; #clock-cells = <1>; clocks = <&clkc_periphs CLKID_FIXPLL_IN>, - <&clkc_periphs CLKID_HIFIPLL_IN>; - clock-names = "fixpll_in", "hifipll_in"; + <&clkc_periphs CLKID_HIFIPLL_IN>, + <&clkc_periphs CLKID_SYSPLL_IN>; + clock-names = "fixpll_in", "hifipll_in", "syspll_in"; }; }; diff --git a/include/dt-bindings/clock/amlogic,a1-pll-clkc.h b/include/dt-bindings/clock/amlogic,a1-pll-clkc.h index 2b660c0f2c9f..0dfc5e78a2d5 100644 --- a/include/dt-bindings/clock/amlogic,a1-pll-clkc.h +++ b/include/dt-bindings/clock/amlogic,a1-pll-clkc.h @@ -21,5 +21,6 @@ #define CLKID_FCLK_DIV5 8 #define CLKID_FCLK_DIV7 9 #define CLKID_HIFI_PLL 10 +#define CLKID_SYS_PLL 11 #endif /* __A1_PLL_CLKC_H */ -- cgit v1.2.3 From 41056416ed538d1a05dbba57060c02acdc5154e7 Mon Sep 17 00:00:00 2001 From: Dmitry Rokosov Date: Wed, 15 May 2024 21:47:27 +0300 Subject: dt-bindings: clock: meson: a1: peripherals: support sys_pll input The 'sys_pll' input is an optional clock that can be used to generate 'sys_pll_div16', which serves as one of the sources for the GEN clock. Signed-off-by: Dmitry Rokosov Acked-by: Rob Herring (Arm) Link: https://lore.kernel.org/r/20240515185103.20256-5-ddrokosov@salutedevices.com Signed-off-by: Jerome Brunet --- .../devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml | 9 +++++++-- include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml b/Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml index 6d84cee1bd75..2568ad7dd0ac 100644 --- a/Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml +++ b/Documentation/devicetree/bindings/clock/amlogic,a1-peripherals-clkc.yaml @@ -30,6 +30,8 @@ properties: - description: input fixed pll div7 - description: input hifi pll - description: input oscillator (usually at 24MHz) + - description: input sys pll + minItems: 6 # sys_pll is optional clock-names: items: @@ -39,6 +41,8 @@ properties: - const: fclk_div7 - const: hifi_pll - const: xtal + - const: sys_pll + minItems: 6 # sys_pll is optional required: - compatible @@ -65,9 +69,10 @@ examples: <&clkc_pll CLKID_FCLK_DIV5>, <&clkc_pll CLKID_FCLK_DIV7>, <&clkc_pll CLKID_HIFI_PLL>, - <&xtal>; + <&xtal>, + <&clkc_pll CLKID_SYS_PLL>; clock-names = "fclk_div2", "fclk_div3", "fclk_div5", "fclk_div7", - "hifi_pll", "xtal"; + "hifi_pll", "xtal", "sys_pll"; }; }; diff --git a/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h b/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h index 06f198ee7623..2ce1a06dc735 100644 --- a/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h +++ b/include/dt-bindings/clock/amlogic,a1-peripherals-clkc.h @@ -164,5 +164,6 @@ #define CLKID_DMC_SEL 151 #define CLKID_DMC_DIV 152 #define CLKID_DMC_SEL2 153 +#define CLKID_SYS_PLL_DIV16 154 #endif /* __A1_PERIPHERALS_CLKC_H */ -- cgit v1.2.3 From b334b924c9b709bc969644fb5c406f5c9d01dceb Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Thu, 6 Jun 2024 17:11:37 +0200 Subject: net: tcp/dccp: prepare for tw_timer un-pinning The TCP timewait timer is proving to be problematic for setups where scheduler CPU isolation is achieved at runtime via cpusets (as opposed to statically via isolcpus=domains). What happens there is a CPU goes through tcp_time_wait(), arming the time_wait timer, then gets isolated. TCP_TIMEWAIT_LEN later, the timer fires, causing interference for the now-isolated CPU. This is conceptually similar to the issue described in commit e02b93124855 ("workqueue: Unbind kworkers before sending them to exit()") Move inet_twsk_schedule() to within inet_twsk_hashdance(), with the ehash lock held. Expand the lock's critical section from inet_twsk_kill() to inet_twsk_deschedule_put(), serializing the scheduling vs descheduling of the timer. IOW, this prevents the following race: tcp_time_wait() inet_twsk_hashdance() inet_twsk_deschedule_put() del_timer_sync() inet_twsk_schedule() Thanks to Paolo Abeni for suggesting to leverage the ehash lock. This also restores a comment from commit ec94c2696f0b ("tcp/dccp: avoid one atomic operation for timewait hashdance") as inet_twsk_hashdance() had a "Step 1" and "Step 3" comment, but the "Step 2" had gone missing. inet_twsk_deschedule_put() now acquires the ehash spinlock to synchronize with inet_twsk_hashdance_schedule(). To ease possible regression search, actual un-pin is done in next patch. Link: https://lore.kernel.org/all/ZPhpfMjSiHVjQkTk@localhost.localdomain/ Reviewed-by: Eric Dumazet Signed-off-by: Valentin Schneider Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 6 +++-- net/dccp/minisocks.c | 3 +-- net/ipv4/inet_timewait_sock.c | 52 ++++++++++++++++++++++++++++++++++------ net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/tcp_minisocks.c | 3 +-- 5 files changed, 52 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 2a536eea9424..5b43d220243d 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -93,8 +93,10 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, struct inet_timewait_death_row *dr, const int state); -void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo); +void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo, + int timeo); void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 251a57cf5822..deb52d7d31b4 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -59,11 +59,10 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) * we complete the initialization. */ local_bh_disable(); - inet_twsk_schedule(tw, timeo); /* Linkage updates. * Note that access to tw after this point is illegal. */ - inet_twsk_hashdance(tw, sk, &dccp_hashinfo); + inet_twsk_hashdance_schedule(tw, sk, &dccp_hashinfo, timeo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index e28075f0006e..628d33a41ce5 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -96,9 +96,13 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, * Enter the time wait state. This is called with locally disabled BH. * Essentially we whip up a timewait bucket, copy the relevant info into it * from the SK, and mess with hash chains and list linkage. + * + * The caller must not access @tw anymore after this function returns. */ -void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, - struct inet_hashinfo *hashinfo) +void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo, + int timeo) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -129,26 +133,33 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, spin_lock(lock); + /* Step 2: Hash TW into tcp ehash chain */ inet_twsk_add_node_rcu(tw, &ehead->chain); /* Step 3: Remove SK from hash chain */ if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); - spin_unlock(lock); + /* Ensure above writes are committed into memory before updating the + * refcount. + * Provides ordering vs later refcount_inc(). + */ + smp_wmb(); /* tw_refcnt is set to 3 because we have : * - one reference for bhash chain. * - one reference for ehash chain. * - one reference for timer. - * We can use atomic_set() because prior spin_lock()/spin_unlock() - * committed into memory all tw fields. * Also note that after this point, we lost our implicit reference * so we are not allowed to use tw anymore. */ refcount_set(&tw->tw_refcnt, 3); + + inet_twsk_schedule(tw, timeo); + + spin_unlock(lock); } -EXPORT_SYMBOL_GPL(inet_twsk_hashdance); +EXPORT_SYMBOL_GPL(inet_twsk_hashdance_schedule); static void tw_timer_handler(struct timer_list *t) { @@ -217,7 +228,34 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc); */ void inet_twsk_deschedule_put(struct inet_timewait_sock *tw) { - if (del_timer_sync(&tw->tw_timer)) + struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; + spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); + + /* inet_twsk_purge() walks over all sockets, including tw ones, + * and removes them via inet_twsk_deschedule_put() after a + * refcount_inc_not_zero(). + * + * inet_twsk_hashdance_schedule() must (re)init the refcount before + * arming the timer, i.e. inet_twsk_purge can obtain a reference to + * a twsk that did not yet schedule the timer. + * + * The ehash lock synchronizes these two: + * After acquiring the lock, the timer is always scheduled (else + * timer_shutdown returns false), because hashdance_schedule releases + * the ehash lock only after completing the timer initialization. + * + * Without grabbing the ehash lock, we get: + * 1) cpu x sets twsk refcount to 3 + * 2) cpu y bumps refcount to 4 + * 3) cpu y calls inet_twsk_deschedule_put() and shuts timer down + * 4) cpu x tries to start timer, but mod_timer is a noop post-shutdown + * -> timer refcount is never decremented. + */ + spin_lock(lock); + /* Makes sure hashdance_schedule() has completed */ + spin_unlock(lock); + + if (timer_shutdown_sync(&tw->tw_timer)) inet_twsk_kill(tw); inet_twsk_put(tw); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3613e08ca794..e61c7c974745 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -157,7 +157,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (ts_recent_stamp && (!twp || (reuse && time_after32(ktime_get_seconds(), ts_recent_stamp)))) { - /* inet_twsk_hashdance() sets sk_refcnt after putting twsk + /* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk * and releasing the bucket lock. */ if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt))) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4c894e540730..fc9a850ed9bd 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -350,11 +350,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) * we complete the initialization. */ local_bh_disable(); - inet_twsk_schedule(tw, timeo); /* Linkage updates. * Note that access to tw after this point is illegal. */ - inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo); + inet_twsk_hashdance_schedule(tw, sk, net->ipv4.tcp_death_row.hashinfo, timeo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this -- cgit v1.2.3 From f81d0dd2fde35fd1acc30b3f4de6aaf57d514551 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 6 Jun 2024 17:11:39 +0200 Subject: tcp: move inet_twsk_schedule helper out of header Its no longer used outside inet_timewait_sock.c, so move it there. Reviewed-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 5 ----- net/ipv4/inet_timewait_sock.c | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5b43d220243d..f88b68269012 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -101,11 +101,6 @@ void inet_twsk_hashdance_schedule(struct inet_timewait_sock *tw, void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm); -static inline void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) -{ - __inet_twsk_schedule(tw, timeo, false); -} - static inline void inet_twsk_reschedule(struct inet_timewait_sock *tw, int timeo) { __inet_twsk_schedule(tw, timeo, true); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index b2d97c816c99..337390ba85b4 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -92,6 +92,11 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw, hlist_nulls_add_head_rcu(&tw->tw_node, list); } +static void inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo) +{ + __inet_twsk_schedule(tw, timeo, false); +} + /* * Enter the time wait state. * Essentially we whip up a timewait bucket, copy the relevant info into it -- cgit v1.2.3 From 0b7e448119428e1dcb854abb5855f66966fb82dc Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Tue, 28 May 2024 14:29:33 -0500 Subject: ACPI: utils: introduce acpi_get_local_u64_address() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ACPI _ADR is a 64-bit value. We changed the definitions in commit ca6f998cf9a2 ("ACPI: bus: change _ADR representation to 64 bits") but some helpers still assume the value is a 32-bit value. This patch adds a new helper to extract the full 64-bits. The existing 32-bit helper is kept for backwards-compatibility and cases where the _ADR is known to fit in a 32-bit value. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Péter Ujfalusi Reviewed-by: Bard Liao Acked-by: Rafael J. Wysocki Reviewed-by: Takashi Iwai Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20240528192936.16180-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Mark Brown --- drivers/acpi/utils.c | 16 +++++++++++++--- include/linux/acpi.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 202234ba54bd..ae9384282273 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -277,15 +277,25 @@ acpi_evaluate_integer(acpi_handle handle, EXPORT_SYMBOL(acpi_evaluate_integer); -int acpi_get_local_address(acpi_handle handle, u32 *addr) +int acpi_get_local_u64_address(acpi_handle handle, u64 *addr) { - unsigned long long adr; acpi_status status; - status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr); + status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, addr); if (ACPI_FAILURE(status)) return -ENODATA; + return 0; +} +EXPORT_SYMBOL(acpi_get_local_u64_address); + +int acpi_get_local_address(acpi_handle handle, u32 *addr) +{ + u64 adr; + int ret; + ret = acpi_get_local_u64_address(handle, &adr); + if (ret < 0) + return ret; *addr = (u32)adr; return 0; } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..65e7177bcb02 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -761,6 +761,7 @@ static inline u64 acpi_arch_get_root_pointer(void) } #endif +int acpi_get_local_u64_address(acpi_handle handle, u64 *addr); int acpi_get_local_address(acpi_handle handle, u32 *addr); const char *acpi_get_subsystem_id(acpi_handle handle); -- cgit v1.2.3 From e289df82344fecc104ad8326b9ab6da612b9c899 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 31 May 2024 22:42:40 +0300 Subject: spi: Rework per message DMA mapped flag to be per transfer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The granularity of DMA mappings is transfer and moreover, the direction is also important as it can be unidirect. The current cur_msg_mapped flag doesn't fit well the DMA mapping and syncing calls and we have tons of checks around on top of it. So, instead of doing that rework the code to use per transfer per direction flag to show if it's DMA mapped or not. Signed-off-by: Andy Shevchenko Tested-by: Neil Armstrong # on SM8650-QRD Link: https://lore.kernel.org/r/20240531194723.1761567-9-andriy.shevchenko@linux.intel.com Reviewed-by: Serge Semin Tested-by: Nícolas F. R. A. Prado Signed-off-by: Mark Brown --- drivers/spi/internals.h | 2 +- drivers/spi/spi.c | 73 ++++++++++++++++++------------------------------- include/linux/spi/spi.h | 11 +++++--- 3 files changed, 35 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/spi/internals.h b/drivers/spi/internals.h index 47a87c2a6979..1f459b895891 100644 --- a/drivers/spi/internals.h +++ b/drivers/spi/internals.h @@ -45,7 +45,7 @@ static inline bool spi_xfer_is_dma_mapped(struct spi_controller *ctlr, struct spi_transfer *xfer) { return ctlr->can_dma && ctlr->can_dma(ctlr, spi, xfer) && - ctlr->cur_msg_mapped; + (xfer->tx_sg_mapped || xfer->rx_sg_mapped); } #endif /* __LINUX_SPI_INTERNALS_H */ diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index c1e8cde426e5..9721adf048b5 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1220,11 +1220,6 @@ void spi_unmap_buf(struct spi_controller *ctlr, struct device *dev, spi_unmap_buf_attrs(ctlr, dev, sgt, dir, 0); } -/* Dummy SG for unidirect transfers */ -static struct scatterlist dummy_sg = { - .page_link = SG_END, -}; - static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) { struct device *tx_dev, *rx_dev; @@ -1263,8 +1258,8 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) attrs); if (ret != 0) return ret; - } else { - xfer->tx_sg.sgl = &dummy_sg; + + xfer->tx_sg_mapped = true; } if (xfer->rx_buf != NULL) { @@ -1278,8 +1273,8 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) return ret; } - } else { - xfer->rx_sg.sgl = &dummy_sg; + + xfer->rx_sg_mapped = true; } } /* No transfer has been mapped, bail out with success */ @@ -1288,7 +1283,6 @@ static int __spi_map_msg(struct spi_controller *ctlr, struct spi_message *msg) ctlr->cur_rx_dma_dev = rx_dev; ctlr->cur_tx_dma_dev = tx_dev; - ctlr->cur_msg_mapped = true; return 0; } @@ -1299,57 +1293,46 @@ static int __spi_unmap_msg(struct spi_controller *ctlr, struct spi_message *msg) struct device *tx_dev = ctlr->cur_tx_dma_dev; struct spi_transfer *xfer; - if (!ctlr->cur_msg_mapped || !ctlr->can_dma) - return 0; - list_for_each_entry(xfer, &msg->transfers, transfer_list) { /* The sync has already been done after each transfer. */ unsigned long attrs = DMA_ATTR_SKIP_CPU_SYNC; - if (!ctlr->can_dma(ctlr, msg->spi, xfer)) - continue; + if (xfer->rx_sg_mapped) + spi_unmap_buf_attrs(ctlr, rx_dev, &xfer->rx_sg, + DMA_FROM_DEVICE, attrs); + xfer->rx_sg_mapped = false; - spi_unmap_buf_attrs(ctlr, rx_dev, &xfer->rx_sg, - DMA_FROM_DEVICE, attrs); - spi_unmap_buf_attrs(ctlr, tx_dev, &xfer->tx_sg, - DMA_TO_DEVICE, attrs); + if (xfer->tx_sg_mapped) + spi_unmap_buf_attrs(ctlr, tx_dev, &xfer->tx_sg, + DMA_TO_DEVICE, attrs); + xfer->tx_sg_mapped = false; } - ctlr->cur_msg_mapped = false; - return 0; } -static void spi_dma_sync_for_device(struct spi_controller *ctlr, struct spi_message *msg, +static void spi_dma_sync_for_device(struct spi_controller *ctlr, struct spi_transfer *xfer) { struct device *rx_dev = ctlr->cur_rx_dma_dev; struct device *tx_dev = ctlr->cur_tx_dma_dev; - if (!ctlr->cur_msg_mapped) - return; - - if (!ctlr->can_dma(ctlr, msg->spi, xfer)) - return; - - dma_sync_sgtable_for_device(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE); - dma_sync_sgtable_for_device(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE); + if (xfer->tx_sg_mapped) + dma_sync_sgtable_for_device(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE); + if (xfer->rx_sg_mapped) + dma_sync_sgtable_for_device(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE); } -static void spi_dma_sync_for_cpu(struct spi_controller *ctlr, struct spi_message *msg, +static void spi_dma_sync_for_cpu(struct spi_controller *ctlr, struct spi_transfer *xfer) { struct device *rx_dev = ctlr->cur_rx_dma_dev; struct device *tx_dev = ctlr->cur_tx_dma_dev; - if (!ctlr->cur_msg_mapped) - return; - - if (!ctlr->can_dma(ctlr, msg->spi, xfer)) - return; - - dma_sync_sgtable_for_cpu(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE); - dma_sync_sgtable_for_cpu(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE); + if (xfer->rx_sg_mapped) + dma_sync_sgtable_for_cpu(rx_dev, &xfer->rx_sg, DMA_FROM_DEVICE); + if (xfer->tx_sg_mapped) + dma_sync_sgtable_for_cpu(tx_dev, &xfer->tx_sg, DMA_TO_DEVICE); } #else /* !CONFIG_HAS_DMA */ static inline int __spi_map_msg(struct spi_controller *ctlr, @@ -1365,13 +1348,11 @@ static inline int __spi_unmap_msg(struct spi_controller *ctlr, } static void spi_dma_sync_for_device(struct spi_controller *ctrl, - struct spi_message *msg, struct spi_transfer *xfer) { } static void spi_dma_sync_for_cpu(struct spi_controller *ctrl, - struct spi_message *msg, struct spi_transfer *xfer) { } @@ -1643,13 +1624,13 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, reinit_completion(&ctlr->xfer_completion); fallback_pio: - spi_dma_sync_for_device(ctlr, msg, xfer); + spi_dma_sync_for_device(ctlr, xfer); ret = ctlr->transfer_one(ctlr, msg->spi, xfer); if (ret < 0) { - spi_dma_sync_for_cpu(ctlr, msg, xfer); + spi_dma_sync_for_cpu(ctlr, xfer); - if (ctlr->cur_msg_mapped && - (xfer->error & SPI_TRANS_FAIL_NO_START)) { + if ((xfer->tx_sg_mapped || xfer->rx_sg_mapped) && + (xfer->error & SPI_TRANS_FAIL_NO_START)) { __spi_unmap_msg(ctlr, msg); ctlr->fallback = true; xfer->error &= ~SPI_TRANS_FAIL_NO_START; @@ -1671,7 +1652,7 @@ fallback_pio: msg->status = ret; } - spi_dma_sync_for_cpu(ctlr, msg, xfer); + spi_dma_sync_for_cpu(ctlr, xfer); } else { if (xfer->len) dev_err(&msg->spi->dev, diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e8e1e798924f..b4a89db4c855 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -447,7 +447,6 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * @cur_msg_need_completion: Flag used internally to opportunistically skip * the @cur_msg_completion. This flag is used to signal the context that * is running spi_finalize_current_message() that it needs to complete() - * @cur_msg_mapped: message has been mapped for DMA * @fallback: fallback to PIO if DMA transfer return failure with * SPI_TRANS_FAIL_NO_START. * @last_cs_mode_high: was (mode & SPI_CS_HIGH) true on the last call to set_cs. @@ -708,7 +707,6 @@ struct spi_controller { bool running; bool rt; bool auto_runtime_pm; - bool cur_msg_mapped; bool fallback; bool last_cs_mode_high; s8 last_cs[SPI_CS_CNT_MAX]; @@ -981,6 +979,8 @@ struct spi_res { * transfer this transfer. Set to 0 if the SPI bus driver does * not support it. * @transfer_list: transfers are sequenced through @spi_message.transfers + * @tx_sg_mapped: If true, the @tx_sg is mapped for DMA + * @rx_sg_mapped: If true, the @rx_sg is mapped for DMA * @tx_sg: Scatterlist for transmit, currently not for client use * @rx_sg: Scatterlist for receive, currently not for client use * @ptp_sts_word_pre: The word (subject to bits_per_word semantics) offset @@ -1077,10 +1077,13 @@ struct spi_transfer { #define SPI_TRANS_FAIL_IO BIT(1) u16 error; - dma_addr_t tx_dma; - dma_addr_t rx_dma; + bool tx_sg_mapped; + bool rx_sg_mapped; + struct sg_table tx_sg; struct sg_table rx_sg; + dma_addr_t tx_dma; + dma_addr_t rx_dma; unsigned dummy_data:1; unsigned cs_off:1; -- cgit v1.2.3 From 24d07f114e4ec7608659a4ef18307f76739c72a8 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Mon, 3 Jun 2024 11:47:26 +0200 Subject: drm/panic: Add a set_pixel() callback to drm_scanout_buffer This allows drivers to draw the pixel, and handle tiling, or specific color formats. v2: * Use fg_color for blit() functions (Javier Martinez Canillas) Signed-off-by: Jocelyn Falempe Reviewed-by: Javier Martinez Canillas Link: https://patchwork.freedesktop.org/patch/msgid/20240603095343.39588-3-jfalempe@redhat.com --- drivers/gpu/drm/drm_panic.c | 120 ++++++++++++++++++++++++++++---------------- include/drm/drm_panic.h | 9 ++++ 2 files changed, 85 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 056494ae1ede..5dc9e98108ed 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -247,40 +247,54 @@ static void drm_panic_blit32(struct iosys_map *dmap, unsigned int dpitch, iosys_map_wr(dmap, y * dpitch + x * sizeof(u32), u32, fg32); } +static void drm_panic_blit_pixel(struct drm_scanout_buffer *sb, struct drm_rect *clip, + const u8 *sbuf8, unsigned int spitch, u32 fg_color) +{ + unsigned int y, x; + + for (y = 0; y < drm_rect_height(clip); y++) + for (x = 0; x < drm_rect_width(clip); x++) + if (drm_panic_is_pixel_fg(sbuf8, spitch, x, y)) + sb->set_pixel(sb, clip->x1 + x, clip->y1 + y, fg_color); +} + /* * drm_panic_blit - convert a monochrome image to a linear framebuffer - * @dmap: destination iosys_map - * @dpitch: destination pitch in bytes + * @sb: destination scanout buffer + * @clip: destination rectangle * @sbuf8: source buffer, in monochrome format, 8 pixels per byte. * @spitch: source pitch in bytes - * @height: height of the image to copy, in pixels - * @width: width of the image to copy, in pixels * @fg_color: foreground color, in destination format - * @pixel_width: pixel width in bytes. * * This can be used to draw a font character, which is a monochrome image, to a * framebuffer in other supported format. */ -static void drm_panic_blit(struct iosys_map *dmap, unsigned int dpitch, - const u8 *sbuf8, unsigned int spitch, - unsigned int height, unsigned int width, - u32 fg_color, unsigned int pixel_width) +static void drm_panic_blit(struct drm_scanout_buffer *sb, struct drm_rect *clip, + const u8 *sbuf8, unsigned int spitch, u32 fg_color) { - switch (pixel_width) { + struct iosys_map map; + + if (sb->set_pixel) + return drm_panic_blit_pixel(sb, clip, sbuf8, spitch, fg_color); + + map = sb->map[0]; + iosys_map_incr(&map, clip->y1 * sb->pitch[0] + clip->x1 * sb->format->cpp[0]); + + switch (sb->format->cpp[0]) { case 2: - drm_panic_blit16(dmap, dpitch, sbuf8, spitch, - height, width, fg_color); + drm_panic_blit16(&map, sb->pitch[0], sbuf8, spitch, + drm_rect_height(clip), drm_rect_width(clip), fg_color); break; case 3: - drm_panic_blit24(dmap, dpitch, sbuf8, spitch, - height, width, fg_color); + drm_panic_blit24(&map, sb->pitch[0], sbuf8, spitch, + drm_rect_height(clip), drm_rect_width(clip), fg_color); break; case 4: - drm_panic_blit32(dmap, dpitch, sbuf8, spitch, - height, width, fg_color); + drm_panic_blit32(&map, sb->pitch[0], sbuf8, spitch, + drm_rect_height(clip), drm_rect_width(clip), fg_color); break; default: - WARN_ONCE(1, "Can't blit with pixel width %d\n", pixel_width); + WARN_ONCE(1, "Can't blit with pixel width %d\n", sb->format->cpp[0]); } } @@ -324,33 +338,51 @@ static void drm_panic_fill32(struct iosys_map *dmap, unsigned int dpitch, iosys_map_wr(dmap, y * dpitch + x * sizeof(u32), u32, color); } +static void drm_panic_fill_pixel(struct drm_scanout_buffer *sb, + struct drm_rect *clip, + u32 color) +{ + unsigned int y, x; + + for (y = 0; y < drm_rect_height(clip); y++) + for (x = 0; x < drm_rect_width(clip); x++) + sb->set_pixel(sb, clip->x1 + x, clip->y1 + y, color); +} + /* * drm_panic_fill - Fill a rectangle with a color - * @dmap: destination iosys_map, pointing to the top left corner of the rectangle - * @dpitch: destination pitch in bytes - * @height: height of the rectangle, in pixels - * @width: width of the rectangle, in pixels - * @color: color to fill the rectangle. - * @pixel_width: pixel width in bytes + * @sb: destination scanout buffer + * @clip: destination rectangle + * @color: foreground color, in destination format * * Fill a rectangle with a color, in a linear framebuffer. */ -static void drm_panic_fill(struct iosys_map *dmap, unsigned int dpitch, - unsigned int height, unsigned int width, - u32 color, unsigned int pixel_width) +static void drm_panic_fill(struct drm_scanout_buffer *sb, struct drm_rect *clip, + u32 color) { - switch (pixel_width) { + struct iosys_map map; + + if (sb->set_pixel) + return drm_panic_fill_pixel(sb, clip, color); + + map = sb->map[0]; + iosys_map_incr(&map, clip->y1 * sb->pitch[0] + clip->x1 * sb->format->cpp[0]); + + switch (sb->format->cpp[0]) { case 2: - drm_panic_fill16(dmap, dpitch, height, width, color); + drm_panic_fill16(&map, sb->pitch[0], drm_rect_height(clip), + drm_rect_width(clip), color); break; case 3: - drm_panic_fill24(dmap, dpitch, height, width, color); + drm_panic_fill24(&map, sb->pitch[0], drm_rect_height(clip), + drm_rect_width(clip), color); break; case 4: - drm_panic_fill32(dmap, dpitch, height, width, color); + drm_panic_fill32(&map, sb->pitch[0], drm_rect_height(clip), + drm_rect_width(clip), color); break; default: - WARN_ONCE(1, "Can't fill with pixel width %d\n", pixel_width); + WARN_ONCE(1, "Can't fill with pixel width %d\n", sb->format->cpp[0]); } } @@ -383,25 +415,24 @@ static void draw_txt_rectangle(struct drm_scanout_buffer *sb, int i, j; const u8 *src; size_t font_pitch = DIV_ROUND_UP(font->width, 8); - struct iosys_map dst; - unsigned int px_width = sb->format->cpp[0]; - int left = 0; + struct drm_rect rec; msg_lines = min(msg_lines, drm_rect_height(clip) / font->height); for (i = 0; i < msg_lines; i++) { size_t line_len = min(msg[i].len, drm_rect_width(clip) / font->width); + rec.y1 = clip->y1 + i * font->height; + rec.y2 = rec.y1 + font->height; + rec.x1 = clip->x1; + if (centered) - left = (drm_rect_width(clip) - (line_len * font->width)) / 2; + rec.x1 += (drm_rect_width(clip) - (line_len * font->width)) / 2; - dst = sb->map[0]; - iosys_map_incr(&dst, (clip->y1 + i * font->height) * sb->pitch[0] + - (clip->x1 + left) * px_width); for (j = 0; j < line_len; j++) { src = get_char_bitmap(font, msg[i].txt[j], font_pitch); - drm_panic_blit(&dst, sb->pitch[0], src, font_pitch, - font->height, font->width, color, px_width); - iosys_map_incr(&dst, font->width * px_width); + rec.x2 = rec.x1 + font->width; + drm_panic_blit(sb, &rec, src, font_pitch, color); + rec.x1 += font->width; } } } @@ -416,7 +447,7 @@ static void draw_panic_static(struct drm_scanout_buffer *sb) u32 fg_color = CONFIG_DRM_PANIC_FOREGROUND_COLOR; u32 bg_color = CONFIG_DRM_PANIC_BACKGROUND_COLOR; const struct font_desc *font = get_default_font(sb->width, sb->height, NULL, NULL); - struct drm_rect r_logo, r_msg; + struct drm_rect r_screen, r_logo, r_msg; if (!font) return; @@ -424,6 +455,8 @@ static void draw_panic_static(struct drm_scanout_buffer *sb) fg_color = convert_from_xrgb8888(fg_color, sb->format->format); bg_color = convert_from_xrgb8888(bg_color, sb->format->format); + r_screen = DRM_RECT_INIT(0, 0, sb->width, sb->height); + r_logo = DRM_RECT_INIT(0, 0, get_max_line_len(logo, logo_lines) * font->width, logo_lines * font->height); @@ -435,8 +468,7 @@ static void draw_panic_static(struct drm_scanout_buffer *sb) drm_rect_translate(&r_msg, (sb->width - r_msg.x2) / 2, (sb->height - r_msg.y2) / 2); /* Fill with the background color, and draw text on top */ - drm_panic_fill(&sb->map[0], sb->pitch[0], sb->height, sb->width, - bg_color, sb->format->cpp[0]); + drm_panic_fill(sb, &r_screen, bg_color); if ((r_msg.x1 >= drm_rect_width(&r_logo) || r_msg.y1 >= drm_rect_height(&r_logo)) && drm_rect_width(&r_logo) < sb->width && drm_rect_height(&r_logo) < sb->height) { diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index 822dbb1aa9d6..73bb3f3d9ed9 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -50,6 +50,15 @@ struct drm_scanout_buffer { * @pitch: Length in bytes between the start of two consecutive lines. */ unsigned int pitch[DRM_FORMAT_MAX_PLANES]; + + /** + * @set_pixel: Optional function, to set a pixel color on the + * framebuffer. It allows to handle special tiling format inside the + * driver. + */ + void (*set_pixel)(struct drm_scanout_buffer *sb, unsigned int x, + unsigned int y, u32 color); + }; /** -- cgit v1.2.3 From 1f020495458396766496ee067130f507a4d718e4 Mon Sep 17 00:00:00 2001 From: Sam Ravnborg Date: Fri, 31 May 2024 22:37:46 +0200 Subject: drm/bridge: Drop drm_bridge_chain_mode_fixup There are no users left of drm_bridge_chain_mode_fixup() and we do not want to have this function available, so drop it. Signed-off-by: Sam Ravnborg Reviewed-by: Maxime Ripard Reviewed-by: Laurent Pinchart Cc: Laurent Pinchart Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Robert Foss Link: https://patchwork.freedesktop.org/patch/msgid/20240531-bridge_chain_mode-v1-2-8b49e36c5dd3@ravnborg.org --- drivers/gpu/drm/drm_bridge.c | 37 ------------------------------------- include/drm/drm_bridge.h | 3 --- 2 files changed, 40 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_bridge.c b/drivers/gpu/drm/drm_bridge.c index 584d109330ab..d44f055dbe3e 100644 --- a/drivers/gpu/drm/drm_bridge.c +++ b/drivers/gpu/drm/drm_bridge.c @@ -467,43 +467,6 @@ void drm_bridge_detach(struct drm_bridge *bridge) * needed, in order to gradually transition to the new model. */ -/** - * drm_bridge_chain_mode_fixup - fixup proposed mode for all bridges in the - * encoder chain - * @bridge: bridge control structure - * @mode: desired mode to be set for the bridge - * @adjusted_mode: updated mode that works for this bridge - * - * Calls &drm_bridge_funcs.mode_fixup for all the bridges in the - * encoder chain, starting from the first bridge to the last. - * - * Note: the bridge passed should be the one closest to the encoder - * - * RETURNS: - * true on success, false on failure - */ -bool drm_bridge_chain_mode_fixup(struct drm_bridge *bridge, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct drm_encoder *encoder; - - if (!bridge) - return true; - - encoder = bridge->encoder; - list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) { - if (!bridge->funcs->mode_fixup) - continue; - - if (!bridge->funcs->mode_fixup(bridge, mode, adjusted_mode)) - return false; - } - - return true; -} -EXPORT_SYMBOL(drm_bridge_chain_mode_fixup); - /** * drm_bridge_chain_mode_valid - validate the mode against all bridges in the * encoder chain. diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 4baca0d9107b..5cf41f92d1f0 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -855,9 +855,6 @@ drm_bridge_chain_get_first_bridge(struct drm_encoder *encoder) #define drm_for_each_bridge_in_chain(encoder, bridge) \ list_for_each_entry(bridge, &(encoder)->bridge_chain, chain_node) -bool drm_bridge_chain_mode_fixup(struct drm_bridge *bridge, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode); enum drm_mode_status drm_bridge_chain_mode_valid(struct drm_bridge *bridge, const struct drm_display_info *info, -- cgit v1.2.3 From 5fbf57a937f418fe204f9dbb7735e91984f4ee6a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 6 Jun 2024 12:29:06 -0700 Subject: net: netlink: remove the cb_mutex "injection" from netlink core Back in 2007, in commit af65bdfce98d ("[NETLINK]: Switch cb_lock spinlock to mutex and allow to override it") netlink core was extended to allow subsystems to replace the dump mutex lock with its own lock. The mechanism was used by rtnetlink to take rtnl_lock but it isn't sufficiently flexible for other users. Over the 17 years since it was added no other user appeared. Since rtnetlink needs conditional locking now, and doesn't use it either, axe this feature complete. Signed-off-by: Jakub Kicinski Reviewed-by: Kuniyuki Iwashima Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 - net/netlink/af_netlink.c | 18 +++--------------- 2 files changed, 3 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 5df7340d4dab..b332c2048c75 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -47,7 +47,6 @@ struct netlink_kernel_cfg { unsigned int groups; unsigned int flags; void (*input)(struct sk_buff *skb); - struct mutex *cb_mutex; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); void (*release) (struct sock *sk, unsigned long *groups); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8bbbe75e75db..0b7a89db3ab7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -636,8 +636,7 @@ static struct proto netlink_proto = { }; static int __netlink_create(struct net *net, struct socket *sock, - struct mutex *dump_cb_mutex, int protocol, - int kern) + int protocol, int kern) { struct sock *sk; struct netlink_sock *nlk; @@ -655,7 +654,6 @@ static int __netlink_create(struct net *net, struct socket *sock, lockdep_set_class_and_name(&nlk->nl_cb_mutex, nlk_cb_mutex_keys + protocol, nlk_cb_mutex_key_strings[protocol]); - nlk->dump_cb_mutex = dump_cb_mutex; init_waitqueue_head(&nlk->wait); sk->sk_destruct = netlink_sock_destruct; @@ -667,7 +665,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, int kern) { struct module *module = NULL; - struct mutex *cb_mutex; struct netlink_sock *nlk; int (*bind)(struct net *net, int group); void (*unbind)(struct net *net, int group); @@ -696,7 +693,6 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, module = nl_table[protocol].module; else err = -EPROTONOSUPPORT; - cb_mutex = nl_table[protocol].cb_mutex; bind = nl_table[protocol].bind; unbind = nl_table[protocol].unbind; release = nl_table[protocol].release; @@ -705,7 +701,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol, if (err < 0) goto out; - err = __netlink_create(net, sock, cb_mutex, protocol, kern); + err = __netlink_create(net, sock, protocol, kern); if (err < 0) goto out_module; @@ -2016,7 +2012,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, struct sock *sk; struct netlink_sock *nlk; struct listeners *listeners = NULL; - struct mutex *cb_mutex = cfg ? cfg->cb_mutex : NULL; unsigned int groups; BUG_ON(!nl_table); @@ -2027,7 +2022,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; - if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0) + if (__netlink_create(net, sock, unit, 1) < 0) goto out_sock_release_nosk; sk = sock->sk; @@ -2055,7 +2050,6 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module, if (!nl_table[unit].registered) { nl_table[unit].groups = groups; rcu_assign_pointer(nl_table[unit].listeners, listeners); - nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; if (cfg) { nl_table[unit].bind = cfg->bind; @@ -2326,15 +2320,9 @@ static int netlink_dump(struct sock *sk, bool lock_taken) netlink_skb_set_owner_r(skb, sk); if (nlk->dump_done_errno > 0) { - struct mutex *extra_mutex = nlk->dump_cb_mutex; - cb->extack = &extack; - if (extra_mutex) - mutex_lock(extra_mutex); nlk->dump_done_errno = cb->dump(skb, cb); - if (extra_mutex) - mutex_unlock(extra_mutex); /* EMSGSIZE plus something already in the skb means * that there's more to dump but current skb has filled up. -- cgit v1.2.3 From c6ae073f5903f6c6439d0ac855836a4da5c0a701 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 6 Jun 2024 23:32:48 +0300 Subject: geneve: Fix incorrect inner network header offset when innerprotoinherit is set When innerprotoinherit is set, the tunneled packets do not have an inner Ethernet header. Change 'maclen' to not always assume the header length is ETH_HLEN, as there might not be a MAC header. This resolves issues with drivers (e.g. mlx5, in mlx5e_tx_tunnel_accel()) who rely on the skb inner network header offset to be correct, and use it for TX offloads. Fixes: d8a6213d70ac ("geneve: fix header validation in geneve[6]_xmit_skb") Signed-off-by: Gal Pressman Signed-off-by: Tariq Toukan Reviewed-by: Wojciech Drewek Signed-off-by: David S. Miller --- drivers/net/geneve.c | 10 ++++++---- include/net/ip_tunnels.h | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 51495cb4b9be..838e85ddec67 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -815,6 +815,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { + bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); const struct ip_tunnel_key *key = &info->key; @@ -826,7 +827,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb)) + if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs4) @@ -908,7 +909,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, } err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), - geneve->cfg.inner_proto_inherit); + inner_proto_inherit); if (unlikely(err)) return err; @@ -925,6 +926,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { + bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); const struct ip_tunnel_key *key = &info->key; @@ -935,7 +937,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb)) + if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; if (!gs6) @@ -997,7 +999,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = ttl ? : ip6_dst_hoplimit(dst); } err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), - geneve->cfg.inner_proto_inherit); + inner_proto_inherit); if (unlikely(err)) return err; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 9a6a08ec7713..1db2417b8ff5 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -461,9 +461,10 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) /* Variant of pskb_inet_may_pull(). */ -static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, + bool inner_proto_inherit) { - int nhlen = 0, maclen = ETH_HLEN; + int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; /* Essentially this is skb_protocol(skb, true) -- cgit v1.2.3 From 806a5198c05987b748b50f3d0c0cfb3d417381a4 Mon Sep 17 00:00:00 2001 From: Luiz Augusto von Dentz Date: Mon, 20 May 2024 16:03:07 -0400 Subject: Bluetooth: L2CAP: Fix rejecting L2CAP_CONN_PARAM_UPDATE_REQ This removes the bogus check for max > hcon->le_conn_max_interval since the later is just the initial maximum conn interval not the maximum the stack could support which is really 3200=4000ms. In order to pass GAP/CONN/CPUP/BV-05-C one shall probably enter values of the following fields in IXIT that would cause hci_check_conn_params to fail: TSPX_conn_update_int_min TSPX_conn_update_int_max TSPX_conn_update_peripheral_latency TSPX_conn_update_supervision_timeout Link: https://github.com/bluez/bluez/issues/847 Fixes: e4b019515f95 ("Bluetooth: Enforce validation on max value of connection interval") Signed-off-by: Luiz Augusto von Dentz --- include/net/bluetooth/hci_core.h | 36 ++++++++++++++++++++++++++++++++---- net/bluetooth/l2cap_core.c | 8 +------- 2 files changed, 33 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9231396fe96f..c43716edf205 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2113,18 +2113,46 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, { u16 max_latency; - if (min > max || min < 6 || max > 3200) + if (min > max) { + BT_WARN("min %d > max %d", min, max); return -EINVAL; + } + + if (min < 6) { + BT_WARN("min %d < 6", min); + return -EINVAL; + } + + if (max > 3200) { + BT_WARN("max %d > 3200", max); + return -EINVAL; + } + + if (to_multiplier < 10) { + BT_WARN("to_multiplier %d < 10", to_multiplier); + return -EINVAL; + } - if (to_multiplier < 10 || to_multiplier > 3200) + if (to_multiplier > 3200) { + BT_WARN("to_multiplier %d > 3200", to_multiplier); return -EINVAL; + } - if (max >= to_multiplier * 8) + if (max >= to_multiplier * 8) { + BT_WARN("max %d >= to_multiplier %d * 8", max, to_multiplier); return -EINVAL; + } max_latency = (to_multiplier * 4 / max) - 1; - if (latency > 499 || latency > max_latency) + if (latency > 499) { + BT_WARN("latency %d > 499", latency); return -EINVAL; + } + + if (latency > max_latency) { + BT_WARN("latency %d > max_latency %d", latency, max_latency); + return -EINVAL; + } return 0; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5b509b767557..c49e0d4b3c0d 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4647,13 +4647,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - if (max > hcon->le_conn_max_interval) { - BT_DBG("requested connection interval exceeds current bounds."); - err = -EINVAL; - } else { - err = hci_check_conn_params(min, max, latency, to_multiplier); - } - + err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else -- cgit v1.2.3 From 8a74e4eaa72c14f997df6d820effb6aac400d470 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 10 Jun 2024 10:20:53 +0200 Subject: PCI: switchtec: Make switchtec_class constant Now that the driver core allows for struct class to be in read-only memory, we should make all 'class' structures declared at build time placing them into read-only memory, instead of having to be dynamically allocated at runtime. Link: https://lore.kernel.org/r/2024061053-online-unwound-b173@gregkh Signed-off-by: Greg Kroah-Hartman Signed-off-by: Bjorn Helgaas Reviewed-by: Dave Jiang Reviewed-By: Logan Gunthorpe Cc: Kurt Schwemmer Cc: Jon Mason Cc: Allen Hubbe --- drivers/ntb/hw/mscc/ntb_hw_switchtec.c | 2 +- drivers/pci/switch/switchtec.c | 16 ++++++++-------- include/linux/switchtec.h | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c index d6bbcc7b5b90..31946387badf 100644 --- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c +++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c @@ -1565,7 +1565,7 @@ static struct class_interface switchtec_interface = { static int __init switchtec_ntb_init(void) { - switchtec_interface.class = switchtec_class; + switchtec_interface.class = &switchtec_class; return class_interface_register(&switchtec_interface); } module_init(switchtec_ntb_init); diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c index 5a4adf6c04cf..c7e1089ffdaf 100644 --- a/drivers/pci/switch/switchtec.c +++ b/drivers/pci/switch/switchtec.c @@ -37,7 +37,9 @@ MODULE_PARM_DESC(nirqs, "number of interrupts to allocate (more may be useful fo static dev_t switchtec_devt; static DEFINE_IDA(switchtec_minor_ida); -struct class *switchtec_class; +const struct class switchtec_class = { + .name = "switchtec", +}; EXPORT_SYMBOL_GPL(switchtec_class); enum mrpc_state { @@ -1363,7 +1365,7 @@ static struct switchtec_dev *stdev_create(struct pci_dev *pdev) dev = &stdev->dev; device_initialize(dev); - dev->class = switchtec_class; + dev->class = &switchtec_class; dev->parent = &pdev->dev; dev->groups = switchtec_device_groups; dev->release = stdev_release; @@ -1851,11 +1853,9 @@ static int __init switchtec_init(void) if (rc) return rc; - switchtec_class = class_create("switchtec"); - if (IS_ERR(switchtec_class)) { - rc = PTR_ERR(switchtec_class); + rc = class_register(&switchtec_class); + if (rc) goto err_create_class; - } rc = pci_register_driver(&switchtec_pci_driver); if (rc) @@ -1866,7 +1866,7 @@ static int __init switchtec_init(void) return 0; err_pci_register: - class_destroy(switchtec_class); + class_unregister(&switchtec_class); err_create_class: unregister_chrdev_region(switchtec_devt, max_devices); @@ -1878,7 +1878,7 @@ module_init(switchtec_init); static void __exit switchtec_exit(void) { pci_unregister_driver(&switchtec_pci_driver); - class_destroy(switchtec_class); + class_unregister(&switchtec_class); unregister_chrdev_region(switchtec_devt, max_devices); ida_destroy(&switchtec_minor_ida); diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 8d8fac1626bd..cdb58d61c152 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -521,6 +521,6 @@ static inline struct switchtec_dev *to_stdev(struct device *dev) return container_of(dev, struct switchtec_dev, dev); } -extern struct class *switchtec_class; +extern const struct class switchtec_class; #endif -- cgit v1.2.3 From 1d4ce389da2b84e0e24aad5e83fe9c742adc3f99 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 7 Jun 2024 14:22:33 -0700 Subject: ice: add and use roundup_u64 instead of open coding equivalent In ice_ptp_cfg_clkout(), the ice driver needs to calculate the nearest next second of a current time value specified in nanoseconds. It implements this using div64_u64, because the time value is a u64. It could use div_u64 since NSEC_PER_SEC is smaller than 32-bits. Ideally this would be implemented directly with roundup(), but that can't work on all platforms due to a division which requires using the specific macros and functions due to platform restrictions, and to ensure that the most appropriate and fast instructions are used. The kernel doesn't currently provide any 64-bit equivalents for doing roundup. Attempting to use roundup() on a 32-bit platform will result in a link failure due to not having a direct 64-bit division. The closest equivalent for this is DIV64_U64_ROUND_UP, which does a division always rounding up. However, this only computes the division, and forces use of the div64_u64 in cases where the divisor is a 32bit value and could make use of div_u64. Introduce DIV_U64_ROUND_UP based on div_u64, and then use it to implement roundup_u64 which takes a u64 input value and a u32 rounding value. The name roundup_u64 matches the naming scheme of div_u64, and future patches could implement roundup64_u64 if they need to round by a multiple that is greater than 32-bits. Replace the logic in ice_ptp.c which does this equivalent with the newly added roundup_u64. Tested-by: Pucha Himasekhar Reddy Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20240607-next-2024-06-03-intel-next-batch-v3-2-d1470cee3347@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_ptp.c | 3 +-- include/linux/math64.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index adbb9cffe20c..b7ab6fdf710d 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1773,8 +1773,7 @@ static int ice_ptp_cfg_clkout(struct ice_pf *pf, unsigned int chan, * maintaining phase */ if (start_time < current_time) - start_time = div64_u64(current_time + NSEC_PER_SEC - 1, - NSEC_PER_SEC) * NSEC_PER_SEC + phase; + start_time = roundup_u64(current_time, NSEC_PER_SEC) + phase; if (ice_is_e810(hw)) start_time -= E810_OUT_PROP_DELAY_NS; diff --git a/include/linux/math64.h b/include/linux/math64.h index d34def7f9a8c..6aaccc1626ab 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -297,6 +297,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); #define DIV64_U64_ROUND_UP(ll, d) \ ({ u64 _tmp = (d); div64_u64((ll) + _tmp - 1, _tmp); }) +/** + * DIV_U64_ROUND_UP - unsigned 64bit divide with 32bit divisor rounded up + * @ll: unsigned 64bit dividend + * @d: unsigned 32bit divisor + * + * Divide unsigned 64bit dividend by unsigned 32bit divisor + * and round up. + * + * Return: dividend / divisor rounded up + */ +#define DIV_U64_ROUND_UP(ll, d) \ + ({ u32 _tmp = (d); div_u64((ll) + _tmp - 1, _tmp); }) + /** * DIV64_U64_ROUND_CLOSEST - unsigned 64bit divide with 64bit divisor rounded to nearest integer * @dividend: unsigned 64bit dividend @@ -342,4 +355,19 @@ u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div); div_s64((__x - (__d / 2)), __d); \ } \ ) + +/** + * roundup_u64 - Round up a 64bit value to the next specified 32bit multiple + * @x: the value to up + * @y: 32bit multiple to round up to + * + * Rounds @x to the next multiple of @y. For 32bit @x values, see roundup and + * the faster round_up() for powers of 2. + * + * Return: rounded up value. + */ +static inline u64 roundup_u64(u64 x, u32 y) +{ + return DIV_U64_ROUND_UP(x, y) * y; +} #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From 5f7fb89a115d53b4a10bf7ba2733e78df281e98d Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 10 Jun 2024 23:09:36 -0400 Subject: function_graph: Everyone uses HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, remove it All architectures that implement function graph also implements HAVE_FUNCTION_GRAPH_RET_ADDR_PTR. Remove it, as it is no longer a differentiator. Link: https://lore.kernel.org/linux-trace-kernel/20240611031737.982047614@goodmis.org Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Mathieu Desnoyers Cc: Jonathan Corbet Cc: Catalin Marinas Cc: Will Deacon Cc: Guo Ren Cc: Huacai Chen Cc: WANG Xuerui Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Christophe Leroy Cc: "Naveen N. Rao" Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Cc: Christian Borntraeger Cc: Sven Schnelle Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/ftrace-design.rst | 12 ------------ arch/arm64/include/asm/ftrace.h | 11 ----------- arch/csky/include/asm/ftrace.h | 2 -- arch/loongarch/include/asm/ftrace.h | 1 - arch/powerpc/include/asm/ftrace.h | 2 -- arch/riscv/include/asm/ftrace.h | 1 - arch/s390/include/asm/ftrace.h | 1 - arch/x86/include/asm/ftrace.h | 2 -- include/linux/ftrace.h | 2 -- kernel/trace/fgraph.c | 35 +---------------------------------- 10 files changed, 1 insertion(+), 68 deletions(-) (limited to 'include') diff --git a/Documentation/trace/ftrace-design.rst b/Documentation/trace/ftrace-design.rst index 6893399157f0..dc82d64b3a44 100644 --- a/Documentation/trace/ftrace-design.rst +++ b/Documentation/trace/ftrace-design.rst @@ -217,18 +217,6 @@ along to ftrace_push_return_trace() instead of a stub value of 0. Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer. -HAVE_FUNCTION_GRAPH_RET_ADDR_PTR --------------------------------- - -An arch may pass in a pointer to the return address on the stack. This -prevents potential stack unwinding issues where the unwinder gets out of -sync with ret_stack and the wrong addresses are reported by -ftrace_graph_ret_addr(). - -Adding support for it is easy: just define the macro in asm/ftrace.h and -pass the return address pointer as the 'retp' argument to -ftrace_push_return_trace(). - HAVE_SYSCALL_TRACEPOINTS ------------------------ diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index ab158196480c..dc9cf0bd2a4c 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -12,17 +12,6 @@ #define HAVE_FUNCTION_GRAPH_FP_TEST -/* - * HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means that the architecture can provide a - * "return address pointer" which can be used to uniquely identify a return - * address which has been overwritten. - * - * On arm64 we use the address of the caller's frame record, which remains the - * same for the lifetime of the instrumented function, unlike the return - * address in the LR. - */ -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS #define ARCH_SUPPORTS_FTRACE_OPS 1 #else diff --git a/arch/csky/include/asm/ftrace.h b/arch/csky/include/asm/ftrace.h index fd215c38ef27..00f9f7647e3f 100644 --- a/arch/csky/include/asm/ftrace.h +++ b/arch/csky/include/asm/ftrace.h @@ -7,8 +7,6 @@ #define HAVE_FUNCTION_GRAPH_FP_TEST -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - #define ARCH_SUPPORTS_FTRACE_OPS 1 #define MCOUNT_ADDR ((unsigned long)_mcount) diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h index de891c2c83d4..c0a682808e07 100644 --- a/arch/loongarch/include/asm/ftrace.h +++ b/arch/loongarch/include/asm/ftrace.h @@ -28,7 +28,6 @@ struct dyn_ftrace; struct dyn_arch_ftrace { }; #define ARCH_SUPPORTS_FTRACE_OPS 1 -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ftrace_init_nop ftrace_init_nop int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 107fc5a48456..559560286e6d 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -8,8 +8,6 @@ #define MCOUNT_ADDR ((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - /* Ignore unused weak functions which will have larger offsets */ #if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) #define FTRACE_MCOUNT_MAX_OFFSET 16 diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 9eb31a7ea0aa..2cddd79ff21b 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -11,7 +11,6 @@ #if defined(CONFIG_FUNCTION_GRAPH_TRACER) && defined(CONFIG_FRAME_POINTER) #define HAVE_FUNCTION_GRAPH_FP_TEST #endif -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 77e479d44f1e..fbadca645af7 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -2,7 +2,6 @@ #ifndef _ASM_S390_FTRACE_H #define _ASM_S390_FTRACE_H -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR #define ARCH_SUPPORTS_FTRACE_OPS 1 #define MCOUNT_INSN_SIZE 6 diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 897cf02c20b1..0152a81d9b4a 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -20,8 +20,6 @@ #define ARCH_SUPPORTS_FTRACE_OPS 1 #endif -#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR - #ifndef __ASSEMBLY__ extern void __fentry__(void); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4135dc171447..845c2ab0bc1c 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1071,9 +1071,7 @@ struct ftrace_ret_stack { #ifdef HAVE_FUNCTION_GRAPH_FP_TEST unsigned long fp; #endif -#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR unsigned long *retp; -#endif }; /* diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 91f1eef256af..8317d1a7f43a 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -593,9 +593,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, #ifdef HAVE_FUNCTION_GRAPH_FP_TEST ret_stack->fp = frame_pointer; #endif -#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR ret_stack->retp = retp; -#endif return offset; } @@ -887,10 +885,8 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int idx) * will be assigned that location so that if called again, it will continue * where it left off. * - * @retp is a pointer to the return address on the stack. It's ignored if - * the arch doesn't have HAVE_FUNCTION_GRAPH_RET_ADDR_PTR defined. + * @retp is a pointer to the return address on the stack. */ -#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret, unsigned long *retp) { @@ -926,35 +922,6 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, return ret; } -#else /* !HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ -unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, - unsigned long ret, unsigned long *retp) -{ - struct ftrace_ret_stack *ret_stack; - unsigned long return_handler = (unsigned long)dereference_kernel_function_descriptor(return_to_handler); - int offset = task->curr_ret_stack; - int i; - - if (ret != return_handler) - return ret; - - if (!idx) - return ret; - - i = *idx; - do { - ret_stack = get_ret_stack(task, offset, &offset); - if (ret_stack && ret_stack->ret == return_handler) - continue; - i--; - } while (i >= 0 && ret_stack); - - if (ret_stack) - return ret_stack->ret; - - return ret; -} -#endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */ static struct ftrace_ops graph_ops = { .func = ftrace_graph_func, -- cgit v1.2.3 From e8343410ddf08fc36a9b9cc7c51a4e53a262d4c6 Mon Sep 17 00:00:00 2001 From: Jai Luthra Date: Tue, 11 Jun 2024 18:02:55 +0530 Subject: ALSA: dmaengine: Synchronize dma channel after drop() Sometimes the stream may be stopped due to XRUN events, in which case the userspace can call snd_pcm_drop() and snd_pcm_prepare() to stop and start the stream again. In these cases, we must wait for the DMA channel to synchronize before marking the stream as prepared for playback, as the DMA channel gets stopped by drop() without any synchronization. Make sure the ALSA core synchronizes the DMA channel by adding a sync_stop() hook. Reviewed-by: Peter Ujfalusi Signed-off-by: Jai Luthra Link: https://lore.kernel.org/r/20240611-asoc_next-v3-1-fcfd84b12164@ti.com Signed-off-by: Mark Brown --- include/sound/dmaengine_pcm.h | 1 + sound/core/pcm_dmaengine.c | 10 ++++++++++ sound/soc/soc-generic-dmaengine-pcm.c | 8 ++++++++ 3 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/sound/dmaengine_pcm.h b/include/sound/dmaengine_pcm.h index c11aaf8079fb..f6baa9a01868 100644 --- a/include/sound/dmaengine_pcm.h +++ b/include/sound/dmaengine_pcm.h @@ -36,6 +36,7 @@ snd_pcm_uframes_t snd_dmaengine_pcm_pointer_no_residue(struct snd_pcm_substream int snd_dmaengine_pcm_open(struct snd_pcm_substream *substream, struct dma_chan *chan); int snd_dmaengine_pcm_close(struct snd_pcm_substream *substream); +int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream); int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, dma_filter_fn filter_fn, void *filter_data); diff --git a/sound/core/pcm_dmaengine.c b/sound/core/pcm_dmaengine.c index 12aa1cef11a1..ed07fa5693d2 100644 --- a/sound/core/pcm_dmaengine.c +++ b/sound/core/pcm_dmaengine.c @@ -349,6 +349,16 @@ int snd_dmaengine_pcm_open_request_chan(struct snd_pcm_substream *substream, } EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_open_request_chan); +int snd_dmaengine_pcm_sync_stop(struct snd_pcm_substream *substream) +{ + struct dmaengine_pcm_runtime_data *prtd = substream_to_prtd(substream); + + dmaengine_synchronize(prtd->dma_chan); + + return 0; +} +EXPORT_SYMBOL_GPL(snd_dmaengine_pcm_sync_stop); + /** * snd_dmaengine_pcm_close - Close a dmaengine based PCM substream * @substream: PCM substream diff --git a/sound/soc/soc-generic-dmaengine-pcm.c b/sound/soc/soc-generic-dmaengine-pcm.c index ea3bc9318412..a63e942fdc0b 100644 --- a/sound/soc/soc-generic-dmaengine-pcm.c +++ b/sound/soc/soc-generic-dmaengine-pcm.c @@ -318,6 +318,12 @@ static int dmaengine_copy(struct snd_soc_component *component, return 0; } +static int dmaengine_pcm_sync_stop(struct snd_soc_component *component, + struct snd_pcm_substream *substream) +{ + return snd_dmaengine_pcm_sync_stop(substream); +} + static const struct snd_soc_component_driver dmaengine_pcm_component = { .name = SND_DMAENGINE_PCM_DRV_NAME, .probe_order = SND_SOC_COMP_ORDER_LATE, @@ -327,6 +333,7 @@ static const struct snd_soc_component_driver dmaengine_pcm_component = { .trigger = dmaengine_pcm_trigger, .pointer = dmaengine_pcm_pointer, .pcm_construct = dmaengine_pcm_new, + .sync_stop = dmaengine_pcm_sync_stop, }; static const struct snd_soc_component_driver dmaengine_pcm_component_process = { @@ -339,6 +346,7 @@ static const struct snd_soc_component_driver dmaengine_pcm_component_process = { .pointer = dmaengine_pcm_pointer, .copy = dmaengine_copy, .pcm_construct = dmaengine_pcm_new, + .sync_stop = dmaengine_pcm_sync_stop, }; static const char * const dmaengine_pcm_dma_channel_names[] = { -- cgit v1.2.3 From 85542adb65ecd7cc0e442e8befef74f2ed07f5f6 Mon Sep 17 00:00:00 2001 From: Thomas Prescher Date: Wed, 8 May 2024 15:25:01 +0200 Subject: KVM: x86: Add KVM_RUN_X86_GUEST_MODE kvm_run flag When a vCPU is interrupted by a signal while running a nested guest, KVM will exit to userspace with L2 state. However, userspace has no way to know whether it sees L1 or L2 state (besides calling KVM_GET_STATS_FD, which does not have a stable ABI). This causes multiple problems: The simplest one is L2 state corruption when userspace marks the sregs as dirty. See this mailing list thread [1] for a complete discussion. Another problem is that if userspace decides to continue by emulating instructions, it will unknowingly emulate with L2 state as if L1 doesn't exist, which can be considered a weird guest escape. Introduce a new flag, KVM_RUN_X86_GUEST_MODE, in the kvm_run data structure, which is set when the vCPU exited while running a nested guest. Also introduce a new capability, KVM_CAP_X86_GUEST_MODE, to advertise the functionality to userspace. [1] https://lore.kernel.org/kvm/20240416123558.212040-1-julian.stecklina@cyberus-technology.de/T/#m280aadcb2e10ae02c191a7dc4ed4b711a74b1f55 Signed-off-by: Thomas Prescher Signed-off-by: Julian Stecklina Link: https://lore.kernel.org/r/20240508132502.184428-1-julian.stecklina@cyberus-technology.de Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 17 +++++++++++++++++ arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/x86.c | 3 +++ include/uapi/linux/kvm.h | 1 + 4 files changed, 22 insertions(+) (limited to 'include') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 67edb84317ef..42d1d9518bf2 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6419,6 +6419,9 @@ affect the device's behavior. Current defined flags:: #define KVM_RUN_X86_SMM (1 << 0) /* x86, set if bus lock detected in VM */ #define KVM_RUN_X86_BUS_LOCK (1 << 1) + /* x86, set if the VCPU is executing a nested (L2) guest */ + #define KVM_RUN_X86_GUEST_MODE (1 << 2) + /* arm64, set for KVM_EXIT_DEBUG */ #define KVM_DEBUG_ARCH_HSR_HIGH_VALID (1 << 0) @@ -8089,6 +8092,20 @@ by KVM_CHECK_EXTENSION. Note: Userspace is responsible for correctly configuring CPUID 0x15, a.k.a. the core crystal clock frequency, if a non-zero CPUID 0x15 is exposed to the guest. +7.36 KVM_CAP_X86_GUEST_MODE +------------------------------ + +:Architectures: x86 +:Returns: Informational only, -EINVAL on direct KVM_ENABLE_CAP. + +The presence of this capability indicates that KVM_RUN will update the +KVM_RUN_X86_GUEST_MODE bit in kvm_run.flags to indicate whether the +vCPU was executing nested guest code when it exited. + +KVM exits with the register state of either the L1 or L2 guest +depending on which executed at the time of an exit. Userspace must +take care to differentiate between these cases. + 8. Other capabilities. ====================== diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 9fae1b73b529..b85671d9c8aa 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,7 @@ struct kvm_ioapic_state { #define KVM_RUN_X86_SMM (1 << 0) #define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bfe3dba56e24..33e41103fcde 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4704,6 +4704,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VM_DISABLE_NX_HUGE_PAGES: case KVM_CAP_IRQFD_RESAMPLE: case KVM_CAP_MEMORY_FAULT_INFO: + case KVM_CAP_X86_GUEST_MODE: r = 1; break; case KVM_CAP_X86_APIC_BUS_CYCLES_NS: @@ -10277,6 +10278,8 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu) if (is_smm(vcpu)) kvm_run->flags |= KVM_RUN_X86_SMM; + if (is_guest_mode(vcpu)) + kvm_run->flags |= KVM_RUN_X86_GUEST_MODE; } static void update_cr8_intercept(struct kvm_vcpu *vcpu) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ec998e6b6555..a6ac00ec77ad 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -918,6 +918,7 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 236 +#define KVM_CAP_X86_GUEST_MODE 237 struct kvm_irq_routing_irqchip { __u32 irqchip; -- cgit v1.2.3 From f328a26eeb5380bc74e58cb9c3280a4908452df7 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 3 Jun 2024 17:55:55 -0400 Subject: dlm: introduce DLM_LSFL_SOFTIRQ_SAFE Introduce a new external lockspace flag DLM_LSFL_SOFTIRQ_SAFE. A lockspace user will set this flag if it can handle dlm running the callback functions from softirq context. When not set, dlm will continue to run callback functions from the dlm_callback workqueue. The new lockspace flag cannot be used for user space lockspaces, so a uapi placeholder definition is used for the new flag value. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lockspace.c | 3 +++ include/linux/dlm.h | 17 ++++++++++++++++- include/uapi/linux/dlm.h | 2 ++ 3 files changed, 21 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 8a4351ee9a42..a7ac0fcb4ef3 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -629,6 +629,9 @@ int dlm_new_user_lockspace(const char *name, const char *cluster, void *ops_arg, int *ops_result, dlm_lockspace_t **lockspace) { + if (flags & DLM_LSFL_SOFTIRQ) + return -EINVAL; + return __dlm_new_lockspace(name, cluster, flags, lvblen, ops, ops_arg, ops_result, lockspace); } diff --git a/include/linux/dlm.h b/include/linux/dlm.h index c58c4f790c04..bacda9898f2b 100644 --- a/include/linux/dlm.h +++ b/include/linux/dlm.h @@ -35,6 +35,9 @@ struct dlm_lockspace_ops { int num_slots, int our_slot, uint32_t generation); }; +/* only relevant for kernel lockspaces, will be removed in future */ +#define DLM_LSFL_SOFTIRQ __DLM_LSFL_RESERVED0 + /* * dlm_new_lockspace * @@ -55,6 +58,11 @@ struct dlm_lockspace_ops { * used to select the directory node. Must be the same on all nodes. * DLM_LSFL_NEWEXCL * dlm_new_lockspace() should return -EEXIST if the lockspace exists. + * DLM_LSFL_SOFTIRQ + * dlm request callbacks (ast, bast) are softirq safe. Flag should be + * preferred by users. Will be default in some future. If set the + * strongest context for ast, bast callback is softirq as it avoids + * an additional context switch. * * lvblen: length of lvb in bytes. Must be multiple of 8. * dlm_new_lockspace() returns an error if this does not match @@ -121,7 +129,14 @@ int dlm_release_lockspace(dlm_lockspace_t *lockspace, int force); * call. * * AST routines should not block (at least not for long), but may make - * any locking calls they please. + * any locking calls they please. If DLM_LSFL_SOFTIRQ for kernel + * users of dlm_new_lockspace() is passed the ast and bast callbacks + * can be processed in softirq context. Also some of the callback + * contexts are in the same context as the DLM lock request API, users + * must not hold locks while calling dlm lock request API and trying + * to acquire this lock in the callback again, this will end in a + * lock recursion. For newer implementation the DLM_LSFL_SOFTIRQ + * should be used. */ int dlm_lock(dlm_lockspace_t *lockspace, diff --git a/include/uapi/linux/dlm.h b/include/uapi/linux/dlm.h index e7e905fb0bb2..4eaf835780b0 100644 --- a/include/uapi/linux/dlm.h +++ b/include/uapi/linux/dlm.h @@ -71,6 +71,8 @@ struct dlm_lksb { /* DLM_LSFL_TIMEWARN is deprecated and reserved. DO NOT USE! */ #define DLM_LSFL_TIMEWARN 0x00000002 #define DLM_LSFL_NEWEXCL 0x00000008 +/* currently reserved due in-kernel use */ +#define __DLM_LSFL_RESERVED0 0x00000010 #endif /* _UAPI__DLM_DOT_H__ */ -- cgit v1.2.3 From 10b8e0fd3f7234a38db2c8d2c8dec0bd6eeede44 Mon Sep 17 00:00:00 2001 From: Amelie Delaunay Date: Fri, 31 May 2024 17:07:10 +0200 Subject: dmaengine: add channel device name to channel registration Channel device name is used for sysfs, but also by dmatest filter function. With dynamic channel registration, channels can be registered after dma controller registration. Users may want to have specific channel names. If name is NULL, the channel name relies on previous implementation, dmachan. Signed-off-by: Amelie Delaunay Link: https://lore.kernel.org/r/20240531150712.2503554-11-amelie.delaunay@foss.st.com Signed-off-by: Vinod Koul --- drivers/dma/dmaengine.c | 16 ++++++++++------ drivers/dma/idxd/dma.c | 2 +- include/linux/dmaengine.h | 3 ++- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 491b22240221..c380a4dda77a 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -1037,7 +1037,8 @@ static int get_dma_id(struct dma_device *device) } static int __dma_async_device_channel_register(struct dma_device *device, - struct dma_chan *chan) + struct dma_chan *chan, + const char *name) { int rc; @@ -1066,8 +1067,10 @@ static int __dma_async_device_channel_register(struct dma_device *device, chan->dev->device.parent = device->dev; chan->dev->chan = chan; chan->dev->dev_id = device->dev_id; - dev_set_name(&chan->dev->device, "dma%dchan%d", - device->dev_id, chan->chan_id); + if (!name) + dev_set_name(&chan->dev->device, "dma%dchan%d", device->dev_id, chan->chan_id); + else + dev_set_name(&chan->dev->device, name); rc = device_register(&chan->dev->device); if (rc) goto err_out_ida; @@ -1087,11 +1090,12 @@ static int __dma_async_device_channel_register(struct dma_device *device, } int dma_async_device_channel_register(struct dma_device *device, - struct dma_chan *chan) + struct dma_chan *chan, + const char *name) { int rc; - rc = __dma_async_device_channel_register(device, chan); + rc = __dma_async_device_channel_register(device, chan, name); if (rc < 0) return rc; @@ -1203,7 +1207,7 @@ int dma_async_device_register(struct dma_device *device) /* represent channels in sysfs. Probably want devs too */ list_for_each_entry(chan, &device->channels, device_node) { - rc = __dma_async_device_channel_register(device, chan); + rc = __dma_async_device_channel_register(device, chan, NULL); if (rc < 0) goto err_out; } diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index cd835eabd31b..dbecd699237e 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -269,7 +269,7 @@ static int idxd_register_dma_channel(struct idxd_wq *wq) desc->txd.tx_submit = idxd_dma_tx_submit; } - rc = dma_async_device_channel_register(dma, chan); + rc = dma_async_device_channel_register(dma, chan, NULL); if (rc < 0) { kfree(idxd_chan); return rc; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 752dbde4cec1..73537fddbb52 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -1575,7 +1575,8 @@ int dma_async_device_register(struct dma_device *device); int dmaenginem_async_device_register(struct dma_device *device); void dma_async_device_unregister(struct dma_device *device); int dma_async_device_channel_register(struct dma_device *device, - struct dma_chan *chan); + struct dma_chan *chan, + const char *name); void dma_async_device_channel_unregister(struct dma_device *device, struct dma_chan *chan); void dma_run_dependencies(struct dma_async_tx_descriptor *tx); -- cgit v1.2.3 From 3ff1180a39fbc43ae69d4238e6922c57e3278910 Mon Sep 17 00:00:00 2001 From: Andrew Davis Date: Mon, 10 Jun 2024 08:53:13 -0500 Subject: gpiolib: Remove data-less gpiochip_add() function GPIO chips should be added with driver-private data associated with the chip. If none is needed, NULL can be used. All users already do this except one, fix that here. With no more users of the base gpiochip_add() we can drop this function so no more users show up later. Signed-off-by: Andrew Davis Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20240610135313.142571-1-afd@ti.com Signed-off-by: Bartosz Golaszewski --- Documentation/driver-api/gpio/driver.rst | 5 ++--- drivers/staging/greybus/gpio.c | 2 +- include/linux/gpio/driver.h | 4 ---- 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/Documentation/driver-api/gpio/driver.rst b/Documentation/driver-api/gpio/driver.rst index e541bd2e898b..ae433261e11a 100644 --- a/Documentation/driver-api/gpio/driver.rst +++ b/Documentation/driver-api/gpio/driver.rst @@ -69,9 +69,8 @@ driver code: The code implementing a gpio_chip should support multiple instances of the controller, preferably using the driver model. That code will configure each -gpio_chip and issue gpiochip_add(), gpiochip_add_data(), or -devm_gpiochip_add_data(). Removing a GPIO controller should be rare; use -gpiochip_remove() when it is unavoidable. +gpio_chip and issue gpiochip_add_data() or devm_gpiochip_add_data(). Removing +a GPIO controller should be rare; use gpiochip_remove() when it is unavoidable. Often a gpio_chip is part of an instance-specific structure with states not exposed by the GPIO interfaces, such as addressing, power management, and more. diff --git a/drivers/staging/greybus/gpio.c b/drivers/staging/greybus/gpio.c index 2a115a8fc263..5217aacfcf54 100644 --- a/drivers/staging/greybus/gpio.c +++ b/drivers/staging/greybus/gpio.c @@ -579,7 +579,7 @@ static int gb_gpio_probe(struct gbphy_device *gbphy_dev, if (ret) goto exit_line_free; - ret = gpiochip_add(gpio); + ret = gpiochip_add_data(gpio, NULL); if (ret) { dev_err(&gbphy_dev->dev, "failed to add gpio chip: %d\n", ret); goto exit_line_free; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 0032bb6e7d8f..6d31388dde0a 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -632,10 +632,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, devm_gpiochip_add_data_with_key(dev, gc, data, NULL, NULL) #endif /* CONFIG_LOCKDEP */ -static inline int gpiochip_add(struct gpio_chip *gc) -{ - return gpiochip_add_data(gc, NULL); -} void gpiochip_remove(struct gpio_chip *gc); int devm_gpiochip_add_data_with_key(struct device *dev, struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, -- cgit v1.2.3 From fbe4a7e881d4408bfabbb4fd538f10fd686cd8ab Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Mon, 6 May 2024 18:17:49 +0800 Subject: KVM: Setup empty IRQ routing when creating a VM Setup empty IRQ routing during VM creation so that x86 and s390 don't need to set empty/dummy IRQ routing during KVM_CREATE_IRQCHIP (in future patches). Initializing IRQ routing before there are any potential readers allows KVM to avoid the synchronize_srcu() in kvm_set_irq_routing(), which can introduces 20+ milliseconds of latency in the VM creation path. Ensuring that all VMs have non-NULL IRQ routing also hardens KVM against misbehaving userspace VMMs, e.g. RISC-V dynamically instantiates its interrupt controller, but doesn't override kvm_arch_intc_initialized() or kvm_arch_irqfd_allowed(), and so can likely reach kvm_irq_map_gsi() without fully initialized IRQ routing. Signed-off-by: Yi Wang Acked-by: Christian Borntraeger Link: https://lore.kernel.org/r/20240506101751.3145407-2-foxywang@tencent.com [sean: init refcount after IRQ routing, fix stub, massage changelog] Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 6 ++++++ virt/kvm/irqchip.c | 24 ++++++++++++++++++++++++ virt/kvm/kvm_main.c | 7 +++++++ 3 files changed, 37 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c80fe03a1fa4..eaa70e9b1218 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2094,6 +2094,7 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); +int kvm_init_irq_routing(struct kvm *kvm); int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue); @@ -2103,6 +2104,11 @@ void kvm_free_irq_routing(struct kvm *kvm); static inline void kvm_free_irq_routing(struct kvm *kvm) {} +static inline int kvm_init_irq_routing(struct kvm *kvm) +{ + return 0; +} + #endif int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 1e567d1f6d3d..162d8ed889f2 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -237,3 +237,27 @@ out: return r; } + +/* + * Allocate empty IRQ routing by default so that additional setup isn't needed + * when userspace-driven IRQ routing is activated, and so that kvm->irq_routing + * is guaranteed to be non-NULL. + */ +int kvm_init_irq_routing(struct kvm *kvm) +{ + struct kvm_irq_routing_table *new; + int chip_size; + + new = kzalloc(struct_size(new, map, 1), GFP_KERNEL_ACCOUNT); + if (!new) + return -ENOMEM; + + new->nr_rt_entries = 1; + + chip_size = sizeof(int) * KVM_NR_IRQCHIPS * KVM_IRQCHIP_NUM_PINS; + memset(new->chip, -1, chip_size); + + RCU_INIT_POINTER(kvm->irq_routing, new); + + return 0; +} diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f4f5a435b225..7bf1ef0fc10b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1186,7 +1186,12 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname) if (init_srcu_struct(&kvm->irq_srcu)) goto out_err_no_irq_srcu; + r = kvm_init_irq_routing(kvm); + if (r) + goto out_err_no_irq_routing; + refcount_set(&kvm->users_count, 1); + for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) { for (j = 0; j < 2; j++) { slots = &kvm->__memslots[i][j]; @@ -1265,6 +1270,8 @@ out_err_no_arch_destroy_vm: WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count)); for (i = 0; i < KVM_NR_BUSES; i++) kfree(kvm_get_bus(kvm, i)); + kvm_free_irq_routing(kvm); +out_err_no_irq_routing: cleanup_srcu_struct(&kvm->irq_srcu); out_err_no_irq_srcu: cleanup_srcu_struct(&kvm->srcu); -- cgit v1.2.3 From d1ae567fb8b559401a9f65290bbb0cef5e987bfe Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 May 2024 18:40:08 -0700 Subject: KVM: Add a flag to track if a loaded vCPU is scheduled out Add a kvm_vcpu.scheduled_out flag to track if a vCPU is in the process of being scheduled out (vCPU put path), or if the vCPU is being reloaded after being scheduled out (vCPU load path). In the short term, this will allow dropping kvm_arch_sched_in(), as arch code can query scheduled_out during kvm_arch_vcpu_load(). Longer term, scheduled_out opens up other potential optimizations, without creating subtle/brittle dependencies. E.g. it allows KVM to keep guest state (that is managed via kvm_arch_vcpu_{load,put}()) loaded across kvm_sched_{out,in}(), if KVM knows the state isn't accessed by the host kernel. Forcing arch code to coordinate between kvm_arch_sched_{in,out}() and kvm_arch_vcpu_{load,put}() is awkward, not reusable, and relies on the exact ordering of calls into arch code. Adding scheduled_out also obviates the need for a kvm_arch_sched_out() hook, e.g. if arch code needs to do something novel when putting vCPU state. And even if KVM never uses scheduled_out for anything beyond dropping kvm_arch_sched_in(), just being able to remove all of the arch stubs makes it worth adding the flag. Link: https://lore.kernel.org/all/20240430224431.490139-1-seanjc@google.com Cc: Oliver Upton Reviewed-by: Oliver Upton Acked-by: Kai Huang Link: https://lore.kernel.org/r/20240522014013.1672962-2-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eaa70e9b1218..5e04c6cae34a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -380,6 +380,7 @@ struct kvm_vcpu { #endif bool preempted; bool ready; + bool scheduled_out; struct kvm_vcpu_arch arch; struct kvm_vcpu_stat stat; char stats_id[KVM_STATS_NAME_SIZE]; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7bf1ef0fc10b..007d8380062b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -6294,6 +6294,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) __this_cpu_write(kvm_running_vcpu, vcpu); kvm_arch_sched_in(vcpu, cpu); kvm_arch_vcpu_load(vcpu, cpu); + + WRITE_ONCE(vcpu->scheduled_out, false); } static void kvm_sched_out(struct preempt_notifier *pn, @@ -6301,6 +6303,8 @@ static void kvm_sched_out(struct preempt_notifier *pn, { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + WRITE_ONCE(vcpu->scheduled_out, true); + if (current->on_rq) { WRITE_ONCE(vcpu->preempted, true); WRITE_ONCE(vcpu->ready, true); -- cgit v1.2.3 From 2a27c431400797e0044872283d1971aa372fcd3a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 May 2024 18:40:11 -0700 Subject: KVM: Delete the now unused kvm_arch_sched_in() Delete kvm_arch_sched_in() now that all implementations are nops. Reviewed-by: Bibo Mao Acked-by: Kai Huang Link: https://lore.kernel.org/r/20240522014013.1672962-5-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/arm64/include/asm/kvm_host.h | 1 - arch/loongarch/include/asm/kvm_host.h | 1 - arch/mips/include/asm/kvm_host.h | 1 - arch/powerpc/include/asm/kvm_host.h | 1 - arch/riscv/include/asm/kvm_host.h | 1 - arch/s390/include/asm/kvm_host.h | 1 - arch/x86/kvm/pmu.c | 6 +++--- arch/x86/kvm/x86.c | 5 ----- include/linux/kvm_host.h | 2 -- virt/kvm/kvm_main.c | 1 - 10 files changed, 3 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8170c04fde91..615e7a2e5590 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1225,7 +1225,6 @@ static inline bool kvm_system_needs_idmapped_vectors(void) } static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} void kvm_arm_init_debug(void); void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index c87b6ea0ec47..4162a252cdf6 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -261,7 +261,6 @@ static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *arch) static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 179f320cc231..6743a57c1ab4 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -890,7 +890,6 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8abac532146e..c4fb6a27fb92 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -897,7 +897,6 @@ struct kvm_vcpu_arch { static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index d96281278586..dd77c2db6819 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -286,7 +286,6 @@ struct kvm_vcpu_arch { }; static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} #define KVM_RISCV_GSTAGE_TLB_MIN_ORDER 12 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 95990461888f..e9fcaf4607a6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1045,7 +1045,6 @@ extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); static inline void kvm_arch_sync_events(struct kvm *kvm) {} -static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {} diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index a593b03c9aed..f9149c9fc275 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -521,9 +521,9 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) } /* - * Unused perf_events are only released if the corresponding MSRs - * weren't accessed during the last vCPU time slice. kvm_arch_sched_in - * triggers KVM_REQ_PMU if cleanup is needed. + * Release unused perf_events if the corresponding guest MSRs weren't + * accessed during the last vCPU time slice (need_cleanup is set when + * the vCPU is scheduled back in). */ if (unlikely(pmu->need_cleanup)) kvm_pmu_cleanup(vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 75d413ffcd5f..6830cd389fc2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12575,11 +12575,6 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; } -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) -{ - -} - void kvm_arch_free_vm(struct kvm *kvm) { #if IS_ENABLED(CONFIG_HYPERV) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5e04c6cae34a..7b9d2633a931 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1495,8 +1495,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg); int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu); -void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); - void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 007d8380062b..b60186b9c1d3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -6292,7 +6292,6 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) WRITE_ONCE(vcpu->ready, false); __this_cpu_write(kvm_running_vcpu, vcpu); - kvm_arch_sched_in(vcpu, cpu); kvm_arch_vcpu_load(vcpu, cpu); WRITE_ONCE(vcpu->scheduled_out, false); -- cgit v1.2.3 From 190fec72df4a5d4d98b1e783c333f471e5e5f344 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 12 Jun 2024 08:44:27 +0900 Subject: uprobe: Wire up uretprobe system call Wiring up uretprobe system call, which comes in following changes. We need to do the wiring before, because the uretprobe implementation needs the syscall number. Note at the moment uretprobe syscall is supported only for native 64-bit process. Link: https://lore.kernel.org/all/20240611112158.40795-3-jolsa@kernel.org/ Reviewed-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Masami Hiramatsu (Google) --- arch/x86/entry/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 2 ++ include/uapi/asm-generic/unistd.h | 5 ++++- kernel/sys_ni.c | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5b..6452c2ec469a 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -384,6 +384,7 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal +463 64 uretprobe sys_uretprobe # # Due to a historical design error, certain syscalls are numbered differently diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 9104952d323d..494f5e0f61f7 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -973,6 +973,8 @@ asmlinkage long sys_lsm_list_modules(u64 *ids, u32 *size, u32 flags); /* x86 */ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int on); +asmlinkage long sys_uretprobe(void); + /* pciconfig: alpha, arm, arm64, ia64, sparc */ asmlinkage long sys_pciconfig_read(unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index d983c48a3b6a..2378f88d5ad4 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -845,8 +845,11 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) +#define __NR_uretprobe 463 +__SYSCALL(__NR_uretprobe, sys_uretprobe) + #undef __NR_syscalls -#define __NR_syscalls 463 +#define __NR_syscalls 464 /* * 32 bit systems traditionally used different diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index d7eee421d4bc..5ce9fa0dc195 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -392,3 +392,5 @@ COND_SYSCALL(setuid16); /* restartable sequence */ COND_SYSCALL(rseq); + +COND_SYSCALL(uretprobe); -- cgit v1.2.3 From ff474a78cef5cb5f32be52fe25b78441327a2e7c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 12 Jun 2024 08:44:28 +0900 Subject: uprobe: Add uretprobe syscall to speed up return probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding uretprobe syscall instead of trap to speed up return probe. At the moment the uretprobe setup/path is: - install entry uprobe - when the uprobe is hit, it overwrites probed function's return address on stack with address of the trampoline that contains breakpoint instruction - the breakpoint trap code handles the uretprobe consumers execution and jumps back to original return address This patch replaces the above trampoline's breakpoint instruction with new ureprobe syscall call. This syscall does exactly the same job as the trap with some more extra work: - syscall trampoline must save original value for rax/r11/rcx registers on stack - rax is set to syscall number and r11/rcx are changed and used by syscall instruction - the syscall code reads the original values of those registers and restore those values in task's pt_regs area - only caller from trampoline exposed in '[uprobes]' is allowed, the process will receive SIGILL signal otherwise Even with some extra work, using the uretprobes syscall shows speed improvement (compared to using standard breakpoint): On Intel (11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz) current: uretprobe-nop : 1.498 ± 0.000M/s uretprobe-push : 1.448 ± 0.001M/s uretprobe-ret : 0.816 ± 0.001M/s with the fix: uretprobe-nop : 1.969 ± 0.002M/s < 31% speed up uretprobe-push : 1.910 ± 0.000M/s < 31% speed up uretprobe-ret : 0.934 ± 0.000M/s < 14% speed up On Amd (AMD Ryzen 7 5700U) current: uretprobe-nop : 0.778 ± 0.001M/s uretprobe-push : 0.744 ± 0.001M/s uretprobe-ret : 0.540 ± 0.001M/s with the fix: uretprobe-nop : 0.860 ± 0.001M/s < 10% speed up uretprobe-push : 0.818 ± 0.001M/s < 10% speed up uretprobe-ret : 0.578 ± 0.000M/s < 7% speed up The performance test spawns a thread that runs loop which triggers uprobe with attached bpf program that increments the counter that gets printed in results above. The uprobe (and uretprobe) kind is determined by which instruction is being patched with breakpoint instruction. That's also important for uretprobes, because uprobe is installed for each uretprobe. The performance test is part of bpf selftests: tools/testing/selftests/bpf/run_bench_uprobes.sh Note at the moment uretprobe syscall is supported only for native 64-bit process, compat process still uses standard breakpoint. Note that when shadow stack is enabled the uretprobe syscall returns via iret, which is slower than return via sysret, but won't cause the shadow stack violation. Link: https://lore.kernel.org/all/20240611112158.40795-4-jolsa@kernel.org/ Suggested-by: Andrii Nakryiko Reviewed-by: Oleg Nesterov Reviewed-by: Masami Hiramatsu (Google) Acked-by: Andrii Nakryiko Signed-off-by: Oleg Nesterov Signed-off-by: Jiri Olsa Signed-off-by: Masami Hiramatsu (Google) --- arch/x86/include/asm/shstk.h | 2 + arch/x86/kernel/shstk.c | 5 ++ arch/x86/kernel/uprobes.c | 117 +++++++++++++++++++++++++++++++++++++++++++ include/linux/uprobes.h | 3 ++ kernel/events/uprobes.c | 24 ++++++--- 5 files changed, 144 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h index 896909f306e3..4cb77e004615 100644 --- a/arch/x86/include/asm/shstk.h +++ b/arch/x86/include/asm/shstk.h @@ -22,6 +22,7 @@ void shstk_free(struct task_struct *p); int setup_signal_shadow_stack(struct ksignal *ksig); int restore_signal_shadow_stack(void); int shstk_update_last_frame(unsigned long val); +bool shstk_is_enabled(void); #else static inline long shstk_prctl(struct task_struct *task, int option, unsigned long arg2) { return -EINVAL; } @@ -33,6 +34,7 @@ static inline void shstk_free(struct task_struct *p) {} static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; } static inline int restore_signal_shadow_stack(void) { return 0; } static inline int shstk_update_last_frame(unsigned long val) { return 0; } +static inline bool shstk_is_enabled(void) { return false; } #endif /* CONFIG_X86_USER_SHADOW_STACK */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/shstk.c b/arch/x86/kernel/shstk.c index 9797d4cdb78a..059685612362 100644 --- a/arch/x86/kernel/shstk.c +++ b/arch/x86/kernel/shstk.c @@ -588,3 +588,8 @@ int shstk_update_last_frame(unsigned long val) ssp = get_user_shstk_addr(); return write_user_shstk_64((u64 __user *)ssp, (u64)val); } + +bool shstk_is_enabled(void) +{ + return features_enabled(ARCH_SHSTK_SHSTK); +} diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 6402fb3089d2..5a952c5ea66b 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -308,6 +309,122 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool } #ifdef CONFIG_X86_64 + +asm ( + ".pushsection .rodata\n" + ".global uretprobe_trampoline_entry\n" + "uretprobe_trampoline_entry:\n" + "pushq %rax\n" + "pushq %rcx\n" + "pushq %r11\n" + "movq $" __stringify(__NR_uretprobe) ", %rax\n" + "syscall\n" + ".global uretprobe_syscall_check\n" + "uretprobe_syscall_check:\n" + "popq %r11\n" + "popq %rcx\n" + + /* The uretprobe syscall replaces stored %rax value with final + * return address, so we don't restore %rax in here and just + * call ret. + */ + "retq\n" + ".global uretprobe_trampoline_end\n" + "uretprobe_trampoline_end:\n" + ".popsection\n" +); + +extern u8 uretprobe_trampoline_entry[]; +extern u8 uretprobe_trampoline_end[]; +extern u8 uretprobe_syscall_check[]; + +void *arch_uprobe_trampoline(unsigned long *psize) +{ + static uprobe_opcode_t insn = UPROBE_SWBP_INSN; + struct pt_regs *regs = task_pt_regs(current); + + /* + * At the moment the uretprobe syscall trampoline is supported + * only for native 64-bit process, the compat process still uses + * standard breakpoint. + */ + if (user_64bit_mode(regs)) { + *psize = uretprobe_trampoline_end - uretprobe_trampoline_entry; + return uretprobe_trampoline_entry; + } + + *psize = UPROBE_SWBP_INSN_SIZE; + return &insn; +} + +static unsigned long trampoline_check_ip(void) +{ + unsigned long tramp = uprobe_get_trampoline_vaddr(); + + return tramp + (uretprobe_syscall_check - uretprobe_trampoline_entry); +} + +SYSCALL_DEFINE0(uretprobe) +{ + struct pt_regs *regs = task_pt_regs(current); + unsigned long err, ip, sp, r11_cx_ax[3]; + + if (regs->ip != trampoline_check_ip()) + goto sigill; + + err = copy_from_user(r11_cx_ax, (void __user *)regs->sp, sizeof(r11_cx_ax)); + if (err) + goto sigill; + + /* expose the "right" values of r11/cx/ax/sp to uprobe_consumer/s */ + regs->r11 = r11_cx_ax[0]; + regs->cx = r11_cx_ax[1]; + regs->ax = r11_cx_ax[2]; + regs->sp += sizeof(r11_cx_ax); + regs->orig_ax = -1; + + ip = regs->ip; + sp = regs->sp; + + uprobe_handle_trampoline(regs); + + /* + * Some of the uprobe consumers has changed sp, we can do nothing, + * just return via iret. + * .. or shadow stack is enabled, in which case we need to skip + * return through the user space stack address. + */ + if (regs->sp != sp || shstk_is_enabled()) + return regs->ax; + regs->sp -= sizeof(r11_cx_ax); + + /* for the case uprobe_consumer has changed r11/cx */ + r11_cx_ax[0] = regs->r11; + r11_cx_ax[1] = regs->cx; + + /* + * ax register is passed through as return value, so we can use + * its space on stack for ip value and jump to it through the + * trampoline's ret instruction + */ + r11_cx_ax[2] = regs->ip; + regs->ip = ip; + + err = copy_to_user((void __user *)regs->sp, r11_cx_ax, sizeof(r11_cx_ax)); + if (err) + goto sigill; + + /* ensure sysret, see do_syscall_64() */ + regs->r11 = regs->flags; + regs->cx = regs->ip; + + return regs->ax; + +sigill: + force_sig(SIGILL); + return -1; +} + /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. Otherwise, rewrite the instruction so that it accesses diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index f46e0ca0169c..b503fafb7fb3 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -138,6 +138,9 @@ extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); +extern void uprobe_handle_trampoline(struct pt_regs *regs); +extern void *arch_uprobe_trampoline(unsigned long *psize); +extern unsigned long uprobe_get_trampoline_vaddr(void); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2c83ba776fc7..2816e65729ac 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1474,11 +1474,20 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) return ret; } +void * __weak arch_uprobe_trampoline(unsigned long *psize) +{ + static uprobe_opcode_t insn = UPROBE_SWBP_INSN; + + *psize = UPROBE_SWBP_INSN_SIZE; + return &insn; +} + static struct xol_area *__create_xol_area(unsigned long vaddr) { struct mm_struct *mm = current->mm; - uprobe_opcode_t insn = UPROBE_SWBP_INSN; + unsigned long insns_size; struct xol_area *area; + void *insns; area = kmalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) @@ -1502,7 +1511,8 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) /* Reserve the 1st slot for get_trampoline_vaddr() */ set_bit(0, area->bitmap); atomic_set(&area->slot_count, 1); - arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE); + insns = arch_uprobe_trampoline(&insns_size); + arch_uprobe_copy_ixol(area->pages[0], 0, insns, insns_size); if (!xol_add_vma(mm, area)) return area; @@ -1827,7 +1837,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags) * * Returns -1 in case the xol_area is not allocated. */ -static unsigned long get_trampoline_vaddr(void) +unsigned long uprobe_get_trampoline_vaddr(void) { struct xol_area *area; unsigned long trampoline_vaddr = -1; @@ -1878,7 +1888,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs) if (!ri) return; - trampoline_vaddr = get_trampoline_vaddr(); + trampoline_vaddr = uprobe_get_trampoline_vaddr(); orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs); if (orig_ret_vaddr == -1) goto fail; @@ -2123,7 +2133,7 @@ static struct return_instance *find_next_ret_chain(struct return_instance *ri) return ri; } -static void handle_trampoline(struct pt_regs *regs) +void uprobe_handle_trampoline(struct pt_regs *regs) { struct uprobe_task *utask; struct return_instance *ri, *next; @@ -2187,8 +2197,8 @@ static void handle_swbp(struct pt_regs *regs) int is_swbp; bp_vaddr = uprobe_get_swbp_addr(regs); - if (bp_vaddr == get_trampoline_vaddr()) - return handle_trampoline(regs); + if (bp_vaddr == uprobe_get_trampoline_vaddr()) + return uprobe_handle_trampoline(regs); uprobe = find_active_uprobe(bp_vaddr, &is_swbp); if (!uprobe) { -- cgit v1.2.3 From 90e6f08915ec6efe46570420412a65050ec826b2 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 11 Jun 2024 17:34:35 +0900 Subject: scsi: mpi3mr: Fix ATA NCQ priority support The function mpi3mr_qcmd() of the mpi3mr driver is able to indicate to the HBA if a read or write command directed at an ATA device should be translated to an NCQ read/write command with the high prioiryt bit set when the request uses the RT priority class and the user has enabled NCQ priority through sysfs. However, unlike the mpt3sas driver, the mpi3mr driver does not define the sas_ncq_prio_supported and sas_ncq_prio_enable sysfs attributes, so the ncq_prio_enable field of struct mpi3mr_sdev_priv_data is never actually set and NCQ Priority cannot ever be used. Fix this by defining these missing atributes to allow a user to check if an ATA device supports NCQ priority and to enable/disable the use of NCQ priority. To do this, lift the function scsih_ncq_prio_supp() out of the mpt3sas driver and make it the generic SCSI SAS transport function sas_ata_ncq_prio_supported(). Nothing in that function is hardware specific, so this function can be used in both the mpt3sas driver and the mpi3mr driver. Reported-by: Scott McCoy Fixes: 023ab2a9b4ed ("scsi: mpi3mr: Add support for queue command processing") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20240611083435.92961-1-dlemoal@kernel.org Reviewed-by: Niklas Cassel Signed-off-by: Martin K. Petersen --- drivers/scsi/mpi3mr/mpi3mr_app.c | 62 ++++++++++++++++++++++++++++++++++++ drivers/scsi/mpt3sas/mpt3sas_base.h | 3 -- drivers/scsi/mpt3sas/mpt3sas_ctl.c | 4 +-- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 23 ------------- drivers/scsi/scsi_transport_sas.c | 23 +++++++++++++ include/scsi/scsi_transport_sas.h | 2 ++ 6 files changed, 89 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 1638109a68a0..cd261b48eb46 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -2163,10 +2163,72 @@ persistent_id_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(persistent_id); +/** + * sas_ncq_prio_supported_show - Indicate if device supports NCQ priority + * @dev: pointer to embedded device + * @attr: sas_ncq_prio_supported attribute descriptor + * @buf: the buffer returned + * + * A sysfs 'read-only' sdev attribute, only works with SATA devices + */ +static ssize_t +sas_ncq_prio_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + + return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); +} +static DEVICE_ATTR_RO(sas_ncq_prio_supported); + +/** + * sas_ncq_prio_enable_show - send prioritized io commands to device + * @dev: pointer to embedded device + * @attr: sas_ncq_prio_enable attribute descriptor + * @buf: the buffer returned + * + * A sysfs 'read/write' sdev attribute, only works with SATA devices + */ +static ssize_t +sas_ncq_prio_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; + + if (!sdev_priv_data) + return 0; + + return sysfs_emit(buf, "%d\n", sdev_priv_data->ncq_prio_enable); +} + +static ssize_t +sas_ncq_prio_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; + bool ncq_prio_enable = 0; + + if (kstrtobool(buf, &ncq_prio_enable)) + return -EINVAL; + + if (!sas_ata_ncq_prio_supported(sdev)) + return -EINVAL; + + sdev_priv_data->ncq_prio_enable = ncq_prio_enable; + + return strlen(buf); +} +static DEVICE_ATTR_RW(sas_ncq_prio_enable); + static struct attribute *mpi3mr_dev_attrs[] = { &dev_attr_sas_address.attr, &dev_attr_device_handle.attr, &dev_attr_persistent_id.attr, + &dev_attr_sas_ncq_prio_supported.attr, + &dev_attr_sas_ncq_prio_enable.attr, NULL, }; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index bf100a4ebfc3..fe1e96fda284 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -2048,9 +2048,6 @@ void mpt3sas_setup_direct_io(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, struct _raid_device *raid_device, Mpi25SCSIIORequest_t *mpi_request); -/* NCQ Prio Handling Check */ -bool scsih_ncq_prio_supp(struct scsi_device *sdev); - void mpt3sas_setup_debugfs(struct MPT3SAS_ADAPTER *ioc); void mpt3sas_destroy_debugfs(struct MPT3SAS_ADAPTER *ioc); void mpt3sas_init_debugfs(void); diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 1c9fd26195b8..87784c96249a 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -4088,7 +4088,7 @@ sas_ncq_prio_supported_show(struct device *dev, { struct scsi_device *sdev = to_scsi_device(dev); - return sysfs_emit(buf, "%d\n", scsih_ncq_prio_supp(sdev)); + return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); } static DEVICE_ATTR_RO(sas_ncq_prio_supported); @@ -4123,7 +4123,7 @@ sas_ncq_prio_enable_store(struct device *dev, if (kstrtobool(buf, &ncq_prio_enable)) return -EINVAL; - if (!scsih_ncq_prio_supp(sdev)) + if (!sas_ata_ncq_prio_supported(sdev)) return -EINVAL; sas_device_priv_data->ncq_prio_enable = ncq_prio_enable; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 12d08d8ba538..870ec2cb4af4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -12571,29 +12571,6 @@ scsih_pci_mmio_enabled(struct pci_dev *pdev) return PCI_ERS_RESULT_RECOVERED; } -/** - * scsih_ncq_prio_supp - Check for NCQ command priority support - * @sdev: scsi device struct - * - * This is called when a user indicates they would like to enable - * ncq command priorities. This works only on SATA devices. - */ -bool scsih_ncq_prio_supp(struct scsi_device *sdev) -{ - struct scsi_vpd *vpd; - bool ncq_prio_supp = false; - - rcu_read_lock(); - vpd = rcu_dereference(sdev->vpd_pg89); - if (!vpd || vpd->len < 214) - goto out; - - ncq_prio_supp = (vpd->data[213] >> 4) & 1; -out: - rcu_read_unlock(); - - return ncq_prio_supp; -} /* * The pci device ids are defined in mpi/mpi2_cnfg.h. */ diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 424a89513814..4e33f1661e4c 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -416,6 +416,29 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *sdev) } EXPORT_SYMBOL_GPL(sas_is_tlr_enabled); +/** + * sas_ata_ncq_prio_supported - Check for ATA NCQ command priority support + * @sdev: SCSI device + * + * Check if an ATA device supports NCQ priority using VPD page 89h (ATA + * Information). Since this VPD page is implemented only for ATA devices, + * this function always returns false for SCSI devices. + */ +bool sas_ata_ncq_prio_supported(struct scsi_device *sdev) +{ + struct scsi_vpd *vpd; + bool ncq_prio_supported = false; + + rcu_read_lock(); + vpd = rcu_dereference(sdev->vpd_pg89); + if (vpd && vpd->len >= 214) + ncq_prio_supported = (vpd->data[213] >> 4) & 1; + rcu_read_unlock(); + + return ncq_prio_supported; +} +EXPORT_SYMBOL_GPL(sas_ata_ncq_prio_supported); + /* * SAS Phy attributes */ diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 0e75b9277c8c..e3b6ce3cbf88 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -200,6 +200,8 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *); void sas_disable_tlr(struct scsi_device *); void sas_enable_tlr(struct scsi_device *); +bool sas_ata_ncq_prio_supported(struct scsi_device *sdev); + extern struct sas_rphy *sas_end_device_alloc(struct sas_port *); extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type); void sas_rphy_free(struct sas_rphy *); -- cgit v1.2.3 From fa59dc2f6fc616fde3941f62ccd4adcaa21ccd47 Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 7 Jun 2024 18:25:24 +0800 Subject: net: core,vrf: Change pcpu_dstat fields to u64_stats_t The pcpu_sw_netstats and pcpu_lstats structs both contain a set of u64_stats_t fields for individual stats, but pcpu_dstats uses u64s instead. Make this consistent by using u64_stats_t across all stats types. The per-cpu dstats are only used by the vrf driver at present, so update that driver as part of this change. Signed-off-by: Jeremy Kerr Reviewed-by: Simon Horman Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607-dstats-v3-1-cc781fe116f7@codeconstruct.com.au Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 38 ++++++++++++++++++++++---------------- include/linux/netdevice.h | 12 ++++++------ 2 files changed, 28 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 3a252ac5dd28..5018831b2a79 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -126,8 +126,8 @@ static void vrf_rx_stats(struct net_device *dev, int len) struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); - dstats->rx_packets++; - dstats->rx_bytes += len; + u64_stats_inc(&dstats->rx_packets); + u64_stats_add(&dstats->rx_bytes, len); u64_stats_update_end(&dstats->syncp); } @@ -150,11 +150,11 @@ static void vrf_get_stats64(struct net_device *dev, dstats = per_cpu_ptr(dev->dstats, i); do { start = u64_stats_fetch_begin(&dstats->syncp); - tbytes = dstats->tx_bytes; - tpkts = dstats->tx_packets; - tdrops = dstats->tx_drops; - rbytes = dstats->rx_bytes; - rpkts = dstats->rx_packets; + tbytes = u64_stats_read(&dstats->tx_bytes); + tpkts = u64_stats_read(&dstats->tx_packets); + tdrops = u64_stats_read(&dstats->tx_drops); + rbytes = u64_stats_read(&dstats->rx_bytes); + rpkts = u64_stats_read(&dstats->rx_packets); } while (u64_stats_fetch_retry(&dstats->syncp, start)); stats->tx_bytes += tbytes; stats->tx_packets += tpkts; @@ -408,10 +408,15 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, skb->protocol = eth_type_trans(skb, dev); - if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { vrf_rx_stats(dev, len); - else - this_cpu_inc(dev->dstats->rx_drops); + } else { + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_drops); + u64_stats_update_end(&dstats->syncp); + } return NETDEV_TX_OK; } @@ -599,19 +604,20 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + int len = skb->len; netdev_tx_t ret = is_ip_tx_frame(skb, dev); + u64_stats_update_begin(&dstats->syncp); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - u64_stats_update_begin(&dstats->syncp); - dstats->tx_packets++; - dstats->tx_bytes += len; - u64_stats_update_end(&dstats->syncp); + u64_stats_inc(&dstats->tx_packets); + u64_stats_add(&dstats->tx_bytes, len); } else { - this_cpu_inc(dev->dstats->tx_drops); + u64_stats_inc(&dstats->tx_drops); } + u64_stats_update_end(&dstats->syncp); return ret; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d20c6c99eb88..f148a01dd1d1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2731,12 +2731,12 @@ struct pcpu_sw_netstats { } __aligned(4 * sizeof(u64)); struct pcpu_dstats { - u64 rx_packets; - u64 rx_bytes; - u64 rx_drops; - u64 tx_packets; - u64 tx_bytes; - u64 tx_drops; + u64_stats_t rx_packets; + u64_stats_t rx_bytes; + u64_stats_t rx_drops; + u64_stats_t tx_packets; + u64_stats_t tx_bytes; + u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); -- cgit v1.2.3 From 144ba8580bcb82b2686c3d1a043299d844b9a682 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Mon, 10 Jun 2024 10:34:26 +0200 Subject: net: pse-pd: Use EOPNOTSUPP error code instead of ENOTSUPP ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP as reported by checkpatch script. Fixes: 18ff0bcda6d1 ("ethtool: add interface to interact with Ethernet Power Equipment") Reviewed-by: Andrew Lunn Acked-by: Oleksij Rempel Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240610083426.740660-1-kory.maincent@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/pse-pd/pse.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index 6d07c95dabb9..6eec24ffa866 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -167,14 +167,14 @@ static inline int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, struct pse_control_status *status) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline bool pse_has_podl(struct pse_control *psec) -- cgit v1.2.3 From 3966a668bfeef49e265e4975a2cdc55fb931036d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:55 +0100 Subject: net/tcp: Use static_branch_tcp_{md5,ao} to drop ifdefs It's possible to clean-up some ifdefs by hiding that tcp_{md5,ao}_needed static branch is defined and compiled only under related configs, since commit 4c8530dc7d7d ("net/tcp: Only produce AO/MD5 logs if there are any keys"). Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp.h | 14 ++++---------- net/ipv4/tcp_ipv4.c | 8 ++------ 2 files changed, 6 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index a70fc39090fe..e5427b05129b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2386,21 +2386,15 @@ static inline void tcp_get_current_key(const struct sock *sk, static inline bool tcp_key_is_md5(const struct tcp_key *key) { -#ifdef CONFIG_TCP_MD5SIG - if (static_branch_unlikely(&tcp_md5_needed.key) && - key->type == TCP_KEY_MD5) - return true; -#endif + if (static_branch_tcp_md5()) + return key->type == TCP_KEY_MD5; return false; } static inline bool tcp_key_is_ao(const struct tcp_key *key) { -#ifdef CONFIG_TCP_AO - if (static_branch_unlikely(&tcp_ao_needed.key) && - key->type == TCP_KEY_AO) - return true; -#endif + if (static_branch_tcp_ao()) + return key->type == TCP_KEY_AO; return false; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e61c7c974745..de0c8f43448a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1054,12 +1054,10 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) #else if (0) { #endif -#ifdef CONFIG_TCP_MD5SIG - } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + } else if (static_branch_tcp_md5()) { key.md5_key = tcp_twsk_md5_key(tcptw); if (key.md5_key) key.type = TCP_KEY_MD5; -#endif } tcp_v4_send_ack(sk, skb, @@ -1128,8 +1126,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, #else if (0) { #endif -#ifdef CONFIG_TCP_MD5SIG - } else if (static_branch_unlikely(&tcp_md5_needed.key)) { + } else if (static_branch_tcp_md5()) { const union tcp_md5_addr *addr; int l3index; @@ -1138,7 +1135,6 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, key.md5_key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET); if (key.md5_key) key.type = TCP_KEY_MD5; -#endif } tcp_v4_send_ack(sk, skb, seq, -- cgit v1.2.3 From 72863087f635367323693b9ab83c3107e0353c5f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:56 +0100 Subject: net/tcp: Add a helper tcp_ao_hdr_maclen() It's going to be used more in TCP-AO tracepoints. Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 5 +++++ net/ipv4/tcp_ao.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 5d8e9ed2c005..198e02004ad2 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -19,6 +19,11 @@ struct tcp_ao_hdr { u8 rnext_keyid; }; +static inline u8 tcp_ao_hdr_maclen(const struct tcp_ao_hdr *aoh) +{ + return aoh->length - sizeof(struct tcp_ao_hdr); +} + struct tcp_ao_counters { atomic64_t pkt_good; atomic64_t pkt_bad; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 37c42b63ff99..50ae43c92829 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -884,8 +884,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, const struct tcp_ao_hdr *aoh, struct tcp_ao_key *key, u8 *traffic_key, u8 *phash, u32 sne, int l3index) { - u8 maclen = aoh->length - sizeof(struct tcp_ao_hdr); const struct tcphdr *th = tcp_hdr(skb); + u8 maclen = tcp_ao_hdr_maclen(aoh); void *hash_buf = NULL; if (maclen != tcp_ao_maclen(key)) { -- cgit v1.2.3 From 811efc06e5f30a57030451b2d1998aa81273baf8 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:57 +0100 Subject: net/tcp: Move tcp_inbound_hash() from headers Two reasons: 1. It's grown up enough 2. In order to not do header spaghetti by including , which is necessary for TCP tracepoints. While at it, unexport and make static tcp_inbound_ao_hash(). Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp.h | 78 +++---------------------------------------------------- net/ipv4/tcp.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index e5427b05129b..2aac11e7e1cc 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1863,12 +1863,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, return __tcp_md5_do_lookup(sk, 0, addr, family, true); } -enum skb_drop_reason -tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int l3index, const __u8 *hash_location); - - #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key) #else static inline struct tcp_md5sig_key * @@ -1885,13 +1879,6 @@ tcp_md5_do_lookup_any_l3index(const struct sock *sk, return NULL; } -static inline enum skb_drop_reason -tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int l3index, const __u8 *hash_location) -{ - return SKB_NOT_DROPPED_YET; -} #define tcp_twsk_md5_key(twsk) NULL #endif @@ -2806,66 +2793,9 @@ static inline bool tcp_ao_required(struct sock *sk, const void *saddr, return false; } -/* Called with rcu_read_lock() */ -static inline enum skb_drop_reason -tcp_inbound_hash(struct sock *sk, const struct request_sock *req, - const struct sk_buff *skb, - const void *saddr, const void *daddr, - int family, int dif, int sdif) -{ - const struct tcphdr *th = tcp_hdr(skb); - const struct tcp_ao_hdr *aoh; - const __u8 *md5_location; - int l3index; - - /* Invalid option or two times meet any of auth options */ - if (tcp_parse_auth_options(th, &md5_location, &aoh)) { - tcp_hash_fail("TCP segment has incorrect auth options set", - family, skb, ""); - return SKB_DROP_REASON_TCP_AUTH_HDR; - } - - if (req) { - if (tcp_rsk_used_ao(req) != !!aoh) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); - tcp_hash_fail("TCP connection can't start/end using TCP-AO", - family, skb, "%s", - !aoh ? "missing AO" : "AO signed"); - return SKB_DROP_REASON_TCP_AOFAILURE; - } - } - - /* sdif set, means packet ingressed via a device - * in an L3 domain and dif is set to the l3mdev - */ - l3index = sdif ? dif : 0; - - /* Fast path: unsigned segments */ - if (likely(!md5_location && !aoh)) { - /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid - * for the remote peer. On TCP-AO established connection - * the last key is impossible to remove, so there's - * always at least one current_key. - */ - if (tcp_ao_required(sk, saddr, family, l3index, true)) { - tcp_hash_fail("AO hash is required, but not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_AONOTFOUND; - } - if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", - family, skb, "L3 index %d", l3index); - return SKB_DROP_REASON_TCP_MD5NOTFOUND; - } - return SKB_NOT_DROPPED_YET; - } - - if (aoh) - return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); - - return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, - l3index, md5_location); -} +enum skb_drop_reason tcp_inbound_hash(struct sock *sk, + const struct request_sock *req, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif); #endif /* _TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6553221694ec..17a4a8e4855d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4463,7 +4463,7 @@ int tcp_md5_hash_key(struct tcp_sigpool *hp, EXPORT_SYMBOL(tcp_md5_hash_key); /* Called with rcu_read_lock() */ -enum skb_drop_reason +static enum skb_drop_reason tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, const void *saddr, const void *daddr, int family, int l3index, const __u8 *hash_location) @@ -4517,10 +4517,80 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, } return SKB_NOT_DROPPED_YET; } -EXPORT_SYMBOL(tcp_inbound_md5_hash); +#else +static inline enum skb_drop_reason +tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int l3index, const __u8 *hash_location) +{ + return SKB_NOT_DROPPED_YET; +} #endif +/* Called with rcu_read_lock() */ +enum skb_drop_reason +tcp_inbound_hash(struct sock *sk, const struct request_sock *req, + const struct sk_buff *skb, + const void *saddr, const void *daddr, + int family, int dif, int sdif) +{ + const struct tcphdr *th = tcp_hdr(skb); + const struct tcp_ao_hdr *aoh; + const __u8 *md5_location; + int l3index; + + /* Invalid option or two times meet any of auth options */ + if (tcp_parse_auth_options(th, &md5_location, &aoh)) { + tcp_hash_fail("TCP segment has incorrect auth options set", + family, skb, ""); + return SKB_DROP_REASON_TCP_AUTH_HDR; + } + + if (req) { + if (tcp_rsk_used_ao(req) != !!aoh) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); + tcp_hash_fail("TCP connection can't start/end using TCP-AO", + family, skb, "%s", + !aoh ? "missing AO" : "AO signed"); + return SKB_DROP_REASON_TCP_AOFAILURE; + } + } + + /* sdif set, means packet ingressed via a device + * in an L3 domain and dif is set to the l3mdev + */ + l3index = sdif ? dif : 0; + + /* Fast path: unsigned segments */ + if (likely(!md5_location && !aoh)) { + /* Drop if there's TCP-MD5 or TCP-AO key with any rcvid/sndid + * for the remote peer. On TCP-AO established connection + * the last key is impossible to remove, so there's + * always at least one current_key. + */ + if (tcp_ao_required(sk, saddr, family, l3index, true)) { + tcp_hash_fail("AO hash is required, but not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_AONOTFOUND; + } + if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); + tcp_hash_fail("MD5 Hash not found", + family, skb, "L3 index %d", l3index); + return SKB_DROP_REASON_TCP_MD5NOTFOUND; + } + return SKB_NOT_DROPPED_YET; + } + + if (aoh) + return tcp_inbound_ao_hash(sk, skb, family, req, l3index, aoh); + + return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, + l3index, md5_location); +} +EXPORT_SYMBOL_GPL(tcp_inbound_hash); + void tcp_done(struct sock *sk) { struct request_sock *req; -- cgit v1.2.3 From 96be3dcd013df6aa79acf32e739c0a35b89a4f50 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:58 +0100 Subject: net/tcp: Add tcp-md5 and tcp-ao tracepoints Instead of forcing userspace to parse dmesg (that's what currently is happening, at least in codebase of my current company), provide a better way, that can be enabled/disabled in runtime. Currently, there are already tcp events, add hashing related ones there, too. Rasdaemon currently exercises net_dev_xmit_timeout, devlink_health_report, but it'll be trivial to teach it to deal with failed hashes. Otherwise, BGP may trace/log them itself. Especially exciting for possible investigations is key rotation (RNext_key requests). Suggested-by: Jakub Kicinski Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/trace/events/tcp.h | 317 +++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 17 +++ net/ipv4/tcp_ao.c | 13 ++ net/ipv4/tcp_input.c | 8 +- net/ipv4/tcp_output.c | 2 + 5 files changed, 355 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h index 49b5ee091cf6..1c8bd8e186b8 100644 --- a/include/trace/events/tcp.h +++ b/include/trace/events/tcp.h @@ -411,6 +411,323 @@ TRACE_EVENT(tcp_cong_state_set, __entry->cong_state) ); +DECLARE_EVENT_CLASS(tcp_hash_event, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + + TP_ARGS(sk, skb), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(int, l3index) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(bool, fin) + __field(bool, syn) + __field(bool, rst) + __field(bool, psh) + __field(bool, ack) + ), + + TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skbaddr = skb; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); + __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; + + /* For filtering use */ + __entry->sport = ntohs(th->source); + __entry->dport = ntohs(th->dest); + __entry->family = sk->sk_family; + + __entry->fin = th->fin; + __entry->syn = th->syn; + __entry->rst = th->rst; + __entry->psh = th->psh; + __entry->ack = th->ack; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c]", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->l3index, + __entry->fin ? 'F' : ' ', + __entry->syn ? 'S' : ' ', + __entry->rst ? 'R' : ' ', + __entry->psh ? 'P' : ' ', + __entry->ack ? '.' : ' ') +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_bad_header, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_required, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_unexpected, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_md5_mismatch, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DEFINE_EVENT(tcp_hash_event, tcp_hash_ao_required, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb), + TP_ARGS(sk, skb) +); + +DECLARE_EVENT_CLASS(tcp_ao_event, + + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + + TP_ARGS(sk, skb, keyid, rnext, maclen), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skbaddr) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + __field(int, l3index) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(bool, fin) + __field(bool, syn) + __field(bool, rst) + __field(bool, psh) + __field(bool, ack) + + __field(__u8, keyid) + __field(__u8, rnext) + __field(__u8, maclen) + ), + + TP_fast_assign( + const struct tcphdr *th = (const struct tcphdr *)skb->data; + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skbaddr = skb; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS_SKB(skb, th, __entry->saddr, __entry->daddr); + __entry->l3index = inet_sdif(skb) ? inet_iif(skb) : 0; + + /* For filtering use */ + __entry->sport = ntohs(th->source); + __entry->dport = ntohs(th->dest); + __entry->family = sk->sk_family; + + __entry->fin = th->fin; + __entry->syn = th->syn; + __entry->rst = th->rst; + __entry->psh = th->psh; + __entry->ack = th->ack; + + __entry->keyid = keyid; + __entry->rnext = rnext; + __entry->maclen = maclen; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->l3index, + __entry->fin ? 'F' : ' ', + __entry->syn ? 'S' : ' ', + __entry->rst ? 'R' : ' ', + __entry->psh ? 'P' : ' ', + __entry->ack ? '.' : ' ', + __entry->keyid, __entry->rnext, __entry->maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_handshake_failure, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_wrong_maclen, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_mismatch, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_key_not_found, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DEFINE_EVENT(tcp_ao_event, tcp_ao_rnext_request, + TP_PROTO(const struct sock *sk, const struct sk_buff *skb, + const __u8 keyid, const __u8 rnext, const __u8 maclen), + TP_ARGS(sk, skb, keyid, rnext, maclen) +); + +DECLARE_EVENT_CLASS(tcp_ao_event_sk, + + TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), + + TP_ARGS(sk, keyid, rnext), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(__u8, keyid) + __field(__u8, rnext) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + __entry->keyid = keyid; + __entry->rnext = rnext; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc keyid=%u rnext=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->keyid, __entry->rnext) +); + +DEFINE_EVENT(tcp_ao_event_sk, tcp_ao_synack_no_key, + TP_PROTO(const struct sock *sk, const __u8 keyid, const __u8 rnext), + TP_ARGS(sk, keyid, rnext) +); + +DECLARE_EVENT_CLASS(tcp_ao_event_sne, + + TP_PROTO(const struct sock *sk, __u32 new_sne), + + TP_ARGS(sk, new_sne), + + TP_STRUCT__entry( + __field(__u64, net_cookie) + __field(const void *, skaddr) + __field(int, state) + + /* sockaddr_in6 is always bigger than sockaddr_in */ + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) + + __field(__u16, sport) + __field(__u16, dport) + __field(__u16, family) + + __field(__u32, new_sne) + ), + + TP_fast_assign( + const struct inet_sock *inet = inet_sk(sk); + + __entry->net_cookie = sock_net(sk)->net_cookie; + __entry->skaddr = sk; + __entry->state = sk->sk_state; + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + TP_STORE_ADDR_PORTS(__entry, inet, sk); + + /* For filtering use */ + __entry->sport = ntohs(inet->inet_sport); + __entry->dport = ntohs(inet->inet_dport); + __entry->family = sk->sk_family; + + __entry->new_sne = new_sne; + ), + + TP_printk("net=%llu state=%s family=%s src=%pISpc dest=%pISpc sne=%u", + __entry->net_cookie, + show_tcp_state_name(__entry->state), + show_family_name(__entry->family), + __entry->saddr, __entry->daddr, + __entry->new_sne) +); + +DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_snd_sne_update, + TP_PROTO(const struct sock *sk, __u32 new_sne), + TP_ARGS(sk, new_sne) +); + +DEFINE_EVENT(tcp_ao_event_sne, tcp_ao_rcv_sne_update, + TP_PROTO(const struct sock *sk, __u32 new_sne), + TP_ARGS(sk, new_sne) +); + #endif /* _TRACE_TCP_H */ /* This part must be outside protection */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17a4a8e4855d..73152ce1367e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,6 +282,7 @@ #include #include #include +#include #include /* Track pending CMSGs. */ @@ -4484,6 +4485,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); + trace_tcp_hash_md5_unexpected(sk, skb); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4513,6 +4515,7 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, l3index); } } + trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; } return SKB_NOT_DROPPED_YET; @@ -4544,15 +4547,27 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, if (tcp_parse_auth_options(th, &md5_location, &aoh)) { tcp_hash_fail("TCP segment has incorrect auth options set", family, skb, ""); + trace_tcp_hash_bad_header(sk, skb); return SKB_DROP_REASON_TCP_AUTH_HDR; } if (req) { if (tcp_rsk_used_ao(req) != !!aoh) { + u8 keyid, rnext, maclen; + + if (aoh) { + keyid = aoh->keyid; + rnext = aoh->rnext_keyid; + maclen = tcp_ao_hdr_maclen(aoh); + } else { + keyid = rnext = maclen = 0; + } + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); tcp_hash_fail("TCP connection can't start/end using TCP-AO", family, skb, "%s", !aoh ? "missing AO" : "AO signed"); + trace_tcp_ao_handshake_failure(sk, skb, keyid, rnext, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } } @@ -4572,12 +4587,14 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, if (tcp_ao_required(sk, saddr, family, l3index, true)) { tcp_hash_fail("AO hash is required, but not found", family, skb, "L3 index %d", l3index); + trace_tcp_hash_ao_required(sk, skb); return SKB_DROP_REASON_TCP_AONOTFOUND; } if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); tcp_hash_fail("MD5 Hash not found", family, skb, "L3 index %d", l3index); + trace_tcp_hash_md5_required(sk, skb); return SKB_DROP_REASON_TCP_MD5NOTFOUND; } return SKB_NOT_DROPPED_YET; diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 50ae43c92829..1e5087c6cd7d 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -16,6 +16,7 @@ #include #include #include +#include DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_ao_needed, HZ); @@ -895,6 +896,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, tcp_hash_fail("AO hash wrong length", family, skb, "%u != %d L3index: %d", maclen, tcp_ao_maclen(key), l3index); + trace_tcp_ao_wrong_maclen(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -911,6 +914,8 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, atomic64_inc(&key->pkt_bad); tcp_hash_fail("AO hash mismatch", family, skb, "L3index: %d", l3index); + trace_tcp_ao_mismatch(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); kfree(hash_buf); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -927,6 +932,7 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, int l3index, const struct tcp_ao_hdr *aoh) { const struct tcphdr *th = tcp_hdr(skb); + u8 maclen = tcp_ao_hdr_maclen(aoh); u8 *phash = (u8 *)(aoh + 1); /* hash goes just after the header */ struct tcp_ao_info *info; enum skb_drop_reason ret; @@ -941,6 +947,8 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); tcp_hash_fail("AO key not found", family, skb, "keyid: %u L3index: %d", aoh->keyid, l3index); + trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOUNEXPECTED; } @@ -981,6 +989,9 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, current_key = READ_ONCE(info->current_key); /* Key rotation: the peer asks us to use new key (RNext) */ if (unlikely(aoh->rnext_keyid != current_key->sndid)) { + trace_tcp_ao_rnext_request(sk, skb, current_key->sndid, + aoh->rnext_keyid, + tcp_ao_hdr_maclen(aoh)); /* If the key is not found we do nothing. */ key = tcp_ao_established_key(info, aoh->rnext_keyid, -1); if (key) @@ -1048,6 +1059,8 @@ key_not_found: atomic64_inc(&info->counters.key_not_found); tcp_hash_fail("Requested by the peer AO key id not found", family, skb, "L3index: %d", l3index); + trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, + aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index eb187450e4d7..d0a1e34d69f6 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3578,8 +3578,10 @@ static void tcp_snd_sne_update(struct tcp_sock *tp, u32 ack) ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); - if (ao && ack < tp->snd_una) + if (ao && ack < tp->snd_una) { ao->snd_sne++; + trace_tcp_ao_snd_sne_update((struct sock *)tp, ao->snd_sne); + } #endif } @@ -3604,8 +3606,10 @@ static void tcp_rcv_sne_update(struct tcp_sock *tp, u32 seq) ao = rcu_dereference_protected(tp->ao_info, lockdep_sock_is_held((struct sock *)tp)); - if (ao && seq < tp->rcv_nxt) + if (ao && seq < tp->rcv_nxt) { ao->rcv_sne++; + trace_tcp_ao_rcv_sne_update((struct sock *)tp, ao->rcv_sne); + } #endif } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 090fb0c24599..16c48df8df4c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3768,6 +3768,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, #ifdef CONFIG_TCP_AO struct tcp_ao_key *ao_key = NULL; u8 keyid = tcp_rsk(req)->ao_keyid; + u8 rnext = tcp_rsk(req)->ao_rcv_next; ao_key = tcp_sk(sk)->af_specific->ao_lookup(sk, req_to_sk(req), keyid, -1); @@ -3777,6 +3778,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, * ao_keyid (RFC5925 RNextKeyID), so let's keep it simple here. */ if (unlikely(!ao_key)) { + trace_tcp_ao_synack_no_key(sk, keyid, rnext); rcu_read_unlock(); kfree_skb(skb); net_warn_ratelimited("TCP-AO: the keyid %u from SYN packet is not present - not sending SYNACK\n", -- cgit v1.2.3 From 78b1b27db91c7a94297a8b6a665fe7e86dfc5750 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov <0x7f454c46@gmail.com> Date: Fri, 7 Jun 2024 00:25:59 +0100 Subject: net/tcp: Remove tcp_hash_fail() Now there are tracepoints, that cover all functionality of tcp_hash_fail(), but also wire up missing places They are also faster, can be disabled and provide filtering. This potentially may create a regression if a userspace depends on dmesg logs. Fingers crossed, let's see if anyone complains in reality. Reviewed-by: Eric Dumazet Signed-off-by: Dmitry Safonov <0x7f454c46@gmail.com> Signed-off-by: David S. Miller --- include/net/tcp_ao.h | 37 ------------------------------------- net/ipv4/tcp.c | 25 ------------------------- net/ipv4/tcp_ao.c | 9 --------- 3 files changed, 71 deletions(-) (limited to 'include') diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h index 198e02004ad2..1d46460d0fef 100644 --- a/include/net/tcp_ao.h +++ b/include/net/tcp_ao.h @@ -149,43 +149,6 @@ extern struct static_key_false_deferred tcp_ao_needed; #define static_branch_tcp_ao() false #endif -static inline bool tcp_hash_should_produce_warnings(void) -{ - return static_branch_tcp_md5() || static_branch_tcp_ao(); -} - -#define tcp_hash_fail(msg, family, skb, fmt, ...) \ -do { \ - const struct tcphdr *th = tcp_hdr(skb); \ - char hdr_flags[6]; \ - char *f = hdr_flags; \ - \ - if (!tcp_hash_should_produce_warnings()) \ - break; \ - if (th->fin) \ - *f++ = 'F'; \ - if (th->syn) \ - *f++ = 'S'; \ - if (th->rst) \ - *f++ = 'R'; \ - if (th->psh) \ - *f++ = 'P'; \ - if (th->ack) \ - *f++ = '.'; \ - *f = 0; \ - if ((family) == AF_INET) { \ - net_info_ratelimited("%s for %pI4.%d->%pI4.%d [%s] " fmt "\n", \ - msg, &ip_hdr(skb)->saddr, ntohs(th->source), \ - &ip_hdr(skb)->daddr, ntohs(th->dest), \ - hdr_flags, ##__VA_ARGS__); \ - } else { \ - net_info_ratelimited("%s for [%pI6c].%d->[%pI6c].%d [%s]" fmt "\n", \ - msg, &ipv6_hdr(skb)->saddr, ntohs(th->source), \ - &ipv6_hdr(skb)->daddr, ntohs(th->dest), \ - hdr_flags, ##__VA_ARGS__); \ - } \ -} while (0) - #ifdef CONFIG_TCP_AO /* TCP-AO structures and functions */ struct tcp4_ao_context { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 73152ce1367e..e03a342c9162 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4484,7 +4484,6 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, if (!key && hash_location) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - tcp_hash_fail("Unexpected MD5 Hash found", family, skb, ""); trace_tcp_hash_md5_unexpected(sk, skb); return SKB_DROP_REASON_TCP_MD5UNEXPECTED; } @@ -4500,21 +4499,6 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); - if (family == AF_INET) { - tcp_hash_fail("MD5 Hash failed", AF_INET, skb, "%s L3 index %d", - genhash ? "tcp_v4_calc_md5_hash failed" - : "", l3index); - } else { - if (genhash) { - tcp_hash_fail("MD5 Hash failed", - AF_INET6, skb, "L3 index %d", - l3index); - } else { - tcp_hash_fail("MD5 Hash mismatch", - AF_INET6, skb, "L3 index %d", - l3index); - } - } trace_tcp_hash_md5_mismatch(sk, skb); return SKB_DROP_REASON_TCP_MD5FAILURE; } @@ -4545,8 +4529,6 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, /* Invalid option or two times meet any of auth options */ if (tcp_parse_auth_options(th, &md5_location, &aoh)) { - tcp_hash_fail("TCP segment has incorrect auth options set", - family, skb, ""); trace_tcp_hash_bad_header(sk, skb); return SKB_DROP_REASON_TCP_AUTH_HDR; } @@ -4564,9 +4546,6 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, } NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); - tcp_hash_fail("TCP connection can't start/end using TCP-AO", - family, skb, "%s", - !aoh ? "missing AO" : "AO signed"); trace_tcp_ao_handshake_failure(sk, skb, keyid, rnext, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; } @@ -4585,15 +4564,11 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, * always at least one current_key. */ if (tcp_ao_required(sk, saddr, family, l3index, true)) { - tcp_hash_fail("AO hash is required, but not found", - family, skb, "L3 index %d", l3index); trace_tcp_hash_ao_required(sk, skb); return SKB_DROP_REASON_TCP_AONOTFOUND; } if (unlikely(tcp_md5_do_lookup(sk, l3index, saddr, family))) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - tcp_hash_fail("MD5 Hash not found", - family, skb, "L3 index %d", l3index); trace_tcp_hash_md5_required(sk, skb); return SKB_DROP_REASON_TCP_MD5NOTFOUND; } diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 1e5087c6cd7d..0de863aa5f66 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -893,9 +893,6 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); - tcp_hash_fail("AO hash wrong length", family, skb, - "%u != %d L3index: %d", maclen, - tcp_ao_maclen(key), l3index); trace_tcp_ao_wrong_maclen(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOFAILURE; @@ -912,8 +909,6 @@ tcp_ao_verify_hash(const struct sock *sk, const struct sk_buff *skb, NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOBAD); atomic64_inc(&info->counters.pkt_bad); atomic64_inc(&key->pkt_bad); - tcp_hash_fail("AO hash mismatch", family, skb, - "L3index: %d", l3index); trace_tcp_ao_mismatch(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); kfree(hash_buf); @@ -945,8 +940,6 @@ tcp_inbound_ao_hash(struct sock *sk, const struct sk_buff *skb, info = rcu_dereference(tcp_sk(sk)->ao_info); if (!info) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); - tcp_hash_fail("AO key not found", family, skb, - "keyid: %u L3index: %d", aoh->keyid, l3index); trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOUNEXPECTED; @@ -1057,8 +1050,6 @@ verify_hash: key_not_found: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAOKEYNOTFOUND); atomic64_inc(&info->counters.key_not_found); - tcp_hash_fail("Requested by the peer AO key id not found", - family, skb, "L3index: %d", l3index); trace_tcp_ao_key_not_found(sk, skb, aoh->keyid, aoh->rnext_keyid, maclen); return SKB_DROP_REASON_TCP_AOKEYNOTFOUND; -- cgit v1.2.3 From 249ebf3f65f8530beb2cbfb91bff1d83ba88d23c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 5 Jun 2024 14:38:49 +0200 Subject: power: sequencing: implement the pwrseq core Implement the power sequencing subsystem allowing devices to share complex powering-up and down procedures. It's split into the consumer and provider parts but does not implement any new DT bindings so that the actual power sequencing is never revealed in the DT representation. Tested-by: Amit Pundir Tested-by: Neil Armstrong # on SM8550-QRD, SM8650-QRD & SM8650-HDK Tested-by: Caleb Connolly # OnePlus 8T Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240605123850.24857-2-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 8 + drivers/power/Kconfig | 1 + drivers/power/Makefile | 1 + drivers/power/sequencing/Kconfig | 12 + drivers/power/sequencing/Makefile | 4 + drivers/power/sequencing/core.c | 1105 +++++++++++++++++++++++++++++++++++++ include/linux/pwrseq/consumer.h | 56 ++ include/linux/pwrseq/provider.h | 75 +++ 8 files changed, 1262 insertions(+) create mode 100644 drivers/power/sequencing/Kconfig create mode 100644 drivers/power/sequencing/Makefile create mode 100644 drivers/power/sequencing/core.c create mode 100644 include/linux/pwrseq/consumer.h create mode 100644 include/linux/pwrseq/provider.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index aacccb376c28..6df73a7e07f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17890,6 +17890,14 @@ F: include/linux/pm_* F: include/linux/powercap.h F: kernel/configs/nopm.config +POWER SEQUENCING +M: Bartosz Golaszewski +L: linux-pm@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git +F: drivers/power/sequencing/ +F: include/linux/pwrseq/ + POWER STATE COORDINATION INTERFACE (PSCI) M: Mark Rutland M: Lorenzo Pieralisi diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig index 696bf77a7042..9a8e44ca9ae4 100644 --- a/drivers/power/Kconfig +++ b/drivers/power/Kconfig @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only source "drivers/power/reset/Kconfig" +source "drivers/power/sequencing/Kconfig" source "drivers/power/supply/Kconfig" diff --git a/drivers/power/Makefile b/drivers/power/Makefile index effbf0377f32..962a2cd30a51 100644 --- a/drivers/power/Makefile +++ b/drivers/power/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_POWER_RESET) += reset/ +obj-$(CONFIG_POWER_SEQUENCING) += sequencing/ obj-$(CONFIG_POWER_SUPPLY) += supply/ diff --git a/drivers/power/sequencing/Kconfig b/drivers/power/sequencing/Kconfig new file mode 100644 index 000000000000..ba5732b1dbf8 --- /dev/null +++ b/drivers/power/sequencing/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menuconfig POWER_SEQUENCING + tristate "Power Sequencing support" + help + Say Y here to enable the Power Sequencing subsystem. + + This subsystem is designed to control power to devices that share + complex resources and/or require specific power sequences to be run + during power-up. + + If unsure, say no. diff --git a/drivers/power/sequencing/Makefile b/drivers/power/sequencing/Makefile new file mode 100644 index 000000000000..dcdf8c0c159e --- /dev/null +++ b/drivers/power/sequencing/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_POWER_SEQUENCING) += pwrseq-core.o +pwrseq-core-y := core.o diff --git a/drivers/power/sequencing/core.c b/drivers/power/sequencing/core.c new file mode 100644 index 000000000000..9e606c9f6f04 --- /dev/null +++ b/drivers/power/sequencing/core.c @@ -0,0 +1,1105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Power-sequencing framework for linux. + * + * This subsystem allows power sequence providers to register a set of targets + * that consumers may request and power-up/down. + * + * Glossary: + * + * Unit - a unit is a discreet chunk of a power sequence. For instance one unit + * may enable a set of regulators, another may enable a specific GPIO. Units + * can define dependencies in the form of other units that must be enabled + * before it itself can be. + * + * Target - a target is a set of units (composed of the "final" unit and its + * dependencies) that a consumer selects by its name when requesting a handle + * to the power sequencer. Via the dependency system, multiple targets may + * share the same parts of a power sequence but ignore parts that are + * irrelevant. + * + * Descriptor - a handle passed by the pwrseq core to every consumer that + * serves as the entry point to the provider layer. It ensures coherence + * between different users and keeps reference counting consistent. + * + * Each provider must define a .match() callback whose role is to determine + * whether a potential consumer is in fact associated with this sequencer. + * This allows creating abstraction layers on top of regular device-tree + * resources like regulators, clocks and other nodes connected to the consumer + * via phandle. + */ + +static DEFINE_IDA(pwrseq_ida); + +/* + * Protects the device list on the pwrseq bus from concurrent modifications + * but allows simultaneous read-only access. + */ +static DECLARE_RWSEM(pwrseq_sem); + +/** + * struct pwrseq_unit - Private power-sequence unit data. + * @ref: Reference count for this object. When it goes to 0, the object is + * destroyed. + * @name: Name of this target. + * @list: Link to siblings on the list of all units of a single sequencer. + * @deps: List of units on which this unit depends. + * @enable: Callback running the part of the power-on sequence provided by + * this unit. + * @disable: Callback running the part of the power-off sequence provided + * by this unit. + * @enable_count: Current number of users that enabled this unit. May be the + * consumer of the power sequencer or other units that depend + * on this one. + */ +struct pwrseq_unit { + struct kref ref; + const char *name; + struct list_head list; + struct list_head deps; + pwrseq_power_state_func enable; + pwrseq_power_state_func disable; + unsigned int enable_count; +}; + +static struct pwrseq_unit *pwrseq_unit_new(const struct pwrseq_unit_data *data) +{ + struct pwrseq_unit *unit; + + unit = kzalloc(sizeof(*unit), GFP_KERNEL); + if (!unit) + return NULL; + + unit->name = kstrdup_const(data->name, GFP_KERNEL); + if (!unit->name) { + kfree(unit); + return NULL; + } + + kref_init(&unit->ref); + INIT_LIST_HEAD(&unit->deps); + unit->enable = data->enable; + unit->disable = data->disable; + + return unit; +} + +static struct pwrseq_unit *pwrseq_unit_get(struct pwrseq_unit *unit) +{ + kref_get(&unit->ref); + + return unit; +} + +static void pwrseq_unit_release(struct kref *ref); + +static void pwrseq_unit_put(struct pwrseq_unit *unit) +{ + kref_put(&unit->ref, pwrseq_unit_release); +} + +/** + * struct pwrseq_unit_dep - Wrapper around a reference to the unit structure + * allowing to keep it on multiple dependency lists + * in different units. + * @list: Siblings on the list. + * @unit: Address of the referenced unit. + */ +struct pwrseq_unit_dep { + struct list_head list; + struct pwrseq_unit *unit; +}; + +static struct pwrseq_unit_dep *pwrseq_unit_dep_new(struct pwrseq_unit *unit) +{ + struct pwrseq_unit_dep *dep; + + dep = kzalloc(sizeof(*dep), GFP_KERNEL); + if (!dep) + return NULL; + + dep->unit = unit; + + return dep; +} + +static void pwrseq_unit_dep_free(struct pwrseq_unit_dep *ref) +{ + pwrseq_unit_put(ref->unit); + kfree(ref); +} + +static void pwrseq_unit_free_deps(struct list_head *list) +{ + struct pwrseq_unit_dep *dep, *next; + + list_for_each_entry_safe(dep, next, list, list) { + list_del(&dep->list); + pwrseq_unit_dep_free(dep); + } +} + +static void pwrseq_unit_release(struct kref *ref) +{ + struct pwrseq_unit *unit = container_of(ref, struct pwrseq_unit, ref); + + pwrseq_unit_free_deps(&unit->deps); + list_del(&unit->list); + kfree_const(unit->name); + kfree(unit); +} + +/** + * struct pwrseq_target - Private power-sequence target data. + * @list: Siblings on the list of all targets exposed by a power sequencer. + * @name: Name of the target. + * @unit: Final unit for this target. + * @post_enable: Callback run after the target unit has been enabled, *after* + * the state lock has been released. It's useful for implementing + * boot-up delays without blocking other users from powering up + * using the same power sequencer. + */ +struct pwrseq_target { + struct list_head list; + const char *name; + struct pwrseq_unit *unit; + pwrseq_power_state_func post_enable; +}; + +static struct pwrseq_target * +pwrseq_target_new(const struct pwrseq_target_data *data) +{ + struct pwrseq_target *target; + + target = kzalloc(sizeof(*target), GFP_KERNEL); + if (!target) + return NULL; + + target->name = kstrdup_const(data->name, GFP_KERNEL); + if (!target->name) { + kfree(target); + return NULL; + } + + target->post_enable = data->post_enable; + + return target; +} + +static void pwrseq_target_free(struct pwrseq_target *target) +{ + pwrseq_unit_put(target->unit); + kfree_const(target->name); + kfree(target); +} + +/** + * struct pwrseq_device - Private power sequencing data. + * @dev: Device struct associated with this sequencer. + * @id: Device ID. + * @owner: Prevents removal of active power sequencing providers. + * @rw_lock: Protects the device from being unregistered while in use. + * @state_lock: Prevents multiple users running the power sequence at the same + * time. + * @match: Power sequencer matching callback. + * @targets: List of targets exposed by this sequencer. + * @units: List of all units supported by this sequencer. + */ +struct pwrseq_device { + struct device dev; + int id; + struct module *owner; + struct rw_semaphore rw_lock; + struct mutex state_lock; + pwrseq_match_func match; + struct list_head targets; + struct list_head units; +}; + +static struct pwrseq_device *to_pwrseq_device(struct device *dev) +{ + return container_of(dev, struct pwrseq_device, dev); +} + +static struct pwrseq_device *pwrseq_device_get(struct pwrseq_device *pwrseq) +{ + get_device(&pwrseq->dev); + + return pwrseq; +} + +static void pwrseq_device_put(struct pwrseq_device *pwrseq) +{ + put_device(&pwrseq->dev); +} + +/** + * struct pwrseq_desc - Wraps access to the pwrseq_device and ensures that one + * user cannot break the reference counting for others. + * @pwrseq: Reference to the power sequencing device. + * @target: Reference to the target this descriptor allows to control. + * @powered_on: Power state set by the holder of the descriptor (not necessarily + * corresponding to the actual power state of the device). + */ +struct pwrseq_desc { + struct pwrseq_device *pwrseq; + struct pwrseq_target *target; + bool powered_on; +}; + +static const struct bus_type pwrseq_bus = { + .name = "pwrseq", +}; + +static void pwrseq_release(struct device *dev) +{ + struct pwrseq_device *pwrseq = to_pwrseq_device(dev); + struct pwrseq_target *target, *pos; + + list_for_each_entry_safe(target, pos, &pwrseq->targets, list) { + list_del(&target->list); + pwrseq_target_free(target); + } + + mutex_destroy(&pwrseq->state_lock); + ida_free(&pwrseq_ida, pwrseq->id); + kfree(pwrseq); +} + +static const struct device_type pwrseq_device_type = { + .name = "power_sequencer", + .release = pwrseq_release, +}; + +static int pwrseq_check_unit_deps(const struct pwrseq_unit_data *data, + struct radix_tree_root *visited_units) +{ + const struct pwrseq_unit_data *tmp, **cur; + int ret; + + ret = radix_tree_insert(visited_units, (unsigned long)data, + (void *)data); + if (ret) + return ret; + + for (cur = data->deps; cur && *cur; cur++) { + tmp = radix_tree_lookup(visited_units, (unsigned long)*cur); + if (tmp) { + WARN(1, "Circular dependency in power sequencing flow detected!\n"); + return -EINVAL; + } + + ret = pwrseq_check_unit_deps(*cur, visited_units); + if (ret) + return ret; + } + + return 0; +} + +static int pwrseq_check_target_deps(const struct pwrseq_target_data *data) +{ + struct radix_tree_root visited_units; + struct radix_tree_iter iter; + void __rcu **slot; + int ret; + + if (!data->unit) + return -EINVAL; + + INIT_RADIX_TREE(&visited_units, GFP_KERNEL); + ret = pwrseq_check_unit_deps(data->unit, &visited_units); + radix_tree_for_each_slot(slot, &visited_units, &iter, 0) + radix_tree_delete(&visited_units, iter.index); + + return ret; +} + +static int pwrseq_unit_setup_deps(const struct pwrseq_unit_data **data, + struct list_head *dep_list, + struct list_head *unit_list, + struct radix_tree_root *processed_units); + +static struct pwrseq_unit * +pwrseq_unit_setup(const struct pwrseq_unit_data *data, + struct list_head *unit_list, + struct radix_tree_root *processed_units) +{ + struct pwrseq_unit *unit; + int ret; + + unit = radix_tree_lookup(processed_units, (unsigned long)data); + if (unit) + return pwrseq_unit_get(unit); + + unit = pwrseq_unit_new(data); + if (!unit) + return ERR_PTR(-ENOMEM); + + if (data->deps) { + ret = pwrseq_unit_setup_deps(data->deps, &unit->deps, + unit_list, processed_units); + if (ret) { + pwrseq_unit_put(unit); + return ERR_PTR(ret); + } + } + + ret = radix_tree_insert(processed_units, (unsigned long)data, unit); + if (ret) { + pwrseq_unit_put(unit); + return ERR_PTR(ret); + } + + list_add_tail(&unit->list, unit_list); + + return unit; +} + +static int pwrseq_unit_setup_deps(const struct pwrseq_unit_data **data, + struct list_head *dep_list, + struct list_head *unit_list, + struct radix_tree_root *processed_units) +{ + const struct pwrseq_unit_data *pos; + struct pwrseq_unit_dep *dep; + struct pwrseq_unit *unit; + int i; + + for (i = 0; data[i]; i++) { + pos = data[i]; + + unit = pwrseq_unit_setup(pos, unit_list, processed_units); + if (IS_ERR(unit)) + return PTR_ERR(unit); + + dep = pwrseq_unit_dep_new(unit); + if (!dep) { + pwrseq_unit_put(unit); + return -ENOMEM; + } + + list_add_tail(&dep->list, dep_list); + } + + return 0; +} + +static int pwrseq_do_setup_targets(const struct pwrseq_target_data **data, + struct pwrseq_device *pwrseq, + struct radix_tree_root *processed_units) +{ + const struct pwrseq_target_data *pos; + struct pwrseq_target *target; + int ret, i; + + for (i = 0; data[i]; i++) { + pos = data[i]; + + ret = pwrseq_check_target_deps(pos); + if (ret) + return ret; + + target = pwrseq_target_new(pos); + if (!target) + return -ENOMEM; + + target->unit = pwrseq_unit_setup(pos->unit, &pwrseq->units, + processed_units); + if (IS_ERR(target->unit)) { + ret = PTR_ERR(target->unit); + pwrseq_target_free(target); + return ret; + } + + list_add_tail(&target->list, &pwrseq->targets); + } + + return 0; +} + +static int pwrseq_setup_targets(const struct pwrseq_target_data **targets, + struct pwrseq_device *pwrseq) +{ + struct radix_tree_root processed_units; + struct radix_tree_iter iter; + void __rcu **slot; + int ret; + + INIT_RADIX_TREE(&processed_units, GFP_KERNEL); + ret = pwrseq_do_setup_targets(targets, pwrseq, &processed_units); + radix_tree_for_each_slot(slot, &processed_units, &iter, 0) + radix_tree_delete(&processed_units, iter.index); + + return ret; +} + +/** + * pwrseq_device_register() - Register a new power sequencer. + * @config: Configuration of the new power sequencing device. + * + * The config structure is only used during the call and can be freed after + * the function returns. The config structure *must* have the parent device + * as well as the match() callback and at least one target set. + * + * Returns: + * Returns the address of the new pwrseq device or ERR_PTR() on failure. + */ +struct pwrseq_device * +pwrseq_device_register(const struct pwrseq_config *config) +{ + struct pwrseq_device *pwrseq; + int ret, id; + + if (!config->parent || !config->match || !config->targets || + !config->targets[0]) + return ERR_PTR(-EINVAL); + + pwrseq = kzalloc(sizeof(*pwrseq), GFP_KERNEL); + if (!pwrseq) + return ERR_PTR(-ENOMEM); + + pwrseq->dev.type = &pwrseq_device_type; + pwrseq->dev.bus = &pwrseq_bus; + pwrseq->dev.parent = config->parent; + device_set_node(&pwrseq->dev, dev_fwnode(config->parent)); + dev_set_drvdata(&pwrseq->dev, config->drvdata); + + id = ida_alloc(&pwrseq_ida, GFP_KERNEL); + if (id < 0) { + kfree(pwrseq); + return ERR_PTR(id); + } + + pwrseq->id = id; + + /* + * From this point onwards the device's release() callback is + * responsible for freeing resources. + */ + device_initialize(&pwrseq->dev); + + ret = dev_set_name(&pwrseq->dev, "pwrseq.%d", pwrseq->id); + if (ret) + goto err_put_pwrseq; + + pwrseq->owner = config->owner ?: THIS_MODULE; + pwrseq->match = config->match; + + init_rwsem(&pwrseq->rw_lock); + mutex_init(&pwrseq->state_lock); + INIT_LIST_HEAD(&pwrseq->targets); + INIT_LIST_HEAD(&pwrseq->units); + + ret = pwrseq_setup_targets(config->targets, pwrseq); + if (ret) + goto err_put_pwrseq; + + scoped_guard(rwsem_write, &pwrseq_sem) { + ret = device_add(&pwrseq->dev); + if (ret) + goto err_put_pwrseq; + } + + return pwrseq; + +err_put_pwrseq: + pwrseq_device_put(pwrseq); + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(pwrseq_device_register); + +/** + * pwrseq_device_unregister() - Unregister the power sequencer. + * @pwrseq: Power sequencer to unregister. + */ +void pwrseq_device_unregister(struct pwrseq_device *pwrseq) +{ + struct device *dev = &pwrseq->dev; + struct pwrseq_target *target; + + scoped_guard(mutex, &pwrseq->state_lock) { + guard(rwsem_write)(&pwrseq->rw_lock); + + list_for_each_entry(target, &pwrseq->targets, list) + WARN(target->unit->enable_count, + "REMOVING POWER SEQUENCER WITH ACTIVE USERS\n"); + + guard(rwsem_write)(&pwrseq_sem); + + device_del(dev); + } + + pwrseq_device_put(pwrseq); +} +EXPORT_SYMBOL_GPL(pwrseq_device_unregister); + +static void devm_pwrseq_device_unregister(void *data) +{ + struct pwrseq_device *pwrseq = data; + + pwrseq_device_unregister(pwrseq); +} + +/** + * devm_pwrseq_device_register() - Managed variant of pwrseq_device_register(). + * @dev: Managing device. + * @config: Configuration of the new power sequencing device. + * + * Returns: + * Returns the address of the new pwrseq device or ERR_PTR() on failure. + */ +struct pwrseq_device * +devm_pwrseq_device_register(struct device *dev, + const struct pwrseq_config *config) +{ + struct pwrseq_device *pwrseq; + int ret; + + pwrseq = pwrseq_device_register(config); + if (IS_ERR(pwrseq)) + return pwrseq; + + ret = devm_add_action_or_reset(dev, devm_pwrseq_device_unregister, + pwrseq); + if (ret) + return ERR_PTR(ret); + + return pwrseq; +} +EXPORT_SYMBOL_GPL(devm_pwrseq_device_register); + +/** + * pwrseq_device_get_drvdata() - Get the driver private data associated with + * this sequencer. + * @pwrseq: Power sequencer object. + * + * Returns: + * Address of the private driver data. + */ +void *pwrseq_device_get_drvdata(struct pwrseq_device *pwrseq) +{ + return dev_get_drvdata(&pwrseq->dev); +} +EXPORT_SYMBOL_GPL(pwrseq_device_get_drvdata); + +struct pwrseq_match_data { + struct pwrseq_desc *desc; + struct device *dev; + const char *target; +}; + +static int pwrseq_match_device(struct device *pwrseq_dev, void *data) +{ + struct pwrseq_device *pwrseq = to_pwrseq_device(pwrseq_dev); + struct pwrseq_match_data *match_data = data; + struct pwrseq_target *target; + int ret; + + lockdep_assert_held_read(&pwrseq_sem); + + guard(rwsem_read)(&pwrseq->rw_lock); + if (!device_is_registered(&pwrseq->dev)) + return 0; + + ret = pwrseq->match(pwrseq, match_data->dev); + if (ret <= 0) + return ret; + + /* We got the matching device, let's find the right target. */ + list_for_each_entry(target, &pwrseq->targets, list) { + if (strcmp(target->name, match_data->target)) + continue; + + match_data->desc->target = target; + } + + /* + * This device does not have this target. No point in deferring as it + * will not get a new target dynamically later. + */ + if (!match_data->desc->target) + return -ENOENT; + + if (!try_module_get(pwrseq->owner)) + return -EPROBE_DEFER; + + match_data->desc->pwrseq = pwrseq_device_get(pwrseq); + + return 1; +} + +/** + * pwrseq_get() - Get the power sequencer associated with this device. + * @dev: Device for which to get the sequencer. + * @target: Name of the target exposed by the sequencer this device wants to + * reach. + * + * Returns: + * New power sequencer descriptor for use by the consumer driver or ERR_PTR() + * on failure. + */ +struct pwrseq_desc *pwrseq_get(struct device *dev, const char *target) +{ + struct pwrseq_match_data match_data; + int ret; + + struct pwrseq_desc *desc __free(kfree) = kzalloc(sizeof(*desc), + GFP_KERNEL); + if (!desc) + return ERR_PTR(-ENOMEM); + + match_data.desc = desc; + match_data.dev = dev; + match_data.target = target; + + guard(rwsem_read)(&pwrseq_sem); + + ret = bus_for_each_dev(&pwrseq_bus, NULL, &match_data, + pwrseq_match_device); + if (ret < 0) + return ERR_PTR(ret); + if (ret == 0) + /* No device matched. */ + return ERR_PTR(-EPROBE_DEFER); + + return no_free_ptr(desc); +} +EXPORT_SYMBOL_GPL(pwrseq_get); + +/** + * pwrseq_put() - Release the power sequencer descriptor. + * @desc: Descriptor to release. + */ +void pwrseq_put(struct pwrseq_desc *desc) +{ + struct pwrseq_device *pwrseq; + + if (!desc) + return; + + pwrseq = desc->pwrseq; + + if (desc->powered_on) + pwrseq_power_off(desc); + + kfree(desc); + module_put(pwrseq->owner); + pwrseq_device_put(pwrseq); +} +EXPORT_SYMBOL_GPL(pwrseq_put); + +static void devm_pwrseq_put(void *data) +{ + struct pwrseq_desc *desc = data; + + pwrseq_put(desc); +} + +/** + * devm_pwrseq_get() - Managed variant of pwrseq_get(). + * @dev: Device for which to get the sequencer and which also manages its + * lifetime. + * @target: Name of the target exposed by the sequencer this device wants to + * reach. + * + * Returns: + * New power sequencer descriptor for use by the consumer driver or ERR_PTR() + * on failure. + */ +struct pwrseq_desc *devm_pwrseq_get(struct device *dev, const char *target) +{ + struct pwrseq_desc *desc; + int ret; + + desc = pwrseq_get(dev, target); + if (IS_ERR(desc)) + return desc; + + ret = devm_add_action_or_reset(dev, devm_pwrseq_put, desc); + if (ret) + return ERR_PTR(ret); + + return desc; +} +EXPORT_SYMBOL_GPL(devm_pwrseq_get); + +static int pwrseq_unit_enable(struct pwrseq_device *pwrseq, + struct pwrseq_unit *target); +static int pwrseq_unit_disable(struct pwrseq_device *pwrseq, + struct pwrseq_unit *target); + +static int pwrseq_unit_enable_deps(struct pwrseq_device *pwrseq, + struct list_head *list) +{ + struct pwrseq_unit_dep *pos; + int ret = 0; + + list_for_each_entry(pos, list, list) { + ret = pwrseq_unit_enable(pwrseq, pos->unit); + if (ret) { + list_for_each_entry_continue_reverse(pos, list, list) + pwrseq_unit_disable(pwrseq, pos->unit); + break; + } + } + + return ret; +} + +static int pwrseq_unit_disable_deps(struct pwrseq_device *pwrseq, + struct list_head *list) +{ + struct pwrseq_unit_dep *pos; + int ret = 0; + + list_for_each_entry_reverse(pos, list, list) { + ret = pwrseq_unit_disable(pwrseq, pos->unit); + if (ret) { + list_for_each_entry_continue(pos, list, list) + pwrseq_unit_enable(pwrseq, pos->unit); + break; + } + } + + return ret; +} + +static int pwrseq_unit_enable(struct pwrseq_device *pwrseq, + struct pwrseq_unit *unit) +{ + int ret; + + lockdep_assert_held_read(&pwrseq->rw_lock); + lockdep_assert_held(&pwrseq->state_lock); + + if (unit->enable_count != 0) { + unit->enable_count++; + return 0; + } + + ret = pwrseq_unit_enable_deps(pwrseq, &unit->deps); + if (ret) { + dev_err(&pwrseq->dev, + "Failed to enable dependencies before power-on for target '%s': %d\n", + unit->name, ret); + return ret; + } + + if (unit->enable) { + ret = unit->enable(pwrseq); + if (ret) { + dev_err(&pwrseq->dev, + "Failed to enable target '%s': %d\n", + unit->name, ret); + pwrseq_unit_disable_deps(pwrseq, &unit->deps); + return ret; + } + } + + unit->enable_count++; + + return 0; +} + +static int pwrseq_unit_disable(struct pwrseq_device *pwrseq, + struct pwrseq_unit *unit) +{ + int ret; + + lockdep_assert_held_read(&pwrseq->rw_lock); + lockdep_assert_held(&pwrseq->state_lock); + + if (unit->enable_count == 0) { + WARN(1, "Unmatched power-off for target '%s'\n", + unit->name); + return -EBUSY; + } + + if (unit->enable_count != 1) { + unit->enable_count--; + return 0; + } + + if (unit->disable) { + ret = unit->disable(pwrseq); + if (ret) { + dev_err(&pwrseq->dev, + "Failed to disable target '%s': %d\n", + unit->name, ret); + return ret; + } + } + + ret = pwrseq_unit_disable_deps(pwrseq, &unit->deps); + if (ret) { + dev_err(&pwrseq->dev, + "Failed to disable dependencies after power-off for target '%s': %d\n", + unit->name, ret); + if (unit->enable) + unit->enable(pwrseq); + return ret; + } + + unit->enable_count--; + + return 0; +} + +/** + * pwrseq_power_on() - Issue a power-on request on behalf of the consumer + * device. + * @desc: Descriptor referencing the power sequencer. + * + * This function tells the power sequencer that the consumer wants to be + * powered-up. The sequencer may already have powered-up the device in which + * case the function returns 0. If the power-up sequence is already in + * progress, the function will block until it's done and return 0. If this is + * the first request, the device will be powered up. + * + * Returns: + * 0 on success, negative error number on failure. + */ +int pwrseq_power_on(struct pwrseq_desc *desc) +{ + struct pwrseq_device *pwrseq; + struct pwrseq_target *target; + struct pwrseq_unit *unit; + int ret; + + might_sleep(); + + if (!desc || desc->powered_on) + return 0; + + pwrseq = desc->pwrseq; + target = desc->target; + unit = target->unit; + + guard(rwsem_read)(&pwrseq->rw_lock); + if (!device_is_registered(&pwrseq->dev)) + return -ENODEV; + + scoped_guard(mutex, &pwrseq->state_lock) { + ret = pwrseq_unit_enable(pwrseq, unit); + if (!ret) + desc->powered_on = true; + } + + if (target->post_enable) { + ret = target->post_enable(pwrseq); + if (ret) { + pwrseq_unit_disable(pwrseq, unit); + desc->powered_on = false; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(pwrseq_power_on); + +/** + * pwrseq_power_off() - Issue a power-off request on behalf of the consumer + * device. + * @desc: Descriptor referencing the power sequencer. + * + * This undoes the effects of pwrseq_power_on(). It issues a power-off request + * on behalf of the consumer and when the last remaining user does so, the + * power-down sequence will be started. If one is in progress, the function + * will block until it's complete and then return. + * + * Returns: + * 0 on success, negative error number on failure. + */ +int pwrseq_power_off(struct pwrseq_desc *desc) +{ + struct pwrseq_device *pwrseq; + struct pwrseq_unit *unit; + int ret; + + might_sleep(); + + if (!desc || !desc->powered_on) + return 0; + + pwrseq = desc->pwrseq; + unit = desc->target->unit; + + guard(rwsem_read)(&pwrseq->rw_lock); + if (!device_is_registered(&pwrseq->dev)) + return -ENODEV; + + guard(mutex)(&pwrseq->state_lock); + + ret = pwrseq_unit_disable(pwrseq, unit); + if (!ret) + desc->powered_on = false; + + return ret; +} +EXPORT_SYMBOL_GPL(pwrseq_power_off); + +#if IS_ENABLED(CONFIG_DEBUG_FS) + +struct pwrseq_debugfs_count_ctx { + struct device *dev; + loff_t index; +}; + +static int pwrseq_debugfs_seq_count(struct device *dev, void *data) +{ + struct pwrseq_debugfs_count_ctx *ctx = data; + + ctx->dev = dev; + + return ctx->index-- ? 0 : 1; +} + +static void *pwrseq_debugfs_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct pwrseq_debugfs_count_ctx ctx; + + ctx.dev = NULL; + ctx.index = *pos; + + /* + * We're holding the lock for the entire printout so no need to fiddle + * with device reference count. + */ + down_read(&pwrseq_sem); + + bus_for_each_dev(&pwrseq_bus, NULL, &ctx, pwrseq_debugfs_seq_count); + if (!ctx.index) + return NULL; + + return ctx.dev; +} + +static void *pwrseq_debugfs_seq_next(struct seq_file *seq, void *data, + loff_t *pos) +{ + struct device *curr = data; + + ++*pos; + + struct device *next __free(put_device) = + bus_find_next_device(&pwrseq_bus, curr); + return next; +} + +static void pwrseq_debugfs_seq_show_target(struct seq_file *seq, + struct pwrseq_target *target) +{ + seq_printf(seq, " target: [%s] (target unit: [%s])\n", + target->name, target->unit->name); +} + +static void pwrseq_debugfs_seq_show_unit(struct seq_file *seq, + struct pwrseq_unit *unit) +{ + struct pwrseq_unit_dep *ref; + + seq_printf(seq, " unit: [%s] - enable count: %u\n", + unit->name, unit->enable_count); + + if (list_empty(&unit->deps)) + return; + + seq_puts(seq, " dependencies:\n"); + list_for_each_entry(ref, &unit->deps, list) + seq_printf(seq, " [%s]\n", ref->unit->name); +} + +static int pwrseq_debugfs_seq_show(struct seq_file *seq, void *data) +{ + struct device *dev = data; + struct pwrseq_device *pwrseq = to_pwrseq_device(dev); + struct pwrseq_target *target; + struct pwrseq_unit *unit; + + seq_printf(seq, "%s:\n", dev_name(dev)); + + seq_puts(seq, " targets:\n"); + list_for_each_entry(target, &pwrseq->targets, list) + pwrseq_debugfs_seq_show_target(seq, target); + + seq_puts(seq, " units:\n"); + list_for_each_entry(unit, &pwrseq->units, list) + pwrseq_debugfs_seq_show_unit(seq, unit); + + return 0; +} + +static void pwrseq_debugfs_seq_stop(struct seq_file *seq, void *data) +{ + up_read(&pwrseq_sem); +} + +static const struct seq_operations pwrseq_debugfs_sops = { + .start = pwrseq_debugfs_seq_start, + .next = pwrseq_debugfs_seq_next, + .show = pwrseq_debugfs_seq_show, + .stop = pwrseq_debugfs_seq_stop, +}; +DEFINE_SEQ_ATTRIBUTE(pwrseq_debugfs); + +static struct dentry *pwrseq_debugfs_dentry; + +#endif /* CONFIG_DEBUG_FS */ + +static int __init pwrseq_init(void) +{ + int ret; + + ret = bus_register(&pwrseq_bus); + if (ret) { + pr_err("Failed to register the power sequencer bus\n"); + return ret; + } + +#if IS_ENABLED(CONFIG_DEBUG_FS) + pwrseq_debugfs_dentry = debugfs_create_file("pwrseq", 0444, NULL, NULL, + &pwrseq_debugfs_fops); +#endif /* CONFIG_DEBUG_FS */ + + return 0; +} +subsys_initcall(pwrseq_init); + +static void __exit pwrseq_exit(void) +{ +#if IS_ENABLED(CONFIG_DEBUG_FS) + debugfs_remove_recursive(pwrseq_debugfs_dentry); +#endif /* CONFIG_DEBUG_FS */ + + bus_unregister(&pwrseq_bus); +} +module_exit(pwrseq_exit); + +MODULE_AUTHOR("Bartosz Golaszewski "); +MODULE_DESCRIPTION("Power Sequencing subsystem core"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/pwrseq/consumer.h b/include/linux/pwrseq/consumer.h new file mode 100644 index 000000000000..7d583b4f266e --- /dev/null +++ b/include/linux/pwrseq/consumer.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#ifndef __POWER_SEQUENCING_CONSUMER_H__ +#define __POWER_SEQUENCING_CONSUMER_H__ + +#include + +struct device; +struct pwrseq_desc; + +#if IS_ENABLED(CONFIG_POWER_SEQUENCING) + +struct pwrseq_desc * __must_check +pwrseq_get(struct device *dev, const char *target); +void pwrseq_put(struct pwrseq_desc *desc); + +struct pwrseq_desc * __must_check +devm_pwrseq_get(struct device *dev, const char *target); + +int pwrseq_power_on(struct pwrseq_desc *desc); +int pwrseq_power_off(struct pwrseq_desc *desc); + +#else /* CONFIG_POWER_SEQUENCING */ + +static inline struct pwrseq_desc * __must_check +pwrseq_get(struct device *dev, const char *target) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void pwrseq_put(struct pwrseq_desc *desc) +{ +} + +static inline struct pwrseq_desc * __must_check +devm_pwrseq_get(struct device *dev, const char *target) +{ + return ERR_PTR(-ENOSYS); +} + +static inline int pwrseq_power_on(struct pwrseq_desc *desc) +{ + return -ENOSYS; +} + +static inline int pwrseq_power_off(struct pwrseq_desc *desc) +{ + return -ENOSYS; +} + +#endif /* CONFIG_POWER_SEQUENCING */ + +#endif /* __POWER_SEQUENCING_CONSUMER_H__ */ diff --git a/include/linux/pwrseq/provider.h b/include/linux/pwrseq/provider.h new file mode 100644 index 000000000000..cbc3607cbfcf --- /dev/null +++ b/include/linux/pwrseq/provider.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#ifndef __POWER_SEQUENCING_PROVIDER_H__ +#define __POWER_SEQUENCING_PROVIDER_H__ + +struct device; +struct module; +struct pwrseq_device; + +typedef int (*pwrseq_power_state_func)(struct pwrseq_device *); +typedef int (*pwrseq_match_func)(struct pwrseq_device *, struct device *); + +/** + * struct pwrseq_unit_data - Configuration of a single power sequencing + * unit. + * @name: Name of the unit. + * @deps: Units that must be enabled before this one and disabled after it + * in the order they come in this array. Must be NULL-terminated. + * @enable: Callback running the part of the power-on sequence provided by + * this unit. + * @disable: Callback running the part of the power-off sequence provided + * by this unit. + */ +struct pwrseq_unit_data { + const char *name; + const struct pwrseq_unit_data **deps; + pwrseq_power_state_func enable; + pwrseq_power_state_func disable; +}; + +/** + * struct pwrseq_target_data - Configuration of a power sequencing target. + * @name: Name of the target. + * @unit: Final unit that this target must reach in order to be considered + * enabled. + * @post_enable: Callback run after the target unit has been enabled, *after* + * the state lock has been released. It's useful for implementing + * boot-up delays without blocking other users from powering up + * using the same power sequencer. + */ +struct pwrseq_target_data { + const char *name; + const struct pwrseq_unit_data *unit; + pwrseq_power_state_func post_enable; +}; + +/** + * struct pwrseq_config - Configuration used for registering a new provider. + * @parent: Parent device for the sequencer. Must be set. + * @owner: Module providing this device. + * @drvdata: Private driver data. + * @match: Provider callback used to match the consumer device to the sequencer. + * @targets: Array of targets for this power sequencer. Must be NULL-terminated. + */ +struct pwrseq_config { + struct device *parent; + struct module *owner; + void *drvdata; + pwrseq_match_func match; + const struct pwrseq_target_data **targets; +}; + +struct pwrseq_device * +pwrseq_device_register(const struct pwrseq_config *config); +void pwrseq_device_unregister(struct pwrseq_device *pwrseq); +struct pwrseq_device * +devm_pwrseq_device_register(struct device *dev, + const struct pwrseq_config *config); + +void *pwrseq_device_get_drvdata(struct pwrseq_device *pwrseq); + +#endif /* __POWER_SEQUENCING_PROVIDER_H__ */ -- cgit v1.2.3 From 000d1940c90984a9a2af9c02bc17e3ca0d87f71d Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Jun 2024 16:22:58 +0300 Subject: drm/connector: hdmi: allow disabling Audio Infoframe Add drm_atomic_helper_connector_hdmi_disable_audio_infoframe(), an API to allow the driver disable sending the Audio Infoframe. This is to be used by the drivers if setup of the infoframes is not tightly coupled with the audio functionality and just disabling the audio playback doesn't stop the HDMI hardware from sending the Infoframe. Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240607-bridge-hdmi-connector-v5-1-ab384e6021af@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/display/drm_hdmi_state_helper.c | 36 +++++++++++++++++++++++++ include/drm/display/drm_hdmi_state_helper.h | 1 + 2 files changed, 37 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_hdmi_state_helper.c b/drivers/gpu/drm/display/drm_hdmi_state_helper.c index 437270c29210..2dab3ad8ce64 100644 --- a/drivers/gpu/drm/display/drm_hdmi_state_helper.c +++ b/drivers/gpu/drm/display/drm_hdmi_state_helper.c @@ -714,3 +714,39 @@ drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *co return ret; } EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_update_audio_infoframe); + +/** + * drm_atomic_helper_connector_hdmi_disable_audio_infoframe - Stop sending the Audio Infoframe + * @connector: A pointer to the HDMI connector + * + * This function is meant for HDMI connector drivers to stop sending their + * audio infoframe. It will typically be used in one of the ALSA hooks + * (most likely shutdown). + * + * Returns: + * Zero on success, error code on failure. + */ +int +drm_atomic_helper_connector_hdmi_disable_audio_infoframe(struct drm_connector *connector) +{ + struct drm_connector_hdmi_infoframe *infoframe = + &connector->hdmi.infoframes.audio; + struct drm_display_info *info = &connector->display_info; + int ret; + + if (!info->is_hdmi) + return 0; + + mutex_lock(&connector->hdmi.infoframes.lock); + + infoframe->set = false; + + ret = clear_infoframe(connector, infoframe); + + memset(&infoframe->data, 0, sizeof(infoframe->data)); + + mutex_unlock(&connector->hdmi.infoframes.lock); + + return ret; +} +EXPORT_SYMBOL(drm_atomic_helper_connector_hdmi_disable_audio_infoframe); diff --git a/include/drm/display/drm_hdmi_state_helper.h b/include/drm/display/drm_hdmi_state_helper.h index eb162ff24de0..285f366cf716 100644 --- a/include/drm/display/drm_hdmi_state_helper.h +++ b/include/drm/display/drm_hdmi_state_helper.h @@ -16,6 +16,7 @@ int drm_atomic_helper_connector_hdmi_check(struct drm_connector *connector, int drm_atomic_helper_connector_hdmi_update_audio_infoframe(struct drm_connector *connector, struct hdmi_audio_infoframe *frame); +int drm_atomic_helper_connector_hdmi_disable_audio_infoframe(struct drm_connector *connector); int drm_atomic_helper_connector_hdmi_update_infoframes(struct drm_connector *connector, struct drm_atomic_state *state); -- cgit v1.2.3 From 6b4468b0c6ba37a16795da567b58dc80bc7fb439 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 7 Jun 2024 16:23:00 +0300 Subject: drm/bridge-connector: implement glue code for HDMI connector In order to let bridge chains implement HDMI connector infrastructure, add necessary glue code to the drm_bridge_connector. In case there is a bridge that sets DRM_BRIDGE_OP_HDMI, drm_bridge_connector will register itself as a HDMI connector and provide proxy drm_connector_hdmi_funcs implementation. Note, to simplify implementation, there can be only one bridge in a chain that sets DRM_BRIDGE_OP_HDMI. Setting more than one is considered an error. This limitation can be lifted later, if the need arises. Acked-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240607-bridge-hdmi-connector-v5-3-ab384e6021af@linaro.org Signed-off-by: Dmitry Baryshkov --- drivers/gpu/drm/drm_bridge_connector.c | 94 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/drm_debugfs.c | 2 + include/drm/drm_bridge.h | 81 +++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index e093fc8928dc..0869b663f17e 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -18,6 +18,7 @@ #include #include #include +#include /** * DOC: overview @@ -87,6 +88,13 @@ struct drm_bridge_connector { * connector modes detection, if any (see &DRM_BRIDGE_OP_MODES). */ struct drm_bridge *bridge_modes; + /** + * @bridge_hdmi: + * + * The bridge in the chain that implements necessary support for the + * HDMI connector infrastructure, if any (see &DRM_BRIDGE_OP_HDMI). + */ + struct drm_bridge *bridge_hdmi; }; #define to_drm_bridge_connector(x) \ @@ -287,6 +295,60 @@ static const struct drm_connector_helper_funcs drm_bridge_connector_helper_funcs .disable_hpd = drm_bridge_connector_disable_hpd, }; +static enum drm_mode_status +drm_bridge_connector_tmds_char_rate_valid(const struct drm_connector *connector, + const struct drm_display_mode *mode, + unsigned long long tmds_rate) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *bridge; + + bridge = bridge_connector->bridge_hdmi; + if (!bridge) + return MODE_ERROR; + + if (bridge->funcs->hdmi_tmds_char_rate_valid) + return bridge->funcs->hdmi_tmds_char_rate_valid(bridge, mode, tmds_rate); + else + return MODE_OK; +} + +static int drm_bridge_connector_clear_infoframe(struct drm_connector *connector, + enum hdmi_infoframe_type type) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *bridge; + + bridge = bridge_connector->bridge_hdmi; + if (!bridge) + return -EINVAL; + + return bridge->funcs->hdmi_clear_infoframe(bridge, type); +} + +static int drm_bridge_connector_write_infoframe(struct drm_connector *connector, + enum hdmi_infoframe_type type, + const u8 *buffer, size_t len) +{ + struct drm_bridge_connector *bridge_connector = + to_drm_bridge_connector(connector); + struct drm_bridge *bridge; + + bridge = bridge_connector->bridge_hdmi; + if (!bridge) + return -EINVAL; + + return bridge->funcs->hdmi_write_infoframe(bridge, type, buffer, len); +} + +static const struct drm_connector_hdmi_funcs drm_bridge_connector_hdmi_funcs = { + .tmds_char_rate_valid = drm_bridge_connector_tmds_char_rate_valid, + .clear_infoframe = drm_bridge_connector_clear_infoframe, + .write_infoframe = drm_bridge_connector_write_infoframe, +}; + /* ----------------------------------------------------------------------------- * Bridge Connector Initialisation */ @@ -312,6 +374,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, struct drm_connector *connector; struct i2c_adapter *ddc = NULL; struct drm_bridge *bridge, *panel_bridge = NULL; + unsigned int supported_formats = BIT(HDMI_COLORSPACE_RGB); + unsigned int max_bpc = 8; int connector_type; int ret; @@ -348,6 +412,20 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, bridge_connector->bridge_detect = bridge; if (bridge->ops & DRM_BRIDGE_OP_MODES) bridge_connector->bridge_modes = bridge; + if (bridge->ops & DRM_BRIDGE_OP_HDMI) { + if (bridge_connector->bridge_hdmi) + return ERR_PTR(-EBUSY); + if (!bridge->funcs->hdmi_write_infoframe || + !bridge->funcs->hdmi_clear_infoframe) + return ERR_PTR(-EINVAL); + + bridge_connector->bridge_hdmi = bridge; + + if (bridge->supported_formats) + supported_formats = bridge->supported_formats; + if (bridge->max_bpc) + max_bpc = bridge->max_bpc; + } if (!drm_bridge_get_next_bridge(bridge)) connector_type = bridge->type; @@ -370,9 +448,19 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, return ERR_PTR(-EINVAL); } - ret = drmm_connector_init(drm, connector, - &drm_bridge_connector_funcs, - connector_type, ddc); + if (bridge_connector->bridge_hdmi) + ret = drmm_connector_hdmi_init(drm, connector, + bridge_connector->bridge_hdmi->vendor, + bridge_connector->bridge_hdmi->product, + &drm_bridge_connector_funcs, + &drm_bridge_connector_hdmi_funcs, + connector_type, ddc, + supported_formats, + max_bpc); + else + ret = drmm_connector_init(drm, connector, + &drm_bridge_connector_funcs, + connector_type, ddc); if (ret) { kfree(bridge_connector); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c index 8bec99251bee..6b239a24f1df 100644 --- a/drivers/gpu/drm/drm_debugfs.c +++ b/drivers/gpu/drm/drm_debugfs.c @@ -762,6 +762,8 @@ static int bridges_show(struct seq_file *m, void *data) drm_puts(&p, " hpd"); if (bridge->ops & DRM_BRIDGE_OP_MODES) drm_puts(&p, " modes"); + if (bridge->ops & DRM_BRIDGE_OP_HDMI) + drm_puts(&p, " hdmi"); drm_puts(&p, "\n"); } diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h index 5cf41f92d1f0..75019d16be64 100644 --- a/include/drm/drm_bridge.h +++ b/include/drm/drm_bridge.h @@ -630,6 +630,52 @@ struct drm_bridge_funcs { */ void (*hpd_disable)(struct drm_bridge *bridge); + /** + * @hdmi_tmds_char_rate_valid: + * + * Check whether a particular TMDS character rate is supported by the + * driver. + * + * This callback is optional and should only be implemented by the + * bridges that take part in the HDMI connector implementation. Bridges + * that implement it shall set the DRM_BRIDGE_OP_HDMI flag in their + * &drm_bridge->ops. + * + * Returns: + * + * Either &drm_mode_status.MODE_OK or one of the failure reasons + * in &enum drm_mode_status. + */ + enum drm_mode_status + (*hdmi_tmds_char_rate_valid)(const struct drm_bridge *bridge, + const struct drm_display_mode *mode, + unsigned long long tmds_rate); + + /** + * @hdmi_clear_infoframe: + * + * This callback clears the infoframes in the hardware during commit. + * It will be called multiple times, once for every disabled infoframe + * type. + * + * This callback is optional but it must be implemented by bridges that + * set the DRM_BRIDGE_OP_HDMI flag in their &drm_bridge->ops. + */ + int (*hdmi_clear_infoframe)(struct drm_bridge *bridge, + enum hdmi_infoframe_type type); + /** + * @hdmi_write_infoframe: + * + * Program the infoframe into the hardware. It will be called multiple + * times, once for every updated infoframe type. + * + * This callback is optional but it must be implemented by bridges that + * set the DRM_BRIDGE_OP_HDMI flag in their &drm_bridge->ops. + */ + int (*hdmi_write_infoframe)(struct drm_bridge *bridge, + enum hdmi_infoframe_type type, + const u8 *buffer, size_t len); + /** * @debugfs_init: * @@ -705,6 +751,16 @@ enum drm_bridge_ops { * this flag shall implement the &drm_bridge_funcs->get_modes callback. */ DRM_BRIDGE_OP_MODES = BIT(3), + /** + * @DRM_BRIDGE_OP_HDMI: The bridge provides HDMI connector operations, + * including infoframes support. Bridges that set this flag must + * implement the &drm_bridge_funcs->write_infoframe callback. + * + * Note: currently there can be at most one bridge in a chain that sets + * this bit. This is to simplify corresponding glue code in connector + * drivers. + */ + DRM_BRIDGE_OP_HDMI = BIT(4), }; /** @@ -773,6 +829,31 @@ struct drm_bridge { * @hpd_cb. */ void *hpd_data; + + /** + * @vendor: Vendor of the product to be used for the SPD InfoFrame + * generation. This is required if @DRM_BRIDGE_OP_HDMI is set. + */ + const char *vendor; + + /** + * @product: Name of the product to be used for the SPD InfoFrame + * generation. This is required if @DRM_BRIDGE_OP_HDMI is set. + */ + const char *product; + + /** + * @supported_formats: Bitmask of @hdmi_colorspace listing supported + * output formats. This is only relevant if @DRM_BRIDGE_OP_HDMI is set. + */ + unsigned int supported_formats; + + /** + * @max_bpc: Maximum bits per char the HDMI bridge supports. Allowed + * values are 8, 10 and 12. This is only relevant if + * @DRM_BRIDGE_OP_HDMI is set. + */ + unsigned int max_bpc; }; static inline struct drm_bridge * -- cgit v1.2.3 From 8682ad36870790ad6a9a03ac237c8d87d34b26d5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:43 +0200 Subject: wifi: cfg80211: use BIT() for flag enums Use BIT(x) instead of 1< Reviewed-by: Ilan Peer Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.c21598fbf49c.Ib8f26c5e9f508aee19fdfa1fd4b5995f084c46d4@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 102 ++++++++++++++++++++++++------------------------- 1 file changed, 51 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5da9bb0ac6a4..051d4eb227bf 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -127,31 +127,31 @@ struct wiphy; * even if it is otherwise disabled. */ enum ieee80211_channel_flags { - IEEE80211_CHAN_DISABLED = 1<<0, - IEEE80211_CHAN_NO_IR = 1<<1, - IEEE80211_CHAN_PSD = 1<<2, - IEEE80211_CHAN_RADAR = 1<<3, - IEEE80211_CHAN_NO_HT40PLUS = 1<<4, - IEEE80211_CHAN_NO_HT40MINUS = 1<<5, - IEEE80211_CHAN_NO_OFDM = 1<<6, - IEEE80211_CHAN_NO_80MHZ = 1<<7, - IEEE80211_CHAN_NO_160MHZ = 1<<8, - IEEE80211_CHAN_INDOOR_ONLY = 1<<9, - IEEE80211_CHAN_IR_CONCURRENT = 1<<10, - IEEE80211_CHAN_NO_20MHZ = 1<<11, - IEEE80211_CHAN_NO_10MHZ = 1<<12, - IEEE80211_CHAN_NO_HE = 1<<13, - IEEE80211_CHAN_1MHZ = 1<<14, - IEEE80211_CHAN_2MHZ = 1<<15, - IEEE80211_CHAN_4MHZ = 1<<16, - IEEE80211_CHAN_8MHZ = 1<<17, - IEEE80211_CHAN_16MHZ = 1<<18, - IEEE80211_CHAN_NO_320MHZ = 1<<19, - IEEE80211_CHAN_NO_EHT = 1<<20, - IEEE80211_CHAN_DFS_CONCURRENT = 1<<21, - IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = 1<<22, - IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = 1<<23, - IEEE80211_CHAN_CAN_MONITOR = 1<<24, + IEEE80211_CHAN_DISABLED = BIT(0), + IEEE80211_CHAN_NO_IR = BIT(1), + IEEE80211_CHAN_PSD = BIT(2), + IEEE80211_CHAN_RADAR = BIT(3), + IEEE80211_CHAN_NO_HT40PLUS = BIT(4), + IEEE80211_CHAN_NO_HT40MINUS = BIT(5), + IEEE80211_CHAN_NO_OFDM = BIT(6), + IEEE80211_CHAN_NO_80MHZ = BIT(7), + IEEE80211_CHAN_NO_160MHZ = BIT(8), + IEEE80211_CHAN_INDOOR_ONLY = BIT(9), + IEEE80211_CHAN_IR_CONCURRENT = BIT(10), + IEEE80211_CHAN_NO_20MHZ = BIT(11), + IEEE80211_CHAN_NO_10MHZ = BIT(12), + IEEE80211_CHAN_NO_HE = BIT(13), + IEEE80211_CHAN_1MHZ = BIT(14), + IEEE80211_CHAN_2MHZ = BIT(15), + IEEE80211_CHAN_4MHZ = BIT(16), + IEEE80211_CHAN_8MHZ = BIT(17), + IEEE80211_CHAN_16MHZ = BIT(18), + IEEE80211_CHAN_NO_320MHZ = BIT(19), + IEEE80211_CHAN_NO_EHT = BIT(20), + IEEE80211_CHAN_DFS_CONCURRENT = BIT(21), + IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = BIT(22), + IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23), + IEEE80211_CHAN_CAN_MONITOR = BIT(24), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -229,13 +229,13 @@ struct ieee80211_channel { * @IEEE80211_RATE_SUPPORTS_10MHZ: Rate can be used in 10 MHz mode */ enum ieee80211_rate_flags { - IEEE80211_RATE_SHORT_PREAMBLE = 1<<0, - IEEE80211_RATE_MANDATORY_A = 1<<1, - IEEE80211_RATE_MANDATORY_B = 1<<2, - IEEE80211_RATE_MANDATORY_G = 1<<3, - IEEE80211_RATE_ERP_G = 1<<4, - IEEE80211_RATE_SUPPORTS_5MHZ = 1<<5, - IEEE80211_RATE_SUPPORTS_10MHZ = 1<<6, + IEEE80211_RATE_SHORT_PREAMBLE = BIT(0), + IEEE80211_RATE_MANDATORY_A = BIT(1), + IEEE80211_RATE_MANDATORY_B = BIT(2), + IEEE80211_RATE_MANDATORY_G = BIT(3), + IEEE80211_RATE_ERP_G = BIT(4), + IEEE80211_RATE_SUPPORTS_5MHZ = BIT(5), + IEEE80211_RATE_SUPPORTS_10MHZ = BIT(6), }; /** @@ -1957,9 +1957,9 @@ struct rate_info { * @BSS_PARAM_FLAGS_SHORT_SLOT_TIME: whether short slot time is enabled */ enum bss_param_flags { - BSS_PARAM_FLAGS_CTS_PROT = 1<<0, - BSS_PARAM_FLAGS_SHORT_PREAMBLE = 1<<1, - BSS_PARAM_FLAGS_SHORT_SLOT_TIME = 1<<2, + BSS_PARAM_FLAGS_CTS_PROT = BIT(0), + BSS_PARAM_FLAGS_SHORT_PREAMBLE = BIT(1), + BSS_PARAM_FLAGS_SHORT_SLOT_TIME = BIT(2), }; /** @@ -2266,13 +2266,13 @@ static inline int cfg80211_get_station(struct net_device *dev, * @MONITOR_FLAG_ACTIVE: active monitor, ACKs frames on its MAC address */ enum monitor_flags { - MONITOR_FLAG_CHANGED = 1<<__NL80211_MNTR_FLAG_INVALID, - MONITOR_FLAG_FCSFAIL = 1< Date: Thu, 23 May 2024 12:09:44 +0200 Subject: wifi: ieee80211: remove unused enum ieee80211_client_reg_power This has never been used, and it's really not directly representing the spec, so shouldn't have been here in the first place. Remove it. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.32ed8fc1522d.Id4480d162e1921478e33d145890dc16c263b57bf@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 769008a51809..456a55bd6c6b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2426,24 +2426,6 @@ enum ieee80211_ap_reg_power { IEEE80211_REG_AP_POWER_AFTER_LAST - 1, }; -/** - * enum ieee80211_client_reg_power - regulatory power for a client - * - * @IEEE80211_REG_UNSET_CLIENT: Client has no regulatory power mode - * @IEEE80211_REG_DEFAULT_CLIENT: Default Client - * @IEEE80211_REG_SUBORDINATE_CLIENT: Subordinate Client - * @IEEE80211_REG_CLIENT_POWER_AFTER_LAST: internal - * @IEEE80211_REG_CLIENT_POWER_MAX: maximum value - */ -enum ieee80211_client_reg_power { - IEEE80211_REG_UNSET_CLIENT, - IEEE80211_REG_DEFAULT_CLIENT, - IEEE80211_REG_SUBORDINATE_CLIENT, - IEEE80211_REG_CLIENT_POWER_AFTER_LAST, - IEEE80211_REG_CLIENT_POWER_MAX = - IEEE80211_REG_CLIENT_POWER_AFTER_LAST - 1, -}; - /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 -- cgit v1.2.3 From 0a9314ad5f45aeb10326dce33c5d70b85f74ef2d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:45 +0200 Subject: wifi: cfg80211: move enum ieee80211_ap_reg_power to cfg80211 This really shouldn't have been in ieee80211.h, since it doesn't directly represent the spec. Move it to cfg80211 rather than mac80211 since upcoming changes will use it there. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.962b16c831cd.I5745962525b1b176c5b90d37b3720fc100eee406@changeid Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 20 -------------------- include/net/cfg80211.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 456a55bd6c6b..30cef3b940eb 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2406,26 +2406,6 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, int mcs, bool ext_nss_bw_capable, unsigned int max_vht_nss); -/** - * enum ieee80211_ap_reg_power - regulatory power for a Access Point - * - * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode - * @IEEE80211_REG_LPI_AP: Indoor Access Point - * @IEEE80211_REG_SP_AP: Standard power Access Point - * @IEEE80211_REG_VLP_AP: Very low power Access Point - * @IEEE80211_REG_AP_POWER_AFTER_LAST: internal - * @IEEE80211_REG_AP_POWER_MAX: maximum value - */ -enum ieee80211_ap_reg_power { - IEEE80211_REG_UNSET_AP, - IEEE80211_REG_LPI_AP, - IEEE80211_REG_SP_AP, - IEEE80211_REG_VLP_AP, - IEEE80211_REG_AP_POWER_AFTER_LAST, - IEEE80211_REG_AP_POWER_MAX = - IEEE80211_REG_AP_POWER_AFTER_LAST - 1, -}; - /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 051d4eb227bf..fb7d76513e1c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6081,6 +6081,21 @@ void wiphy_delayed_work_cancel(struct wiphy *wiphy, void wiphy_delayed_work_flush(struct wiphy *wiphy, struct wiphy_delayed_work *dwork); +/** + * enum ieee80211_ap_reg_power - regulatory power for an Access Point + * + * @IEEE80211_REG_UNSET_AP: Access Point has no regulatory power mode + * @IEEE80211_REG_LPI_AP: Indoor Access Point + * @IEEE80211_REG_SP_AP: Standard power Access Point + * @IEEE80211_REG_VLP_AP: Very low power Access Point + */ +enum ieee80211_ap_reg_power { + IEEE80211_REG_UNSET_AP, + IEEE80211_REG_LPI_AP, + IEEE80211_REG_SP_AP, + IEEE80211_REG_VLP_AP, +}; + /** * struct wireless_dev - wireless device state * -- cgit v1.2.3 From 9fd171a71b9d4cd8855891c0ba7e2a152139b41a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:48 +0200 Subject: wifi: cfg80211: refactor regulatory beaconing checking There are two functions exported now, with different settings, refactor to just export a single function that take a struct with different settings. This will make it easier to add more parameters. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.d44c34dadfc2.I59b4403108e0dbf7fc6ae8f7522e1af520cffb1c@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 54 ++++++++++++++++++++++++++++++++++++++++++++------ net/wireless/chan.c | 30 +++++++++++----------------- 2 files changed, 60 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fb7d76513e1c..45ffeb110d36 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -8800,6 +8800,31 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, sig_dbm); } +/** + * struct cfg80211_beaconing_check_config - beacon check configuration + * @iftype: the interface type to check for + * @relax: allow IR-relaxation conditions to apply (e.g. another + * interface connected already on the same channel) + * NOTE: If this is set, wiphy mutex must be held. + */ +struct cfg80211_beaconing_check_config { + enum nl80211_iftype iftype; + bool relax; +}; + +/** + * cfg80211_reg_check_beaconing - check if beaconing is allowed + * @wiphy: the wiphy + * @chandef: the channel definition + * @cfg: additional parameters for the checking + * + * Return: %true if there is no secondary channel or the secondary channel(s) + * can be used for beaconing (i.e. is not a radar channel etc.) + */ +bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + struct cfg80211_beaconing_check_config *cfg); + /** * cfg80211_reg_can_beacon - check if beaconing is allowed * @wiphy: the wiphy @@ -8809,9 +8834,17 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, * Return: %true if there is no secondary channel or the secondary channel(s) * can be used for beaconing (i.e. is not a radar channel etc.) */ -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype); +static inline bool +cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + struct cfg80211_beaconing_check_config config = { + .iftype = iftype, + }; + + return cfg80211_reg_check_beaconing(wiphy, chandef, &config); +} /** * cfg80211_reg_can_beacon_relax - check if beaconing is allowed with relaxation @@ -8826,9 +8859,18 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy, * * Context: Requires the wiphy mutex to be held. */ -bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype); +static inline bool +cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + enum nl80211_iftype iftype) +{ + struct cfg80211_beaconing_check_config config = { + .iftype = iftype, + .relax = true, + }; + + return cfg80211_reg_check_beaconing(wiphy, chandef, &config); +} /** * cfg80211_ch_switch_notify - update wdev channel and notify userspace diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 360480604515..8b1796130b28 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1550,22 +1550,12 @@ static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, return res; } -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype) -{ - return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, true); -} -EXPORT_SYMBOL(cfg80211_reg_can_beacon); - -bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype) +bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef, + struct cfg80211_beaconing_check_config *cfg) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - bool check_no_ir; - - lockdep_assert_held(&rdev->wiphy.mtx); + bool check_no_ir = true; /* * Under certain conditions suggested by some regulatory bodies a @@ -1573,12 +1563,16 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * only if such relaxations are not enabled and the conditions are not * met. */ - check_no_ir = !cfg80211_ir_permissive_chan(wiphy, iftype, - chandef->chan); + if (cfg->relax) { + lockdep_assert_held(&rdev->wiphy.mtx); + check_no_ir = !cfg80211_ir_permissive_chan(wiphy, cfg->iftype, + chandef->chan); + } - return _cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); + return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype, + check_no_ir); } -EXPORT_SYMBOL(cfg80211_reg_can_beacon_relax); +EXPORT_SYMBOL(cfg80211_reg_check_beaconing); int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev, struct cfg80211_chan_def *chandef) -- cgit v1.2.3 From c1d8bd8d777d55f6708ca6e47c54dbe9f66f9bbb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 23 May 2024 12:09:49 +0200 Subject: wifi: cfg80211: add regulatory flag to allow VLP AP operation Add a regulatory flag to allow VLP AP operation even on channels otherwise marked NO_IR, which may be possible in some regulatory domains/countries. Note that this requires checking also when the beacon is changed, since that may change the regulatory power type. Reviewed-by: Miriam Rachel Korenblit Signed-off-by: Johannes Berg Link: https://msgid.link/20240523120945.63792ce19790.Ie2a02750d283b78fbf3c686b10565fb0388889e2@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/chan.c | 37 ++++++++++++++++++++++++++----------- net/wireless/nl80211.c | 27 +++++++++++++++++++++++++-- net/wireless/reg.c | 2 ++ net/wireless/trace.h | 15 +++++++++------ 6 files changed, 74 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 45ffeb110d36..6f992aff74ae 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -125,6 +125,8 @@ struct wiphy; * @IEEE80211_CHAN_CAN_MONITOR: This channel can be used for monitor * mode even in the presence of other (regulatory) restrictions, * even if it is otherwise disabled. + * @IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP: Allow using this channel for AP operation + * with very low power (VLP), even if otherwise set to NO_IR. */ enum ieee80211_channel_flags { IEEE80211_CHAN_DISABLED = BIT(0), @@ -152,6 +154,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT = BIT(22), IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT = BIT(23), IEEE80211_CHAN_CAN_MONITOR = BIT(24), + IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP = BIT(25), }; #define IEEE80211_CHAN_NO_HT40 \ @@ -8806,9 +8809,12 @@ static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy, * @relax: allow IR-relaxation conditions to apply (e.g. another * interface connected already on the same channel) * NOTE: If this is set, wiphy mutex must be held. + * @reg_power: &enum ieee80211_ap_reg_power value indicating the + * advertised/used 6 GHz regulatory power setting */ struct cfg80211_beaconing_check_config { enum nl80211_iftype iftype; + enum ieee80211_ap_reg_power reg_power; bool relax; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f917bc6c9b6f..6ae3997061b6 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4277,6 +4277,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_CAN_MONITOR: This channel can be used in monitor * mode despite other (regulatory) restrictions, even if the channel is * otherwise completely disabled. + * @NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP: This channel can be used for a + * very low power (VLP) AP, despite being NO_IR. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -4320,6 +4322,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT, NL80211_FREQUENCY_ATTR_CAN_MONITOR, + NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -4529,6 +4532,8 @@ enum nl80211_sched_scan_match_attr { * Should be used together with %NL80211_RRF_DFS only. * @NL80211_RRF_NO_6GHZ_VLP_CLIENT: Client connection to VLP AP not allowed * @NL80211_RRF_NO_6GHZ_AFC_CLIENT: Client connection to AFC AP not allowed + * @NL80211_RRF_ALLOW_6GHZ_VLP_AP: Very low power (VLP) AP can be permitted + * despite NO_IR configuration. */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -4553,6 +4558,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_DFS_CONCURRENT = 1<<21, NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1<<22, NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1<<23, + NL80211_RRF_ALLOW_6GHZ_VLP_AP = 1<<24, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 8b1796130b28..bf2fdcd42019 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1523,28 +1523,38 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype, - bool check_no_ir) + u32 prohibited_flags, + u32 permitting_flags) { - bool res; - u32 prohibited_flags = IEEE80211_CHAN_DISABLED; + bool res, check_radar; int dfs_required; - trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir); + trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, + prohibited_flags, + permitting_flags); - if (check_no_ir) - prohibited_flags |= IEEE80211_CHAN_NO_IR; + if (!_cfg80211_chandef_usable(wiphy, chandef, + IEEE80211_CHAN_DISABLED, 0)) + return false; dfs_required = cfg80211_chandef_dfs_required(wiphy, chandef, iftype); - if (dfs_required != 0) - prohibited_flags |= IEEE80211_CHAN_RADAR; + check_radar = dfs_required != 0; if (dfs_required > 0 && cfg80211_chandef_dfs_available(wiphy, chandef)) { /* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */ - prohibited_flags = IEEE80211_CHAN_DISABLED; + prohibited_flags &= ~IEEE80211_CHAN_NO_IR; + check_radar = false; } - res = _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, 0); + if (check_radar && + !_cfg80211_chandef_usable(wiphy, chandef, + IEEE80211_CHAN_RADAR, 0)) + return false; + + res = _cfg80211_chandef_usable(wiphy, chandef, + prohibited_flags, + permitting_flags); trace_cfg80211_return_bool(res); return res; @@ -1555,6 +1565,7 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, struct cfg80211_beaconing_check_config *cfg) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + u32 permitting_flags = 0; bool check_no_ir = true; /* @@ -1569,8 +1580,12 @@ bool cfg80211_reg_check_beaconing(struct wiphy *wiphy, chandef->chan); } + if (cfg->reg_power == IEEE80211_REG_VLP_AP) + permitting_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; + return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype, - check_no_ir); + check_no_ir ? IEEE80211_CHAN_NO_IR : 0, + permitting_flags); } EXPORT_SYMBOL(cfg80211_reg_check_beaconing); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4628280abf1d..a94e73c133f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1207,6 +1207,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_CAN_MONITOR) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_CAN_MONITOR)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -5954,6 +5957,7 @@ static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_beaconing_check_config beacon_check = {}; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -6103,8 +6107,13 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } - if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, ¶ms->chandef, - wdev->iftype)) { + beacon_check.iftype = wdev->iftype; + beacon_check.relax = true; + beacon_check.reg_power = + cfg80211_get_6ghz_power_type(params->beacon.tail, + params->beacon.tail_len); + if (!cfg80211_reg_check_beaconing(&rdev->wiphy, ¶ms->chandef, + &beacon_check)) { err = -EINVAL; goto out; } @@ -6261,6 +6270,7 @@ out: static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_beaconing_check_config beacon_check = {}; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -6287,6 +6297,19 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (err) goto out; + /* recheck beaconing is permitted with possibly changed power type */ + beacon_check.iftype = wdev->iftype; + beacon_check.relax = true; + beacon_check.reg_power = + cfg80211_get_6ghz_power_type(params->beacon.tail, + params->beacon.tail_len); + if (!cfg80211_reg_check_beaconing(&rdev->wiphy, + &wdev->links[link_id].ap.chandef, + &beacon_check)) { + err = -EINVAL; + goto out; + } + attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY]; if (attr) { err = nl80211_parse_fils_discovery(rdev, attr, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3cef0021a3db..4a27f3823e25 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1600,6 +1600,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT; if (rd_flags & NL80211_RRF_PSD) channel_flags |= IEEE80211_CHAN_PSD; + if (rd_flags & NL80211_RRF_ALLOW_6GHZ_VLP_AP) + channel_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP; return channel_flags; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 6ef9294747e3..5c26f065bd68 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3389,23 +3389,26 @@ TRACE_EVENT(cfg80211_cqm_rssi_notify, TRACE_EVENT(cfg80211_reg_can_beacon, TP_PROTO(struct wiphy *wiphy, struct cfg80211_chan_def *chandef, - enum nl80211_iftype iftype, bool check_no_ir), - TP_ARGS(wiphy, chandef, iftype, check_no_ir), + enum nl80211_iftype iftype, u32 prohibited_flags, + u32 permitting_flags), + TP_ARGS(wiphy, chandef, iftype, prohibited_flags, permitting_flags), TP_STRUCT__entry( WIPHY_ENTRY CHAN_DEF_ENTRY __field(enum nl80211_iftype, iftype) - __field(bool, check_no_ir) + __field(u32, prohibited_flags) + __field(u32, permitting_flags) ), TP_fast_assign( WIPHY_ASSIGN; CHAN_DEF_ASSIGN(chandef); __entry->iftype = iftype; - __entry->check_no_ir = check_no_ir; + __entry->prohibited_flags = prohibited_flags; + __entry->permitting_flags = permitting_flags; ), - TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d check_no_ir=%s", + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", iftype=%d prohibited_flags=0x%x permitting_flags=0x%x", WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->iftype, - BOOL_TO_STR(__entry->check_no_ir)) + __entry->prohibited_flags, __entry->permitting_flags) ); TRACE_EVENT(cfg80211_chandef_dfs_required, -- cgit v1.2.3 From 4565d2652a37e438e4cd729e2a8dfeffe34c958c Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 12 Jun 2024 10:20:17 +0200 Subject: PCI/pwrctl: Add PCI power control core code Some PCI devices must be powered-on before they can be detected on the bus. Introduce a simple framework reusing the existing PCI OF infrastructure. The way this works is: a DT node representing a PCI device connected to the port can be matched against its power control platform driver. If the match succeeds, the driver is responsible for powering-up the device and calling pci_pwrctl_device_set_ready() which will trigger a PCI bus rescan as well as subscribe to PCI bus notifications. When the device is detected and created, we'll make it consume the same DT node that the platform device did. When the device is bound, we'll create a device link between it and the parent power control device. Tested-by: Amit Pundir Tested-by: Neil Armstrong # on SM8550-QRD, SM8650-QRD & SM8650-HDK Tested-by: Caleb Connolly # OnePlus 8T Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20240612082019.19161-5-brgl@bgdev.pl Signed-off-by: Bartosz Golaszewski --- MAINTAINERS | 8 +++ drivers/pci/Kconfig | 1 + drivers/pci/Makefile | 1 + drivers/pci/pwrctl/Kconfig | 8 +++ drivers/pci/pwrctl/Makefile | 4 ++ drivers/pci/pwrctl/core.c | 137 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci-pwrctl.h | 51 +++++++++++++++++ 7 files changed, 210 insertions(+) create mode 100644 drivers/pci/pwrctl/Kconfig create mode 100644 drivers/pci/pwrctl/Makefile create mode 100644 drivers/pci/pwrctl/core.c create mode 100644 include/linux/pci-pwrctl.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 6df73a7e07f6..3377cab34144 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17349,6 +17349,14 @@ F: Documentation/driver-api/pci/p2pdma.rst F: drivers/pci/p2pdma.c F: include/linux/pci-p2pdma.h +PCI POWER CONTROL +M: Bartosz Golaszewski +L: linux-pci@vger.kernel.org +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/pci/pci.git +F: drivers/pci/pwrctl/* +F: include/linux/pci-pwrctl.h + PCI SUBSYSTEM M: Bjorn Helgaas L: linux-pci@vger.kernel.org diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index d35001589d88..aa4d1833f442 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -296,5 +296,6 @@ source "drivers/pci/hotplug/Kconfig" source "drivers/pci/controller/Kconfig" source "drivers/pci/endpoint/Kconfig" source "drivers/pci/switch/Kconfig" +source "drivers/pci/pwrctl/Kconfig" endif diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 175302036890..8ddad57934a6 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_PCI) += access.o bus.o probe.o host-bridge.o \ obj-$(CONFIG_PCI) += msi/ obj-$(CONFIG_PCI) += pcie/ +obj-$(CONFIG_PCI) += pwrctl/ ifdef CONFIG_PCI obj-$(CONFIG_PROC_FS) += proc.o diff --git a/drivers/pci/pwrctl/Kconfig b/drivers/pci/pwrctl/Kconfig new file mode 100644 index 000000000000..96195395af69 --- /dev/null +++ b/drivers/pci/pwrctl/Kconfig @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menu "PCI Power control drivers" + +config PCI_PWRCTL + tristate + +endmenu diff --git a/drivers/pci/pwrctl/Makefile b/drivers/pci/pwrctl/Makefile new file mode 100644 index 000000000000..52ae0640ef7b --- /dev/null +++ b/drivers/pci/pwrctl/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_PCI_PWRCTL) += pci-pwrctl-core.o +pci-pwrctl-core-y := core.o diff --git a/drivers/pci/pwrctl/core.c b/drivers/pci/pwrctl/core.c new file mode 100644 index 000000000000..feca26ad2f6a --- /dev/null +++ b/drivers/pci/pwrctl/core.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include + +static int pci_pwrctl_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct pci_pwrctl *pwrctl = container_of(nb, struct pci_pwrctl, nb); + struct device *dev = data; + + if (dev_fwnode(dev) != dev_fwnode(pwrctl->dev)) + return NOTIFY_DONE; + + switch (action) { + case BUS_NOTIFY_ADD_DEVICE: + /* + * We will have two struct device objects bound to two different + * drivers on different buses but consuming the same DT node. We + * must not bind the pins twice in this case but only once for + * the first device to be added. + * + * If we got here then the PCI device is the second after the + * power control platform device. Mark its OF node as reused. + */ + dev->of_node_reused = true; + break; + case BUS_NOTIFY_BOUND_DRIVER: + pwrctl->link = device_link_add(dev, pwrctl->dev, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!pwrctl->link) + dev_err(pwrctl->dev, "Failed to add device link\n"); + break; + case BUS_NOTIFY_UNBOUND_DRIVER: + if (pwrctl->link) + device_link_remove(dev, pwrctl->dev); + break; + } + + return NOTIFY_DONE; +} + +/** + * pci_pwrctl_device_set_ready() - Notify the pwrctl subsystem that the PCI + * device is powered-up and ready to be detected. + * + * @pwrctl: PCI power control data. + * + * Returns: + * 0 on success, negative error number on error. + * + * Note: + * This function returning 0 doesn't mean the device was detected. It means, + * that the bus rescan was successfully started. The device will get bound to + * its PCI driver asynchronously. + */ +int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl) +{ + int ret; + + if (!pwrctl->dev) + return -ENODEV; + + pwrctl->nb.notifier_call = pci_pwrctl_notify; + ret = bus_register_notifier(&pci_bus_type, &pwrctl->nb); + if (ret) + return ret; + + pci_lock_rescan_remove(); + pci_rescan_bus(to_pci_dev(pwrctl->dev->parent)->bus); + pci_unlock_rescan_remove(); + + return 0; +} +EXPORT_SYMBOL_GPL(pci_pwrctl_device_set_ready); + +/** + * pci_pwrctl_device_unset_ready() - Notify the pwrctl subsystem that the PCI + * device is about to be powered-down. + * + * @pwrctl: PCI power control data. + */ +void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl) +{ + /* + * We don't have to delete the link here. Typically, this function + * is only called when the power control device is being detached. If + * it is being detached then the child PCI device must have already + * been unbound too or the device core wouldn't let us unbind. + */ + bus_unregister_notifier(&pci_bus_type, &pwrctl->nb); +} +EXPORT_SYMBOL_GPL(pci_pwrctl_device_unset_ready); + +static void devm_pci_pwrctl_device_unset_ready(void *data) +{ + struct pci_pwrctl *pwrctl = data; + + pci_pwrctl_device_unset_ready(pwrctl); +} + +/** + * devm_pci_pwrctl_device_set_ready - Managed variant of + * pci_pwrctl_device_set_ready(). + * + * @dev: Device managing this pwrctl provider. + * @pwrctl: PCI power control data. + * + * Returns: + * 0 on success, negative error number on error. + */ +int devm_pci_pwrctl_device_set_ready(struct device *dev, + struct pci_pwrctl *pwrctl) +{ + int ret; + + ret = pci_pwrctl_device_set_ready(pwrctl); + if (ret) + return ret; + + return devm_add_action_or_reset(dev, + devm_pci_pwrctl_device_unset_ready, + pwrctl); +} +EXPORT_SYMBOL_GPL(devm_pci_pwrctl_device_set_ready); + +MODULE_AUTHOR("Bartosz Golaszewski "); +MODULE_DESCRIPTION("PCI Device Power Control core driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/pci-pwrctl.h b/include/linux/pci-pwrctl.h new file mode 100644 index 000000000000..45e9cfe740e4 --- /dev/null +++ b/include/linux/pci-pwrctl.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 Linaro Ltd. + */ + +#ifndef __PCI_PWRCTL_H__ +#define __PCI_PWRCTL_H__ + +#include + +struct device; +struct device_link; + +/* + * This is a simple framework for solving the issue of PCI devices that require + * certain resources (regulators, GPIOs, clocks) to be enabled before the + * device can actually be detected on the PCI bus. + * + * The idea is to reuse the platform bus to populate OF nodes describing the + * PCI device and its resources, let these platform devices probe and enable + * relevant resources and then trigger a rescan of the PCI bus allowing for the + * same device (with a second associated struct device) to be registered with + * the PCI subsystem. + * + * To preserve a correct hierarchy for PCI power management and device reset, + * we create a device link between the power control platform device (parent) + * and the supplied PCI device (child). + */ + +/** + * struct pci_pwrctl - PCI device power control context. + * @dev: Address of the power controlling device. + * + * An object of this type must be allocated by the PCI power control device and + * passed to the pwrctl subsystem to trigger a bus rescan and setup a device + * link with the device once it's up. + */ +struct pci_pwrctl { + struct device *dev; + + /* Private: don't use. */ + struct notifier_block nb; + struct device_link *link; +}; + +int pci_pwrctl_device_set_ready(struct pci_pwrctl *pwrctl); +void pci_pwrctl_device_unset_ready(struct pci_pwrctl *pwrctl); +int devm_pci_pwrctl_device_set_ready(struct device *dev, + struct pci_pwrctl *pwrctl); + +#endif /* __PCI_PWRCTL_H__ */ -- cgit v1.2.3 From 7180f8d91fcbf252de572d9ffacc945effed0060 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Tue, 11 Jun 2024 19:38:22 +0200 Subject: vfs: add rcu-based find_inode variants for iget ops This avoids one inode hash lock acquire in the common case on inode creation, in effect significantly reducing contention. On the stock kernel said lock is typically taken twice: 1. once to check if the inode happens to already be present 2. once to add it to the hash The back-to-back lock/unlock pattern is known to degrade performance significantly, which is further exacerbated if the hash is heavily populated (long chains to walk, extending hold time). Arguably hash sizing and hashing algo need to be revisited, but that's beyond the scope of this patch. With the acquire from step 1 eliminated with RCU lookup throughput increases significantly at the scale of 20 cores (benchmark results at the bottom). So happens the hash already supports RCU-based operation, but lookups on inode insertions didn't take advantage of it. This of course has its limits as the global lock is still a bottleneck. There was a patchset posted which introduced fine-grained locking[1] but it appears staled. Apart from that doubt was expressed whether a handrolled hash implementation is appropriate to begin with, suggesting replacement with rhashtables. Nobody committed to carrying [1] across the finish line or implementing anything better, thus the bandaid below. iget_locked consumers (notably ext4) get away without any changes because inode comparison method is built-in. iget5_locked consumers pass a custom callback. Since removal of locking adds more problems (inode can be changing) it's not safe to assume all filesystems happen to cope. Thus iget5_locked_rcu gets added, requiring manual conversion of interested filesystems. In order to reduce code duplication find_inode and find_inode_fast grow an argument indicating whether inode hash lock is held, which is passed down in case sleeping is necessary. They always rcu_read_lock, which is redundant but harmless. Doing it conditionally reduces readability for no real gain that I can see. RCU-alike restrictions were already put on callbacks due to the hash spinlock being held. Benchmarking: There is a real cache-busting workload scanning millions of files in parallel (it's a backup appliance), where the initial lookup is guaranteed to fail resulting in the two lock acquires on stock kernel (and one with the patch at hand). Implemented below is a synthetic benchmark providing the same behavior. [I shall note the workload is not running on Linux, instead it was causing trouble elsewhere. Benchmark below was used while addressing said problems and was found to adequately represent the real workload.] Total real time fluctuates by 1-2s. With 20 threads each walking a dedicated 1000 dirs * 1000 files directory tree to stat(2) on a 32 core + 24GB RAM vm: ext4 (needed mkfs.ext4 -N 24000000): before: 3.77s user 890.90s system 1939% cpu 46.118 total after: 3.24s user 397.73s system 1858% cpu 21.581 total (-53%) That's 20 million files to visit, while the machine can only cache about 15 million at a time (obtained from ext4_inode_cache object count in /proc/slabinfo). Since each terminal inode is only visited once per run this amounts to 0% hit ratio for the dentry cache and the hash table (there are however hits for the intermediate directories). On repeated runs the kernel caches the last ~15 mln, meaning there is ~5 mln of uncached inodes which are going to be visited first, evicting the previously cached state as it happens. Lack of hits can be trivially verified with bpftrace, like so: bpftrace -e 'kretprobe:find_inode_fast { @[kstack(), retval != 0] = count(); }'\ -c "/bin/sh walktrees /testfs 20" Best ran more than once. Expected results after "warmup": [snip] @[ __ext4_iget+275 ext4_lookup+224 __lookup_slow+130 walk_component+219 link_path_walk.part.0.constprop.0+614 path_lookupat+62 filename_lookup+204 vfs_statx+128 vfs_fstatat+131 __do_sys_newfstatat+38 do_syscall_64+87 entry_SYSCALL_64_after_hwframe+118 , 1]: 20000 @[ __ext4_iget+275 ext4_lookup+224 __lookup_slow+130 walk_component+219 path_lookupat+106 filename_lookup+204 vfs_statx+128 vfs_fstatat+131 __do_sys_newfstatat+38 do_syscall_64+87 entry_SYSCALL_64_after_hwframe+118 , 1]: 20000000 That is 20 million calls for the initial lookup and 20 million after allocating a new inode, all of them failing to return a value != 0 (i.e., they are returning NULL -- no match found). Of course aborting the benchmark in the middle and starting it again (or messing with the state in other ways) is going to alter these results. Benchmark can be found here: https://people.freebsd.org/~mjg/fstree.tgz [1] https://lore.kernel.org/all/20231206060629.2827226-1-david@fromorbit.com/ Signed-off-by: Mateusz Guzik Link: https://lore.kernel.org/r/20240611173824.535995-2-mjguzik@gmail.com Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/inode.c | 97 ++++++++++++++++++++++++++++++++++++++++++++---------- include/linux/fs.h | 7 +++- 2 files changed, 86 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 3a41f83a4ba5..8c57cea7bbbb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -886,36 +886,45 @@ long prune_icache_sb(struct super_block *sb, struct shrink_control *sc) return freed; } -static void __wait_on_freeing_inode(struct inode *inode); +static void __wait_on_freeing_inode(struct inode *inode, bool locked); /* * Called with the inode lock held. */ static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, int (*test)(struct inode *, void *), - void *data) + void *data, bool locked) { struct inode *inode = NULL; + if (locked) + lockdep_assert_held(&inode_hash_lock); + else + lockdep_assert_not_held(&inode_hash_lock); + + rcu_read_lock(); repeat: - hlist_for_each_entry(inode, head, i_hash) { + hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_sb != sb) continue; if (!test(inode, data)) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode); + __wait_on_freeing_inode(inode, locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ERR_PTR(-ESTALE); } __iget(inode); spin_unlock(&inode->i_lock); + rcu_read_unlock(); return inode; } + rcu_read_unlock(); return NULL; } @@ -924,29 +933,39 @@ repeat: * iget_locked for details. */ static struct inode *find_inode_fast(struct super_block *sb, - struct hlist_head *head, unsigned long ino) + struct hlist_head *head, unsigned long ino, + bool locked) { struct inode *inode = NULL; + if (locked) + lockdep_assert_held(&inode_hash_lock); + else + lockdep_assert_not_held(&inode_hash_lock); + + rcu_read_lock(); repeat: - hlist_for_each_entry(inode, head, i_hash) { + hlist_for_each_entry_rcu(inode, head, i_hash) { if (inode->i_ino != ino) continue; if (inode->i_sb != sb) continue; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING|I_WILL_FREE)) { - __wait_on_freeing_inode(inode); + __wait_on_freeing_inode(inode, locked); goto repeat; } if (unlikely(inode->i_state & I_CREATING)) { spin_unlock(&inode->i_lock); + rcu_read_unlock(); return ERR_PTR(-ESTALE); } __iget(inode); spin_unlock(&inode->i_lock); + rcu_read_unlock(); return inode; } + rcu_read_unlock(); return NULL; } @@ -1161,7 +1180,7 @@ struct inode *inode_insert5(struct inode *inode, unsigned long hashval, again: spin_lock(&inode_hash_lock); - old = find_inode(inode->i_sb, head, test, data); + old = find_inode(inode->i_sb, head, test, data, true); if (unlikely(old)) { /* * Uhhuh, somebody else created the same inode under us. @@ -1245,6 +1264,48 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, } EXPORT_SYMBOL(iget5_locked); +/** + * iget5_locked_rcu - obtain an inode from a mounted file system + * @sb: super block of file system + * @hashval: hash value (usually inode number) to get + * @test: callback used for comparisons between inodes + * @set: callback used to initialize a new struct inode + * @data: opaque data pointer to pass to @test and @set + * + * This is equivalent to iget5_locked, except the @test callback must + * tolerate the inode not being stable, including being mid-teardown. + */ +struct inode *iget5_locked_rcu(struct super_block *sb, unsigned long hashval, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *data) +{ + struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *inode, *new; + +again: + inode = find_inode(sb, head, test, data, false); + if (inode) { + if (IS_ERR(inode)) + return NULL; + wait_on_inode(inode); + if (unlikely(inode_unhashed(inode))) { + iput(inode); + goto again; + } + return inode; + } + + new = alloc_inode(sb); + if (new) { + new->i_state = 0; + inode = inode_insert5(new, hashval, test, set, data); + if (unlikely(inode != new)) + destroy_inode(new); + } + return inode; +} +EXPORT_SYMBOL_GPL(iget5_locked_rcu); + /** * iget_locked - obtain an inode from a mounted file system * @sb: super block of file system @@ -1263,9 +1324,7 @@ struct inode *iget_locked(struct super_block *sb, unsigned long ino) struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; again: - spin_lock(&inode_hash_lock); - inode = find_inode_fast(sb, head, ino); - spin_unlock(&inode_hash_lock); + inode = find_inode_fast(sb, head, ino, false); if (inode) { if (IS_ERR(inode)) return NULL; @@ -1283,7 +1342,7 @@ again: spin_lock(&inode_hash_lock); /* We released the lock, so.. */ - old = find_inode_fast(sb, head, ino); + old = find_inode_fast(sb, head, ino, true); if (!old) { inode->i_ino = ino; spin_lock(&inode->i_lock); @@ -1419,7 +1478,7 @@ struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, struct inode *inode; spin_lock(&inode_hash_lock); - inode = find_inode(sb, head, test, data); + inode = find_inode(sb, head, test, data, true); spin_unlock(&inode_hash_lock); return IS_ERR(inode) ? NULL : inode; @@ -1474,7 +1533,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) struct inode *inode; again: spin_lock(&inode_hash_lock); - inode = find_inode_fast(sb, head, ino); + inode = find_inode_fast(sb, head, ino, true); spin_unlock(&inode_hash_lock); if (inode) { @@ -2235,17 +2294,21 @@ EXPORT_SYMBOL(inode_needs_sync); * wake_up_bit(&inode->i_state, __I_NEW) after removing from the hash list * will DTRT. */ -static void __wait_on_freeing_inode(struct inode *inode) +static void __wait_on_freeing_inode(struct inode *inode, bool locked) { wait_queue_head_t *wq; DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); wq = bit_waitqueue(&inode->i_state, __I_NEW); prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); - spin_unlock(&inode_hash_lock); + rcu_read_unlock(); + if (locked) + spin_unlock(&inode_hash_lock); schedule(); finish_wait(wq, &wait.wq_entry); - spin_lock(&inode_hash_lock); + if (locked) + spin_lock(&inode_hash_lock); + rcu_read_lock(); } static __initdata unsigned long ihash_entries; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0283cf366c2a..aac51da4f90c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3029,7 +3029,12 @@ extern struct inode *inode_insert5(struct inode *inode, unsigned long hashval, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data); -extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); +struct inode *iget5_locked(struct super_block *, unsigned long, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *); +struct inode *iget5_locked_rcu(struct super_block *, unsigned long, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); extern struct inode *find_inode_nowait(struct super_block *, unsigned long, -- cgit v1.2.3 From 66055636a146c435cd226fb5a334176304652f3c Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Wed, 12 Jun 2024 19:05:43 +0530 Subject: drm/mipi-dsi: fix handling of ctx in mipi_dsi_msleep ctx would be better off treated as a pointer to account for most of its usage so far, and brackets should be added to account for operator precedence for correct evaluation. Fixes: f79d6d28d8fe ("drm/mipi-dsi: wrap more functions for streamline handling") Signed-off-by: Tejas Vipin Suggested-by: Douglas Anderson Reviewed-by: Douglas Anderson Link: https://lore.kernel.org/r/20240612133550.473279-3-tejasvipin76@gmail.com [narmstrong: fixed fixes tag] Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20240612133550.473279-3-tejasvipin76@gmail.com --- include/drm/drm_mipi_dsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index bd5a0b6d0711..71d121aeef24 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -293,7 +293,7 @@ ssize_t mipi_dsi_generic_read(struct mipi_dsi_device *dsi, const void *params, #define mipi_dsi_msleep(ctx, delay) \ do { \ - if (!ctx.accum_err) \ + if (!(ctx)->accum_err) \ msleep(delay); \ } while (0) -- cgit v1.2.3 From 3b9c181bcde8555ca81b2394c2dc2201cefc2dd4 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 11 Jun 2024 10:47:15 -0700 Subject: devcoredump: Add dev_coredumpm_timeout() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add function to set a custom coredump timeout. For Xe driver usage, current 5 minutes timeout may be too short for users to search and understand what needs to be done to capture coredump to report bugs. We have plans to automate(distribute a udev script) it but at the end will be up to distros and users to pack it so having a option to increase the timeout is a safer option. v2: - replace dev_coredump_timeout_set() by dev_coredumpm_timeout() (Mukesh) v3: - make dev_coredumpm() static inline (Johannes) v5: - rename DEVCOREDUMP_TIMEOUT -> DEVCD_TIMEOUT to avoid redefinition in include/net/bluetooth/coredump.h v6: - fix definition of dev_coredumpm_timeout() when CONFIG_DEV_COREDUMP is disabled Cc: Rodrigo Vivi Cc: Mukesh Ojha Cc: Johannes Berg Cc: Jonathan Cavitt Reviewed-by: Rodrigo Vivi Reviewed-by: Jonathan Cavitt Signed-off-by: José Roberto de Souza Acked-by: Greg Kroah-Hartman Acked-by: Johannes Berg Link: https://patchwork.freedesktop.org/patch/msgid/20240611174716.72660-1-jose.souza@intel.com Signed-off-by: Rodrigo Vivi --- drivers/base/devcoredump.c | 23 ++++++++++---------- include/linux/devcoredump.h | 53 +++++++++++++++++++++++++++++++++++---------- 2 files changed, 54 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c index 82aeb09b3d1b..c795edad1b96 100644 --- a/drivers/base/devcoredump.c +++ b/drivers/base/devcoredump.c @@ -18,9 +18,6 @@ static struct class devcd_class; /* global disable flag, for security purposes */ static bool devcd_disabled; -/* if data isn't read by userspace after 5 minutes then delete it */ -#define DEVCD_TIMEOUT (HZ * 60 * 5) - struct devcd_entry { struct device devcd_dev; void *data; @@ -328,7 +325,8 @@ void dev_coredump_put(struct device *dev) EXPORT_SYMBOL_GPL(dev_coredump_put); /** - * dev_coredumpm - create device coredump with read/free methods + * dev_coredumpm_timeout - create device coredump with read/free methods with a + * custom timeout. * @dev: the struct device for the crashed device * @owner: the module that contains the read/free functions, use %THIS_MODULE * @data: data cookie for the @read/@free functions @@ -336,17 +334,20 @@ EXPORT_SYMBOL_GPL(dev_coredump_put); * @gfp: allocation flags * @read: function to read from the given buffer * @free: function to free the given buffer + * @timeout: time in jiffies to remove coredump * * Creates a new device coredump for the given device. If a previous one hasn't * been read yet, the new coredump is discarded. The data lifetime is determined * by the device coredump framework and when it is no longer needed the @free * function will be called to free the data. */ -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { static atomic_t devcd_count = ATOMIC_INIT(0); struct devcd_entry *devcd; @@ -403,7 +404,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, dev_set_uevent_suppress(&devcd->devcd_dev, false); kobject_uevent(&devcd->devcd_dev.kobj, KOBJ_ADD); INIT_DELAYED_WORK(&devcd->del_wk, devcd_del); - schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT); + schedule_delayed_work(&devcd->del_wk, timeout); mutex_unlock(&devcd->mutex); return; put_device: @@ -414,7 +415,7 @@ void dev_coredumpm(struct device *dev, struct module *owner, free: free(data); } -EXPORT_SYMBOL_GPL(dev_coredumpm); +EXPORT_SYMBOL_GPL(dev_coredumpm_timeout); /** * dev_coredumpsg - create device coredump that uses scatterlist as data diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h index c8f7eb6cc191..377892604ff4 100644 --- a/include/linux/devcoredump.h +++ b/include/linux/devcoredump.h @@ -12,6 +12,9 @@ #include #include +/* if data isn't read by userspace after 5 minutes then delete it */ +#define DEVCD_TIMEOUT (HZ * 60 * 5) + /* * _devcd_free_sgtable - free all the memory of the given scatterlist table * (i.e. both pages and scatterlist instances) @@ -50,16 +53,17 @@ static inline void _devcd_free_sgtable(struct scatterlist *table) kfree(delete_iter); } - #ifdef CONFIG_DEV_COREDUMP void dev_coredumpv(struct device *dev, void *data, size_t datalen, gfp_t gfp); -void dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)); +void dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout); void dev_coredumpsg(struct device *dev, struct scatterlist *table, size_t datalen, gfp_t gfp); @@ -73,11 +77,13 @@ static inline void dev_coredumpv(struct device *dev, void *data, } static inline void -dev_coredumpm(struct device *dev, struct module *owner, - void *data, size_t datalen, gfp_t gfp, - ssize_t (*read)(char *buffer, loff_t offset, size_t count, - void *data, size_t datalen), - void (*free)(void *data)) +dev_coredumpm_timeout(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, + size_t count, void *data, + size_t datalen), + void (*free)(void *data), + unsigned long timeout) { free(data); } @@ -92,4 +98,29 @@ static inline void dev_coredump_put(struct device *dev) } #endif /* CONFIG_DEV_COREDUMP */ +/** + * dev_coredumpm - create device coredump with read/free methods + * @dev: the struct device for the crashed device + * @owner: the module that contains the read/free functions, use %THIS_MODULE + * @data: data cookie for the @read/@free functions + * @datalen: length of the data + * @gfp: allocation flags + * @read: function to read from the given buffer + * @free: function to free the given buffer + * + * Creates a new device coredump for the given device. If a previous one hasn't + * been read yet, the new coredump is discarded. The data lifetime is determined + * by the device coredump framework and when it is no longer needed the @free + * function will be called to free the data. + */ +static inline void dev_coredumpm(struct device *dev, struct module *owner, + void *data, size_t datalen, gfp_t gfp, + ssize_t (*read)(char *buffer, loff_t offset, size_t count, + void *data, size_t datalen), + void (*free)(void *data)) +{ + dev_coredumpm_timeout(dev, owner, data, datalen, gfp, read, free, + DEVCD_TIMEOUT); +} + #endif /* __DEVCOREDUMP_H */ -- cgit v1.2.3 From e54700f7d6aa2ae0d0a0aeeebedcecd7ce1123fe Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Mon, 3 Jun 2024 20:24:30 +0530 Subject: drm/xe/bmg: Add PCI IDs Add the initial set of device IDs for Battlemage. Signed-off-by: Matt Roper Signed-off-by: Balasubramani Vivekanandan Reviewed-by: Gustavo Sousa Link: https://patchwork.freedesktop.org/patch/msgid/20240603145430.1260817-1-balasubramani.vivekanandan@intel.com --- drivers/gpu/drm/xe/xe_pci.c | 3 ++- include/drm/xe_pciids.h | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c index e84da0cbb8e9..08583fdd7643 100644 --- a/drivers/gpu/drm/xe/xe_pci.c +++ b/drivers/gpu/drm/xe/xe_pci.c @@ -340,7 +340,7 @@ static const struct xe_device_desc lnl_desc = { .require_force_probe = true, }; -static const struct xe_device_desc bmg_desc __maybe_unused = { +static const struct xe_device_desc bmg_desc = { DGFX_FEATURES, PLATFORM(BATTLEMAGE), .require_force_probe = true, @@ -389,6 +389,7 @@ static const struct pci_device_id pciidlist[] = { XE_DG2_IDS(INTEL_VGA_DEVICE, &dg2_desc), XE_MTL_IDS(INTEL_VGA_DEVICE, &mtl_desc), XE_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc), + XE_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc), { } }; MODULE_DEVICE_TABLE(pci, pciidlist); diff --git a/include/drm/xe_pciids.h b/include/drm/xe_pciids.h index adb37bc541e4..644872a35c35 100644 --- a/include/drm/xe_pciids.h +++ b/include/drm/xe_pciids.h @@ -192,4 +192,11 @@ MACRO__(0x64A0, ## __VA_ARGS__), \ MACRO__(0x64B0, ## __VA_ARGS__) +#define XE_BMG_IDS(MACRO__, ...) \ + MACRO__(0xE202, ## __VA_ARGS__), \ + MACRO__(0xE20B, ## __VA_ARGS__), \ + MACRO__(0xE20C, ## __VA_ARGS__), \ + MACRO__(0xE20D, ## __VA_ARGS__), \ + MACRO__(0xE212, ## __VA_ARGS__) + #endif -- cgit v1.2.3 From e038ee6189842e9662d2fc59d09dbcf48350cf99 Mon Sep 17 00:00:00 2001 From: Anuj Gupta Date: Mon, 10 Jun 2024 16:41:44 +0530 Subject: block: unmap and free user mapped integrity via submitter The user mapped intergity is copied back and unpinned by bio_integrity_free which is a low-level routine. Do it via the submitter rather than doing it in the low-level block layer code, to split the submitter side from the consumer side of the bio. Signed-off-by: Anuj Gupta Signed-off-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20240610111144.14647-1-anuj20.g@samsung.com Signed-off-by: Jens Axboe --- block/bio-integrity.c | 26 ++++++++++++++++++++++++-- drivers/nvme/host/ioctl.c | 15 +++++++++++---- include/linux/bio.h | 4 ++++ 3 files changed, 39 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 2e3e8e04961e..8b528e12136f 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -144,16 +144,38 @@ void bio_integrity_free(struct bio *bio) struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_set *bs = bio->bi_pool; + if (bip->bip_flags & BIP_INTEGRITY_USER) + return; if (bip->bip_flags & BIP_BLOCK_INTEGRITY) kfree(bvec_virt(bip->bip_vec)); - else if (bip->bip_flags & BIP_INTEGRITY_USER) - bio_integrity_unmap_user(bip); __bio_integrity_free(bs, bip); bio->bi_integrity = NULL; bio->bi_opf &= ~REQ_INTEGRITY; } +/** + * bio_integrity_unmap_free_user - Unmap and free bio user integrity payload + * @bio: bio containing bip to be unmapped and freed + * + * Description: Used to unmap and free the user mapped integrity portion of a + * bio. Submitter attaching the user integrity buffer is responsible for + * unmapping and freeing it during completion. + */ +void bio_integrity_unmap_free_user(struct bio *bio) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + struct bio_set *bs = bio->bi_pool; + + if (WARN_ON_ONCE(!(bip->bip_flags & BIP_INTEGRITY_USER))) + return; + bio_integrity_unmap_user(bip); + __bio_integrity_free(bs, bip); + bio->bi_integrity = NULL; + bio->bi_opf &= ~REQ_INTEGRITY; +} +EXPORT_SYMBOL(bio_integrity_unmap_free_user); + /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 9d9d2a127c4e..8b69427a4476 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -111,6 +111,13 @@ static struct request *nvme_alloc_user_request(struct request_queue *q, return req; } +static void nvme_unmap_bio(struct bio *bio) +{ + if (bio_integrity(bio)) + bio_integrity_unmap_free_user(bio); + blk_rq_unmap_user(bio); +} + static int nvme_map_user_request(struct request *req, u64 ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, struct io_uring_cmd *ioucmd, unsigned int flags) @@ -157,7 +164,7 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, out_unmap: if (bio) - blk_rq_unmap_user(bio); + nvme_unmap_bio(bio); out: blk_mq_free_request(req); return ret; @@ -195,7 +202,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); if (bio) - blk_rq_unmap_user(bio); + nvme_unmap_bio(bio); blk_mq_free_request(req); if (effects) @@ -406,7 +413,7 @@ static void nvme_uring_task_cb(struct io_uring_cmd *ioucmd, struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd); if (pdu->bio) - blk_rq_unmap_user(pdu->bio); + nvme_unmap_bio(pdu->bio); io_uring_cmd_done(ioucmd, pdu->status, pdu->result, issue_flags); } @@ -432,7 +439,7 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req, */ if (blk_rq_is_poll(req)) { if (pdu->bio) - blk_rq_unmap_user(pdu->bio); + nvme_unmap_bio(pdu->bio); io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); } else { io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); diff --git a/include/linux/bio.h b/include/linux/bio.h index d5379548d684..818e93612947 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -731,6 +731,7 @@ static inline bool bioset_initialized(struct bio_set *bs) bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t len, u32 seed); +void bio_integrity_unmap_free_user(struct bio *bio); extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_prep(struct bio *); @@ -807,6 +808,9 @@ static inline int bio_integrity_map_user(struct bio *bio, void __user *ubuf, { return -EINVAL; } +static inline void bio_integrity_unmap_free_user(struct bio *bio) +{ +} #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v1.2.3 From ec209ad86324de84ef66990f0e9df0851e45e054 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 12 Jun 2024 09:58:32 -0600 Subject: bpf: verifier: Relax caller requirements for kfunc projection type args Currently, if a kfunc accepts a projection type as an argument (eg struct __sk_buff *), the caller must exactly provide exactly the same type with provable provenance. However in practice, kfuncs that accept projection types _must_ cast to the underlying type before use b/c projection type layouts are completely made up. Thus, it is ok to relax the verifier rules around implicit conversions. We will use this functionality in the next commit when we align kfuncs to user-facing types. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/e2c025cb09ccfd4af1ec9e18284dc3cecff7514d.1718207789.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- include/linux/btf.h | 1 + kernel/bpf/btf.c | 13 ++++++++++--- kernel/bpf/verifier.c | 10 +++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index f9e56fd12a9f..56d91daacdba 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -531,6 +531,7 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id); int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt, struct module *owner); struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id); +bool btf_is_projection_of(const char *pname, const char *tname); bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7928d920056f..ce4707968217 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5820,6 +5820,15 @@ static int find_kern_ctx_type_id(enum bpf_prog_type prog_type) return ctx_type->type; } +bool btf_is_projection_of(const char *pname, const char *tname) +{ + if (strcmp(pname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) + return true; + if (strcmp(pname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) + return true; + return false; +} + bool btf_is_prog_ctx_type(struct bpf_verifier_log *log, const struct btf *btf, const struct btf_type *t, enum bpf_prog_type prog_type, int arg) @@ -5882,9 +5891,7 @@ again: * int socket_filter_bpf_prog(struct __sk_buff *skb) * { // no fields of skb are ever used } */ - if (strcmp(ctx_tname, "__sk_buff") == 0 && strcmp(tname, "sk_buff") == 0) - return true; - if (strcmp(ctx_tname, "xdp_md") == 0 && strcmp(tname, "xdp_buff") == 0) + if (btf_is_projection_of(ctx_tname, tname)) return true; if (strcmp(ctx_tname, tname)) { /* bpf_user_pt_regs_t is a typedef, so resolve it to diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 20ac9cfd54dd..dcac6119d810 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -11265,6 +11265,8 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, bool strict_type_match = false; const struct btf *reg_btf; const char *reg_ref_tname; + bool taking_projection; + bool struct_same; u32 reg_ref_id; if (base_type(reg->type) == PTR_TO_BTF_ID) { @@ -11308,7 +11310,13 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id); reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off); - if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) { + struct_same = btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match); + /* If kfunc is accepting a projection type (ie. __sk_buff), it cannot + * actually use it -- it must cast to the underlying type. So we allow + * caller to pass in the underlying type. + */ + taking_projection = btf_is_projection_of(ref_tname, reg_ref_tname); + if (!taking_projection && !struct_same) { verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n", meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1, btf_type_str(reg_ref_t), reg_ref_tname); -- cgit v1.2.3 From cce4c40b960673f9e020835def310f1e89d3a940 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Wed, 12 Jun 2024 09:58:33 -0600 Subject: bpf: treewide: Align kfunc signatures to prog point-of-view Previously, kfunc declarations in bpf_kfuncs.h (and others) used "user facing" types for kfuncs prototypes while the actual kfunc definitions used "kernel facing" types. More specifically: bpf_dynptr vs bpf_dynptr_kern, __sk_buff vs sk_buff, and xdp_md vs xdp_buff. It wasn't an issue before, as the verifier allows aliased types. However, since we are now generating kfunc prototypes in vmlinux.h (in addition to keeping bpf_kfuncs.h around), this conflict creates compilation errors. Fix this conflict by using "user facing" types in kfunc definitions. This results in more casts, but otherwise has no additional runtime cost. Note, similar to 5b268d1ebcdc ("bpf: Have bpf_rdonly_cast() take a const pointer"), we also make kfuncs take const arguments where appropriate in order to make the kfunc more permissive. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/b58346a63a0e66bc9b7504da751b526b0b189a67.1718207789.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- fs/verity/measure.c | 5 +-- include/linux/bpf.h | 8 ++--- kernel/bpf/crypto.c | 24 ++++++++----- kernel/bpf/helpers.c | 39 +++++++++++++++------- kernel/bpf/verifier.c | 2 +- kernel/trace/bpf_trace.c | 15 +++++---- net/core/filter.c | 32 +++++++++++------- .../testing/selftests/bpf/progs/ip_check_defrag.c | 10 +++--- .../selftests/bpf/progs/verifier_netfilter_ctx.c | 6 ++-- 9 files changed, 88 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/fs/verity/measure.c b/fs/verity/measure.c index 3969d54158d1..175d2f1bc089 100644 --- a/fs/verity/measure.c +++ b/fs/verity/measure.c @@ -111,14 +111,15 @@ __bpf_kfunc_start_defs(); /** * bpf_get_fsverity_digest: read fsverity digest of file * @file: file to get digest from - * @digest_ptr: (out) dynptr for struct fsverity_digest + * @digest_p: (out) dynptr for struct fsverity_digest * * Read fsverity_digest of *file* into *digest_ptr*. * * Return: 0 on success, a negative value on error. */ -__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr_kern *digest_ptr) +__bpf_kfunc int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_p) { + struct bpf_dynptr_kern *digest_ptr = (struct bpf_dynptr_kern *)digest_p; const struct inode *inode = file_inode(file); u32 dynptr_sz = __bpf_dynptr_size(digest_ptr); struct fsverity_digest *arg; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a834f4b761bc..f636b4998bf7 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3265,8 +3265,8 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size); -int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr); +int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr); #else static inline bool bpf_sock_common_is_valid_access(int off, int size, enum bpf_access_type type, @@ -3288,8 +3288,8 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type, { return 0; } -static inline int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr) +static inline int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr) { return -EOPNOTSUPP; } diff --git a/kernel/bpf/crypto.c b/kernel/bpf/crypto.c index 2bee4af91e38..3c1de0e5c0bd 100644 --- a/kernel/bpf/crypto.c +++ b/kernel/bpf/crypto.c @@ -311,11 +311,15 @@ static int bpf_crypto_crypt(const struct bpf_crypto_ctx *ctx, * Decrypts provided buffer using IV data and the crypto context. Crypto context must be configured. */ __bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, - const struct bpf_dynptr_kern *src, - const struct bpf_dynptr_kern *dst, - const struct bpf_dynptr_kern *siv) + const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, + const struct bpf_dynptr *siv) { - return bpf_crypto_crypt(ctx, src, dst, siv, true); + const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src; + const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst; + const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv; + + return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, true); } /** @@ -328,11 +332,15 @@ __bpf_kfunc int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, * Encrypts provided buffer using IV data and the crypto context. Crypto context must be configured. */ __bpf_kfunc int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, - const struct bpf_dynptr_kern *src, - const struct bpf_dynptr_kern *dst, - const struct bpf_dynptr_kern *siv) + const struct bpf_dynptr *src, + const struct bpf_dynptr *dst, + const struct bpf_dynptr *siv) { - return bpf_crypto_crypt(ctx, src, dst, siv, false); + const struct bpf_dynptr_kern *src_kern = (struct bpf_dynptr_kern *)src; + const struct bpf_dynptr_kern *dst_kern = (struct bpf_dynptr_kern *)dst; + const struct bpf_dynptr_kern *siv_kern = (struct bpf_dynptr_kern *)siv; + + return bpf_crypto_crypt(ctx, src_kern, dst_kern, siv_kern, false); } __bpf_kfunc_end_defs(); diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 6f1abcb4b084..3ac521c48bba 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2459,9 +2459,10 @@ __bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid) * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; enum bpf_dynptr_type type; u32 len = buffer__szk; int err; @@ -2543,9 +2544,11 @@ __bpf_kfunc void *bpf_dynptr_slice(const struct bpf_dynptr_kern *ptr, u32 offset * provided buffer, with its contents containing the data, if unable to obtain * direct pointer) */ -__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 offset, +__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk) { + const struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data || __bpf_dynptr_is_rdonly(ptr)) return NULL; @@ -2571,11 +2574,12 @@ __bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr_kern *ptr, u32 o * will be copied out into the buffer and the user will need to call * bpf_dynptr_write() to commit changes. */ - return bpf_dynptr_slice(ptr, offset, buffer__opt, buffer__szk); + return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk); } -__bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 end) +__bpf_kfunc int bpf_dynptr_adjust(const struct bpf_dynptr *p, u32 start, u32 end) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; u32 size; if (!ptr->data || start > end) @@ -2592,36 +2596,45 @@ __bpf_kfunc int bpf_dynptr_adjust(struct bpf_dynptr_kern *ptr, u32 start, u32 en return 0; } -__bpf_kfunc bool bpf_dynptr_is_null(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_null(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + return !ptr->data; } -__bpf_kfunc bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr) +__bpf_kfunc bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return false; return __bpf_dynptr_is_rdonly(ptr); } -__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr_kern *ptr) +__bpf_kfunc __u32 bpf_dynptr_size(const struct bpf_dynptr *p) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) return -EINVAL; return __bpf_dynptr_size(ptr); } -__bpf_kfunc int bpf_dynptr_clone(struct bpf_dynptr_kern *ptr, - struct bpf_dynptr_kern *clone__uninit) +__bpf_kfunc int bpf_dynptr_clone(const struct bpf_dynptr *p, + struct bpf_dynptr *clone__uninit) { + struct bpf_dynptr_kern *clone = (struct bpf_dynptr_kern *)clone__uninit; + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; + if (!ptr->data) { - bpf_dynptr_set_null(clone__uninit); + bpf_dynptr_set_null(clone); return -EINVAL; } - *clone__uninit = *ptr; + *clone = *ptr; return 0; } @@ -2986,7 +2999,9 @@ late_initcall(kfunc_init); */ const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len) { - return bpf_dynptr_slice(ptr, 0, NULL, len); + const struct bpf_dynptr *p = (struct bpf_dynptr *)ptr; + + return bpf_dynptr_slice(p, 0, NULL, len); } /* Get a pointer to dynptr data up to len bytes for read write access. If diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dcac6119d810..acc9dd830807 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10914,7 +10914,7 @@ enum { }; BTF_ID_LIST(kf_arg_btf_ids) -BTF_ID(struct, bpf_dynptr_kern) +BTF_ID(struct, bpf_dynptr) BTF_ID(struct, bpf_list_head) BTF_ID(struct, bpf_list_node) BTF_ID(struct, bpf_rb_root) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index bc16e21a2a44..4b3fda456299 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1369,8 +1369,8 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) #ifdef CONFIG_SYSTEM_DATA_VERIFICATION /** * bpf_verify_pkcs7_signature - verify a PKCS#7 signature - * @data_ptr: data to verify - * @sig_ptr: signature of the data + * @data_p: data to verify + * @sig_p: signature of the data * @trusted_keyring: keyring with keys trusted for signature verification * * Verify the PKCS#7 signature *sig_ptr* against the supplied *data_ptr* @@ -1378,10 +1378,12 @@ __bpf_kfunc void bpf_key_put(struct bpf_key *bkey) * * Return: 0 on success, a negative value on error. */ -__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr_kern *data_ptr, - struct bpf_dynptr_kern *sig_ptr, +__bpf_kfunc int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_p, + struct bpf_dynptr *sig_p, struct bpf_key *trusted_keyring) { + struct bpf_dynptr_kern *data_ptr = (struct bpf_dynptr_kern *)data_p; + struct bpf_dynptr_kern *sig_ptr = (struct bpf_dynptr_kern *)sig_p; const void *data, *sig; u32 data_len, sig_len; int ret; @@ -1444,7 +1446,7 @@ __bpf_kfunc_start_defs(); * bpf_get_file_xattr - get xattr of a file * @file: file to get xattr from * @name__str: name of the xattr - * @value_ptr: output buffer of the xattr value + * @value_p: output buffer of the xattr value * * Get xattr *name__str* of *file* and store the output in *value_ptr*. * @@ -1453,8 +1455,9 @@ __bpf_kfunc_start_defs(); * Return: 0 on success, a negative value on error. */ __bpf_kfunc int bpf_get_file_xattr(struct file *file, const char *name__str, - struct bpf_dynptr_kern *value_ptr) + struct bpf_dynptr *value_p) { + struct bpf_dynptr_kern *value_ptr = (struct bpf_dynptr_kern *)value_p; struct dentry *dentry; u32 value_len; void *value; diff --git a/net/core/filter.c b/net/core/filter.c index 7c46ecba3b01..73722790cee3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -11859,28 +11859,34 @@ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) } __bpf_kfunc_start_defs(); -__bpf_kfunc int bpf_dynptr_from_skb(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +__bpf_kfunc int bpf_dynptr_from_skb(struct __sk_buff *s, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; + struct sk_buff *skb = (struct sk_buff *)s; + if (flags) { - bpf_dynptr_set_null(ptr__uninit); + bpf_dynptr_set_null(ptr); return -EINVAL; } - bpf_dynptr_init(ptr__uninit, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); + bpf_dynptr_init(ptr, skb, BPF_DYNPTR_TYPE_SKB, 0, skb->len); return 0; } -__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_buff *xdp, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +__bpf_kfunc int bpf_dynptr_from_xdp(struct xdp_md *x, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; + struct xdp_buff *xdp = (struct xdp_buff *)x; + if (flags) { - bpf_dynptr_set_null(ptr__uninit); + bpf_dynptr_set_null(ptr); return -EINVAL; } - bpf_dynptr_init(ptr__uninit, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); + bpf_dynptr_init(ptr, xdp, BPF_DYNPTR_TYPE_XDP, 0, xdp_get_buff_len(xdp)); return 0; } @@ -11906,10 +11912,11 @@ __bpf_kfunc int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, return 0; } -__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, +__bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk, struct bpf_tcp_req_attrs *attrs, int attrs__sz) { #if IS_ENABLED(CONFIG_SYN_COOKIES) + struct sk_buff *skb = (struct sk_buff *)s; const struct request_sock_ops *ops; struct inet_request_sock *ireq; struct tcp_request_sock *treq; @@ -12004,16 +12011,17 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct sk_buff *skb, struct sock *sk, __bpf_kfunc_end_defs(); -int bpf_dynptr_from_skb_rdonly(struct sk_buff *skb, u64 flags, - struct bpf_dynptr_kern *ptr__uninit) +int bpf_dynptr_from_skb_rdonly(struct __sk_buff *skb, u64 flags, + struct bpf_dynptr *ptr__uninit) { + struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)ptr__uninit; int err; err = bpf_dynptr_from_skb(skb, flags, ptr__uninit); if (err) return err; - bpf_dynptr_set_rdonly(ptr__uninit); + bpf_dynptr_set_rdonly(ptr); return 0; } diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c index 1c2b6c1616b0..645b2c9f7867 100644 --- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -12,7 +12,7 @@ #define IP_OFFSET 0x1FFF #define NEXTHDR_FRAGMENT 44 -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -42,7 +42,7 @@ static bool is_frag_v6(struct ipv6hdr *ip6h) return ip6h->nexthdr == NEXTHDR_FRAGMENT; } -static int handle_v4(struct sk_buff *skb) +static int handle_v4(struct __sk_buff *skb) { struct bpf_dynptr ptr; u8 iph_buf[20] = {}; @@ -64,7 +64,7 @@ static int handle_v4(struct sk_buff *skb) return NF_ACCEPT; } -static int handle_v6(struct sk_buff *skb) +static int handle_v6(struct __sk_buff *skb) { struct bpf_dynptr ptr; struct ipv6hdr *ip6h; @@ -89,9 +89,9 @@ static int handle_v6(struct sk_buff *skb) SEC("netfilter") int defrag(struct bpf_nf_ctx *ctx) { - struct sk_buff *skb = ctx->skb; + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; - switch (bpf_ntohs(skb->protocol)) { + switch (bpf_ntohs(ctx->skb->protocol)) { case ETH_P_IP: return handle_v4(skb); case ETH_P_IPV6: diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c index 65bba330e7e5..ab9f9f2620ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c @@ -79,7 +79,7 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx) return NF_ACCEPT; } -extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, +extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym; extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, void *buffer, uint32_t buffer__sz) __ksym; @@ -90,8 +90,8 @@ __success __failure_unpriv __retval(0) int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) { + struct __sk_buff *skb = (struct __sk_buff *)ctx->skb; const struct nf_hook_state *state = ctx->state; - struct sk_buff *skb = ctx->skb; const struct iphdr *iph; const struct tcphdr *th; u8 buffer_iph[20] = {}; @@ -99,7 +99,7 @@ int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx) struct bpf_dynptr ptr; uint8_t ihl; - if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) + if (ctx->skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr)) return NF_ACCEPT; iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph)); -- cgit v1.2.3 From 7fc45cb68696c7213c484ec81892bc8a986fde52 Mon Sep 17 00:00:00 2001 From: Shradha Gupta Date: Mon, 10 Jun 2024 03:28:39 -0700 Subject: net: mana: Allow variable size indirection table Allow variable size indirection table allocation in MANA instead of using a constant value MANA_INDIRECT_TABLE_SIZE. The size is now derived from the MANA_QUERY_VPORT_CONFIG and the indirection table is allocated dynamically. Signed-off-by: Shradha Gupta Link: https://lore.kernel.org/r/1718015319-9609-1-git-send-email-shradhagupta@linux.microsoft.com Reviewed-by: Dexuan Cui Reviewed-by: Haiyang Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mana/qp.c | 10 +-- drivers/net/ethernet/microsoft/mana/mana_en.c | 85 ++++++++++++++++++---- drivers/net/ethernet/microsoft/mana/mana_ethtool.c | 27 +++++-- include/net/mana/gdma.h | 4 +- include/net/mana/mana.h | 9 ++- 5 files changed, 104 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c index ba13c5abf8ef..2d411a16a127 100644 --- a/drivers/infiniband/hw/mana/qp.c +++ b/drivers/infiniband/hw/mana/qp.c @@ -21,7 +21,7 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, gc = mdev_to_gc(dev); - req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_SIZE); + req_buf_size = struct_size(req, indir_tab, MANA_INDIRECT_TABLE_DEF_SIZE); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -41,18 +41,18 @@ static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, if (log_ind_tbl_size) req->rss_enable = true; - req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE; + req->num_indir_entries = MANA_INDIRECT_TABLE_DEF_SIZE; req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, indir_tab); req->update_indir_tab = true; req->cqe_coalescing_enable = 1; /* The ind table passed to the hardware must have - * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb + * MANA_INDIRECT_TABLE_DEF_SIZE entries. Adjust the verb * ind_table to MANA_INDIRECT_TABLE_SIZE if required */ ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size); - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < MANA_INDIRECT_TABLE_DEF_SIZE; i++) { req->indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)]; ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i, req->indir_tab[i]); @@ -137,7 +137,7 @@ static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, } ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size; - if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) { + if (ind_tbl_size > MANA_INDIRECT_TABLE_DEF_SIZE) { ibdev_dbg(&mdev->ib_dev, "Indirect table size %d exceeding limit\n", ind_tbl_size); diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d087cf954f75..b89ad4afd66e 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -481,7 +481,7 @@ static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb, struct sock *sk = skb->sk; int txq; - txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK]; + txq = apc->indir_table[hash & (apc->indir_table_sz - 1)]; if (txq != old_q && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) @@ -721,6 +721,13 @@ static void mana_cleanup_port_context(struct mana_port_context *apc) apc->rxqs = NULL; } +static void mana_cleanup_indir_table(struct mana_port_context *apc) +{ + apc->indir_table_sz = 0; + kfree(apc->indir_table); + kfree(apc->rxobj_table); +} + static int mana_init_port_context(struct mana_port_context *apc) { apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *), @@ -962,7 +969,16 @@ static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index, *max_sq = resp.max_num_sq; *max_rq = resp.max_num_rq; - *num_indir_entry = resp.num_indirection_ent; + if (resp.num_indirection_ent > 0 && + resp.num_indirection_ent <= MANA_INDIRECT_TABLE_MAX_SIZE && + is_power_of_2(resp.num_indirection_ent)) { + *num_indir_entry = resp.num_indirection_ent; + } else { + netdev_warn(apc->ndev, + "Setting indirection table size to default %d for vPort %d\n", + MANA_INDIRECT_TABLE_DEF_SIZE, apc->port_idx); + *num_indir_entry = MANA_INDIRECT_TABLE_DEF_SIZE; + } apc->port_handle = resp.vport; ether_addr_copy(apc->mac_addr, resp.mac_addr); @@ -1054,14 +1070,13 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, bool update_default_rxobj, bool update_key, bool update_tab) { - u16 num_entries = MANA_INDIRECT_TABLE_SIZE; struct mana_cfg_rx_steer_req_v2 *req; struct mana_cfg_rx_steer_resp resp = {}; struct net_device *ndev = apc->ndev; u32 req_buf_size; int err; - req_buf_size = struct_size(req, indir_tab, num_entries); + req_buf_size = struct_size(req, indir_tab, apc->indir_table_sz); req = kzalloc(req_buf_size, GFP_KERNEL); if (!req) return -ENOMEM; @@ -1072,7 +1087,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, req->hdr.req.msg_version = GDMA_MESSAGE_V2; req->vport = apc->port_handle; - req->num_indir_entries = num_entries; + req->num_indir_entries = apc->indir_table_sz; req->indir_tab_offset = offsetof(struct mana_cfg_rx_steer_req_v2, indir_tab); req->rx_enable = rx; @@ -1111,7 +1126,7 @@ static int mana_cfg_vport_steering(struct mana_port_context *apc, } netdev_info(ndev, "Configured steering vPort %llu entries %u\n", - apc->port_handle, num_entries); + apc->port_handle, apc->indir_table_sz); out: kfree(req); return err; @@ -2344,11 +2359,33 @@ static int mana_create_vport(struct mana_port_context *apc, return mana_create_txq(apc, net); } +static int mana_rss_table_alloc(struct mana_port_context *apc) +{ + if (!apc->indir_table_sz) { + netdev_err(apc->ndev, + "Indirection table size not set for vPort %d\n", + apc->port_idx); + return -EINVAL; + } + + apc->indir_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!apc->indir_table) + return -ENOMEM; + + apc->rxobj_table = kcalloc(apc->indir_table_sz, sizeof(mana_handle_t), GFP_KERNEL); + if (!apc->rxobj_table) { + kfree(apc->indir_table); + return -ENOMEM; + } + + return 0; +} + static void mana_rss_table_init(struct mana_port_context *apc) { int i; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = ethtool_rxfh_indir_default(i, apc->num_queues); } @@ -2361,7 +2398,7 @@ int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx, int i; if (update_tab) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { queue_idx = apc->indir_table[i]; apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj; } @@ -2466,7 +2503,6 @@ static int mana_init_port(struct net_device *ndev) struct mana_port_context *apc = netdev_priv(ndev); u32 max_txq, max_rxq, max_queues; int port_idx = apc->port_idx; - u32 num_indirect_entries; int err; err = mana_init_port_context(apc); @@ -2474,7 +2510,7 @@ static int mana_init_port(struct net_device *ndev) return err; err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq, - &num_indirect_entries); + &apc->indir_table_sz); if (err) { netdev_err(ndev, "Failed to query info for vPort %d\n", port_idx); @@ -2723,6 +2759,10 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, if (err) goto free_net; + err = mana_rss_table_alloc(apc); + if (err) + goto reset_apc; + netdev_lockdep_set_classes(ndev); ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; @@ -2739,11 +2779,13 @@ static int mana_probe_port(struct mana_context *ac, int port_idx, err = register_netdev(ndev); if (err) { netdev_err(ndev, "Unable to register netdev.\n"); - goto reset_apc; + goto free_indir; } return 0; +free_indir: + mana_cleanup_indir_table(apc); reset_apc: kfree(apc->rxqs); apc->rxqs = NULL; @@ -2872,16 +2914,30 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (!resuming) { for (i = 0; i < ac->num_ports; i++) { err = mana_probe_port(ac, i, &ac->ports[i]); - if (err) + /* we log the port for which the probe failed and stop + * probes for subsequent ports. + * Note that we keep running ports, for which the probes + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Probe Failed for port %d\n", i); break; + } } } else { for (i = 0; i < ac->num_ports; i++) { rtnl_lock(); err = mana_attach(ac->ports[i]); rtnl_unlock(); - if (err) + /* we log the port for which the attach failed and stop + * attach for subsequent ports + * Note that we keep running ports, for which the attach + * were successful, unless add_adev fails too + */ + if (err) { + dev_err(dev, "Attach Failed for port %d\n", i); break; + } } } @@ -2897,6 +2953,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) { struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; + struct mana_port_context *apc; struct device *dev = gc->dev; struct net_device *ndev; int err; @@ -2908,6 +2965,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) for (i = 0; i < ac->num_ports; i++) { ndev = ac->ports[i]; + apc = netdev_priv(ndev); if (!ndev) { if (i == 0) dev_err(dev, "No net device to remove\n"); @@ -2931,6 +2989,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) } unregister_netdevice(ndev); + mana_cleanup_indir_table(apc); rtnl_unlock(); diff --git a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c index ab2413d71f6c..146d5db1792f 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_ethtool.c +++ b/drivers/net/ethernet/microsoft/mana/mana_ethtool.c @@ -245,7 +245,9 @@ static u32 mana_get_rxfh_key_size(struct net_device *ndev) static u32 mana_rss_indir_size(struct net_device *ndev) { - return MANA_INDIRECT_TABLE_SIZE; + struct mana_port_context *apc = netdev_priv(ndev); + + return apc->indir_table_sz; } static int mana_get_rxfh(struct net_device *ndev, @@ -257,7 +259,7 @@ static int mana_get_rxfh(struct net_device *ndev, rxfh->hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ if (rxfh->indir) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) rxfh->indir[i] = apc->indir_table[i]; } @@ -273,8 +275,8 @@ static int mana_set_rxfh(struct net_device *ndev, { struct mana_port_context *apc = netdev_priv(ndev); bool update_hash = false, update_table = false; - u32 save_table[MANA_INDIRECT_TABLE_SIZE]; u8 save_key[MANA_HASH_KEY_SIZE]; + u32 *save_table; int i, err; if (!apc->port_is_up) @@ -284,13 +286,19 @@ static int mana_set_rxfh(struct net_device *ndev, rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; + save_table = kcalloc(apc->indir_table_sz, sizeof(u32), GFP_KERNEL); + if (!save_table) + return -ENOMEM; + if (rxfh->indir) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) - if (rxfh->indir[i] >= apc->num_queues) - return -EINVAL; + for (i = 0; i < apc->indir_table_sz; i++) + if (rxfh->indir[i] >= apc->num_queues) { + err = -EINVAL; + goto cleanup; + } update_table = true; - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + for (i = 0; i < apc->indir_table_sz; i++) { save_table[i] = apc->indir_table[i]; apc->indir_table[i] = rxfh->indir[i]; } @@ -306,7 +314,7 @@ static int mana_set_rxfh(struct net_device *ndev, if (err) { /* recover to original values */ if (update_table) { - for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) + for (i = 0; i < apc->indir_table_sz; i++) apc->indir_table[i] = save_table[i]; } @@ -316,6 +324,9 @@ static int mana_set_rxfh(struct net_device *ndev, mana_config_rss(apc, TRI_STATE_TRUE, update_hash, update_table); } +cleanup: + kfree(save_table); + return err; } diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 27684135bb4d..c547756c4284 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -543,11 +543,13 @@ enum { */ #define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) #define GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG BIT(3) +#define GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT BIT(5) #define GDMA_DRV_CAP_FLAGS1 \ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX | \ - GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG) + GDMA_DRV_CAP_FLAG_1_HWC_TIMEOUT_RECONFIG | \ + GDMA_DRV_CAP_FLAG_1_VARIABLE_INDIRECTION_TABLE_SUPPORT) #define GDMA_DRV_CAP_FLAGS2 0 diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 561f6719fb4e..59823901b74f 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -30,8 +30,8 @@ enum TRI_STATE { }; /* Number of entries for hardware indirection table must be in power of 2 */ -#define MANA_INDIRECT_TABLE_SIZE 64 -#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) +#define MANA_INDIRECT_TABLE_MAX_SIZE 512 +#define MANA_INDIRECT_TABLE_DEF_SIZE 64 /* The Toeplitz hash key's length in bytes: should be multiple of 8 */ #define MANA_HASH_KEY_SIZE 40 @@ -410,10 +410,11 @@ struct mana_port_context { struct mana_tx_qp *tx_qp; /* Indirection Table for RX & TX. The values are queue indexes */ - u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; + u32 *indir_table; + u32 indir_table_sz; /* Indirection table containing RxObject Handles */ - mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; + mana_handle_t *rxobj_table; /* Hash key used by the NIC */ u8 hashkey[MANA_HASH_KEY_SIZE]; -- cgit v1.2.3 From b975d3ee5962237c1e2f5d5aeeaaf0dc2173486c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Jun 2024 00:10:39 +0200 Subject: net: add and use skb_get_hash_net Years ago flow dissector gained ability to delegate flow dissection to a bpf program, scoped per netns. Unfortunately, skb_get_hash() only gets an sk_buff argument instead of both net+skb. This means the flow dissector needs to obtain the netns pointer from somewhere else. The netns is derived from skb->dev, and if that is not available, from skb->sk. If neither is set, we hit a (benign) WARN_ON_ONCE(). Trying both dev and sk covers most cases, but not all, as recently reported by Christoph Paasch. In case of nf-generated tcp reset, both sk and dev are NULL: WARNING: .. net/core/flow_dissector.c:1104 skb_flow_dissect_flow_keys include/linux/skbuff.h:1536 [inline] skb_get_hash include/linux/skbuff.h:1578 [inline] nft_trace_init+0x7d/0x120 net/netfilter/nf_tables_trace.c:320 nft_do_chain+0xb26/0xb90 net/netfilter/nf_tables_core.c:268 nft_do_chain_ipv4+0x7a/0xa0 net/netfilter/nft_chain_filter.c:23 nf_hook_slow+0x57/0x160 net/netfilter/core.c:626 __ip_local_out+0x21d/0x260 net/ipv4/ip_output.c:118 ip_local_out+0x26/0x1e0 net/ipv4/ip_output.c:127 nf_send_reset+0x58c/0x700 net/ipv4/netfilter/nf_reject_ipv4.c:308 nft_reject_ipv4_eval+0x53/0x90 net/ipv4/netfilter/nft_reject_ipv4.c:30 [..] syzkaller did something like this: table inet filter { chain input { type filter hook input priority filter; policy accept; meta nftrace set 1 tcp dport 42 reject with tcp reset } chain output { type filter hook output priority filter; policy accept; # empty chain is enough } } ... then sends a tcp packet to port 42. Initial attempt to simply set skb->dev from nf_reject_ipv4 doesn't cover all cases: skbs generated via ipv4 igmp_send_report trigger similar splat. Moreover, Pablo Neira found that nft_hash.c uses __skb_get_hash_symmetric() which would trigger same warn splat for such skbs. Lets allow callers to pass the current netns explicitly. The nf_trace infrastructure is adjusted to use the new helper. __skb_get_hash_symmetric is handled in the next patch. Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/494 Reviewed-by: Willem de Bruijn Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240608221057.16070-2-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 12 ++++++++++-- net/core/flow_dissector.c | 15 +++++++++++---- net/netfilter/nf_tables_trace.c | 2 +- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index fe7d8dbef77e..6e78019f899a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1498,7 +1498,7 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } -void __skb_get_hash(struct sk_buff *skb); +void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, @@ -1578,10 +1578,18 @@ void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); +static inline __u32 skb_get_hash_net(const struct net *net, struct sk_buff *skb) +{ + if (!skb->l4_hash && !skb->sw_hash) + __skb_get_hash_net(net, skb); + + return skb->hash; +} + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) - __skb_get_hash(skb); + __skb_get_hash_net(NULL, skb); return skb->hash; } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 59fe46077b3c..702b4f0a70b6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1860,7 +1860,8 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); /** - * __skb_get_hash: calculate a flow hash + * __skb_get_hash_net: calculate a flow hash + * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to calculate flow hash from * * This function calculates a flow hash based on src/dst addresses @@ -1868,18 +1869,24 @@ EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ -void __skb_get_hash(struct sk_buff *skb) +void __skb_get_hash_net(const struct net *net, struct sk_buff *skb) { struct flow_keys keys; u32 hash; + memset(&keys, 0, sizeof(keys)); + + __skb_flow_dissect(net, skb, &flow_keys_dissector, + &keys, NULL, 0, 0, 0, + FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + __flow_hash_secret_init(); - hash = ___skb_get_hash(skb, &keys, &hashrnd); + hash = __flow_hash_from_keys(&keys, &hashrnd); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } -EXPORT_SYMBOL(__skb_get_hash); +EXPORT_SYMBOL(__skb_get_hash_net); __u32 skb_get_hash_perturb(const struct sk_buff *skb, const siphash_key_t *perturb) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index a83637e3f455..580c55268f65 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -317,7 +317,7 @@ void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt, net_get_random_once(&trace_key, sizeof(trace_key)); info->skbid = (u32)siphash_3u32(hash32_ptr(skb), - skb_get_hash(skb), + skb_get_hash_net(nft_net(pkt), skb), skb->skb_iif, &trace_key); } -- cgit v1.2.3 From d1dab4f71d372e00e2d34a9c32bf261623e3a95c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 9 Jun 2024 00:10:40 +0200 Subject: net: add and use __skb_get_hash_symmetric_net Similar to previous patch: apply same logic for __skb_get_hash_symmetric and let callers pass the netns to the dissector core. Existing function is turned into a wrapper to avoid adjusting all callers, nft_hash.c uses new function. Reviewed-by: Willem de Bruijn Signed-off-by: Florian Westphal Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/20240608221057.16070-3-fw@strlen.de Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 8 +++++++- net/core/flow_dissector.c | 6 +++--- net/netfilter/nft_hash.c | 3 ++- 3 files changed, 12 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6e78019f899a..813406a9bd6c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1498,8 +1498,14 @@ __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) __skb_set_hash(skb, hash, true, is_l4); } +u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb); + +static inline u32 __skb_get_hash_symmetric(const struct sk_buff *skb) +{ + return __skb_get_hash_symmetric_net(NULL, skb); +} + void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); -u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 702b4f0a70b6..e479790db0f7 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1845,19 +1845,19 @@ EXPORT_SYMBOL(make_flow_keys_digest); static struct flow_dissector flow_keys_dissector_symmetric __read_mostly; -u32 __skb_get_hash_symmetric(const struct sk_buff *skb) +u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb) { struct flow_keys keys; __flow_hash_secret_init(); memset(&keys, 0, sizeof(keys)); - __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, + __skb_flow_dissect(net, skb, &flow_keys_dissector_symmetric, &keys, NULL, 0, 0, 0, 0); return __flow_hash_from_keys(&keys, &hashrnd); } -EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric); +EXPORT_SYMBOL_GPL(__skb_get_hash_symmetric_net); /** * __skb_get_hash_net: calculate a flow hash diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 92d47e469204..868d68302d22 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -51,7 +51,8 @@ static void nft_symhash_eval(const struct nft_expr *expr, struct sk_buff *skb = pkt->skb; u32 h; - h = reciprocal_scale(__skb_get_hash_symmetric(skb), priv->modulus); + h = reciprocal_scale(__skb_get_hash_symmetric_net(nft_net(pkt), skb), + priv->modulus); regs->data[priv->dreg] = h + priv->offset; } -- cgit v1.2.3 From 3e453ca122d483eb519f934b6624215f0536301c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Jun 2024 17:13:53 +0200 Subject: net: ipv4,ipv6: Pass multipath hash computation through a helper The following patches will add a sysctl to control multipath hash seed. In order to centralize the hash computation, add a helper, fib_multipath_hash_from_keys(), and have all IPv4 and IPv6 route.c invocations of flow_hash_from_keys() go through this helper instead. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607151357.421181-2-petrm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/ip_fib.h | 7 +++++++ net/ipv4/route.c | 12 ++++++------ net/ipv6/route.c | 12 ++++++------ 3 files changed, 19 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9b2f69ba5e49..b8b3c07e8f7b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -521,6 +521,13 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); #endif + +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + return flow_hash_from_keys(keys); +} + int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); void fib_select_multipath(struct fib_result *res, int hash); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb0bdf34ed50..54512acbead7 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1923,7 +1923,7 @@ static u32 fib_multipath_custom_hash_outer(const struct net *net, hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 fib_multipath_custom_hash_inner(const struct net *net, @@ -1972,7 +1972,7 @@ static u32 fib_multipath_custom_hash_inner(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 fib_multipath_custom_hash_skb(const struct net *net, @@ -2009,7 +2009,7 @@ static u32 fib_multipath_custom_hash_fl4(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl4->fl4_dport; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } /* if skb is set it will be used and fl4 can be NULL */ @@ -2030,7 +2030,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 1: /* skb is currently provided only when forwarding */ @@ -2064,7 +2064,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -2095,7 +2095,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 3: if (skb) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad5fff5a210c..1916de615398 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2372,7 +2372,7 @@ static u32 rt6_multipath_custom_hash_outer(const struct net *net, hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 rt6_multipath_custom_hash_inner(const struct net *net, @@ -2421,7 +2421,7 @@ static u32 rt6_multipath_custom_hash_inner(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 rt6_multipath_custom_hash_skb(const struct net *net, @@ -2460,7 +2460,7 @@ static u32 rt6_multipath_custom_hash_fl6(const struct net *net, if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl6->fl6_dport; - return flow_hash_from_keys(&hash_keys); + return fib_multipath_hash_from_keys(net, &hash_keys); } /* if skb is set it will be used and fl6 can be NULL */ @@ -2482,7 +2482,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 1: if (skb) { @@ -2514,7 +2514,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.ports.dst = fl6->fl6_dport; hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -2551,7 +2551,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); hash_keys.basic.ip_proto = fl6->flowi6_proto; } - mhash = flow_hash_from_keys(&hash_keys); + mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 3: if (skb) -- cgit v1.2.3 From 4ee2a8cace3fb9a34aea6a56426f89d26dd514f3 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 7 Jun 2024 17:13:54 +0200 Subject: net: ipv4: Add a sysctl to set multipath hash seed When calculating hashes for the purpose of multipath forwarding, both IPv4 and IPv6 code currently fall back on flow_hash_from_keys(). That uses a randomly-generated seed. That's a fine choice by default, but unfortunately some deployments may need a tighter control over the seed used. In this patch, make the seed configurable by adding a new sysctl key, net.ipv4.fib_multipath_hash_seed to control the seed. This seed is used specifically for multipath forwarding and not for the other concerns that flow_hash_from_keys() is used for, such as queue selection. Expose the knob as sysctl because other such settings, such as headers to hash, are also handled that way. Like those, the multipath hash seed is a per-netns variable. Despite being placed in the net.ipv4 namespace, the multipath seed sysctl is used for both IPv4 and IPv6, similarly to e.g. a number of TCP variables. The seed used by flow_hash_from_keys() is a 128-bit quantity. However it seems that usually the seed is a much more modest value. 32 bits seem typical (Cisco, Cumulus), some systems go even lower. For that reason, and to decouple the user interface from implementation details, go with a 32-bit quantity, which is then quadruplicated to form the siphash key. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240607151357.421181-3-petrm@nvidia.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 14 ++++++++ include/net/flow_dissector.h | 2 ++ include/net/ip_fib.h | 23 +++++++++++- include/net/netns/ipv4.h | 8 +++++ net/core/flow_dissector.c | 7 ++++ net/ipv4/sysctl_net_ipv4.c | 66 ++++++++++++++++++++++++++++++++++ 6 files changed, 119 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 6e99eccdb837..3616389c8c2d 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -131,6 +131,20 @@ fib_multipath_hash_fields - UNSIGNED INTEGER Default: 0x0007 (source IP, destination IP and IP protocol) +fib_multipath_hash_seed - UNSIGNED INTEGER + The seed value used when calculating hash for multipath routes. Applies + to both IPv4 and IPv6 datapath. Only present for kernels built with + CONFIG_IP_ROUTE_MULTIPATH enabled. + + When set to 0, the seed value used for multipath routing defaults to an + internal random-generated one. + + The actual hashing algorithm is not specified -- there is no guarantee + that a next hop distribution effected by a given seed will keep stable + across kernel versions. + + Default: 0 (random) + fib_sync_mem - UNSIGNED INTEGER Amount of dirty memory from fib entries that can be backlogged before synchronize_rcu is forced. diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 99626475c3f4..3e47e123934d 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -442,6 +442,8 @@ static inline bool flow_keys_have_l4(const struct flow_keys *keys) } u32 flow_hash_from_keys(struct flow_keys *keys); +u32 flow_hash_from_keys_seed(struct flow_keys *keys, + const siphash_key_t *keyval); void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, const void *data, int thoff, int hlen); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b8b3c07e8f7b..6e7984bfb986 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -520,13 +520,34 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); -#endif + +static void +fib_multipath_hash_construct_key(siphash_key_t *key, u32 mp_seed) +{ + u64 mp_seed_64 = mp_seed; + + key->key[0] = (mp_seed_64 << 32) | mp_seed_64; + key->key[1] = key->key[0]; +} +static inline u32 fib_multipath_hash_from_keys(const struct net *net, + struct flow_keys *keys) +{ + siphash_aligned_key_t hash_key; + u32 mp_seed; + + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + fib_multipath_hash_construct_key(&hash_key, mp_seed); + + return flow_hash_from_keys_seed(keys, &hash_key); +} +#else static inline u32 fib_multipath_hash_from_keys(const struct net *net, struct flow_keys *keys) { return flow_hash_from_keys(keys); } +#endif int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope, struct netlink_ext_ack *extack); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index a91bb971f901..5fcd61ada622 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,13 @@ struct inet_timewait_death_row { struct tcp_fastopen_context; +#ifdef CONFIG_IP_ROUTE_MULTIPATH +struct sysctl_fib_multipath_hash_seed { + u32 user_seed; + u32 mp_seed; +}; +#endif + struct netns_ipv4 { /* Cacheline organization can be found documented in * Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst. @@ -246,6 +253,7 @@ struct netns_ipv4 { #endif #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH + struct sysctl_fib_multipath_hash_seed sysctl_fib_multipath_hash_seed; u32 sysctl_fib_multipath_hash_fields; u8 sysctl_fib_multipath_use_neigh; u8 sysctl_fib_multipath_hash_policy; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e479790db0f7..e64a26379807 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -1806,6 +1806,13 @@ u32 flow_hash_from_keys(struct flow_keys *keys) } EXPORT_SYMBOL(flow_hash_from_keys); +u32 flow_hash_from_keys_seed(struct flow_keys *keys, + const siphash_key_t *keyval) +{ + return __flow_hash_from_keys(keys, keyval); +} +EXPORT_SYMBOL(flow_hash_from_keys_seed); + static inline u32 ___skb_get_hash(const struct sk_buff *skb, struct flow_keys *keys, const siphash_key_t *keyval) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index bb64c0ef092d..9140d20eb2d4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -464,6 +464,61 @@ static int proc_fib_multipath_hash_fields(struct ctl_table *table, int write, return ret; } + +static u32 proc_fib_multipath_hash_rand_seed __ro_after_init; + +static void proc_fib_multipath_hash_init_rand_seed(void) +{ + get_random_bytes(&proc_fib_multipath_hash_rand_seed, + sizeof(proc_fib_multipath_hash_rand_seed)); +} + +static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) +{ + struct sysctl_fib_multipath_hash_seed new = { + .user_seed = user_seed, + .mp_seed = (user_seed ? user_seed : + proc_fib_multipath_hash_rand_seed), + }; + + WRITE_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed, new); +} + +static int proc_fib_multipath_hash_seed(struct ctl_table *table, int write, + void *buffer, size_t *lenp, + loff_t *ppos) +{ + struct sysctl_fib_multipath_hash_seed *mphs; + struct net *net = table->data; + struct ctl_table tmp; + u32 user_seed; + int ret; + + mphs = &net->ipv4.sysctl_fib_multipath_hash_seed; + user_seed = mphs->user_seed; + + tmp = *table; + tmp.data = &user_seed; + + ret = proc_douintvec_minmax(&tmp, write, buffer, lenp, ppos); + + if (write && ret == 0) { + proc_fib_multipath_hash_set_seed(net, user_seed); + call_netevent_notifiers(NETEVENT_IPV4_MPATH_HASH_UPDATE, net); + } + + return ret; +} +#else + +static void proc_fib_multipath_hash_init_rand_seed(void) +{ +} + +static void proc_fib_multipath_hash_set_seed(struct net *net, u32 user_seed) +{ +} + #endif static struct ctl_table ipv4_table[] = { @@ -1072,6 +1127,13 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ONE, .extra2 = &fib_multipath_hash_fields_all_mask, }, + { + .procname = "fib_multipath_hash_seed", + .data = &init_net, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_fib_multipath_hash_seed, + }, #endif { .procname = "ip_unprivileged_port_start", @@ -1550,6 +1612,8 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!net->ipv4.sysctl_local_reserved_ports) goto err_ports; + proc_fib_multipath_hash_set_seed(net, 0); + return 0; err_ports: @@ -1584,6 +1648,8 @@ static __init int sysctl_ipv4_init(void) if (!hdr) return -ENOMEM; + proc_fib_multipath_hash_init_rand_seed(); + if (register_pernet_subsys(&ipv4_sysctl_ops)) { unregister_net_sysctl_table(hdr); return -ENOMEM; -- cgit v1.2.3 From b48a1540b73a3c3a9ef59ce34176cc8180c6ce57 Mon Sep 17 00:00:00 2001 From: Asbjørn Sloth Tønnesen Date: Sun, 9 Jun 2024 17:33:51 +0000 Subject: flow_offload: add encapsulation control flag helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds two new helper functions: flow_rule_is_supp_enc_control_flags() flow_rule_has_enc_control_flags() They are intended to be used for validating encapsulation control flags, and compliment the similar helpers without "enc_" in the name. The only difference is that they have their own error message, to make it obvious if an unsupported flag error is related to FLOW_DISSECTOR_KEY_CONTROL or FLOW_DISSECTOR_KEY_ENC_CONTROL. flow_rule_has_enc_control_flags() is for drivers supporting FLOW_DISSECTOR_KEY_ENC_CONTROL, but not supporting any encapsulation control flags. (Currently all 4 drivers fits this category) flow_rule_is_supp_enc_control_flags() is currently only used for the above helper, but should also be used by drivers once they implement at least one encapsulation control flag. There is AFAICT currently no need for an "enc_" variant of flow_rule_match_has_control_flags(), as all drivers currently supporting FLOW_DISSECTOR_KEY_ENC_CONTROL, are already calling flow_rule_match_enc_control() directly. Only compile tested. Signed-off-by: Asbjørn Sloth Tønnesen Reviewed-by: Davide Caratti Link: https://lore.kernel.org/r/20240609173358.193178-2-ast@fiberby.net Signed-off-by: Jakub Kicinski --- include/net/flow_offload.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index ec9f80509f60..292cd8f4b762 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -471,6 +471,28 @@ static inline bool flow_rule_is_supp_control_flags(const u32 supp_flags, return false; } +/** + * flow_rule_is_supp_enc_control_flags() - check for supported control flags + * @supp_enc_flags: encapsulation control flags supported by driver + * @enc_ctrl_flags: encapsulation control flags present in rule + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if only supported control flags are set, false otherwise. + */ +static inline bool flow_rule_is_supp_enc_control_flags(const u32 supp_enc_flags, + const u32 enc_ctrl_flags, + struct netlink_ext_ack *extack) +{ + if (likely((enc_ctrl_flags & ~supp_enc_flags) == 0)) + return true; + + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported match on enc_control.flags %#x", + enc_ctrl_flags); + + return false; +} + /** * flow_rule_has_control_flags() - check for presence of any control flags * @ctrl_flags: control flags present in rule @@ -484,6 +506,19 @@ static inline bool flow_rule_has_control_flags(const u32 ctrl_flags, return !flow_rule_is_supp_control_flags(0, ctrl_flags, extack); } +/** + * flow_rule_has_enc_control_flags() - check for presence of any control flags + * @enc_ctrl_flags: encapsulation control flags present in rule + * @extack: The netlink extended ACK for reporting errors. + * + * Return: true if control flags are set, false otherwise. + */ +static inline bool flow_rule_has_enc_control_flags(const u32 enc_ctrl_flags, + struct netlink_ext_ack *extack) +{ + return !flow_rule_is_supp_enc_control_flags(0, enc_ctrl_flags, extack); +} + /** * flow_rule_match_has_control_flags() - match and check for any control flags * @rule: The flow_rule under evaluation. -- cgit v1.2.3 From 80bbd1c355d661678d2a25bd36e739b6925e7a4e Mon Sep 17 00:00:00 2001 From: Luo Jie Date: Wed, 5 Jun 2024 20:45:39 +0800 Subject: dt-bindings: clock: add qca8386/qca8084 clock and reset definitions QCA8386/QCA8084 includes the clock & reset controller that is accessed by MDIO bus. Two work modes are supported, qca8386 works as switch mode, qca8084 works as PHY mode. Reviewed-by: Rob Herring Signed-off-by: Luo Jie Link: https://lore.kernel.org/r/20240605124541.2711467-3-quic_luoj@quicinc.com Signed-off-by: Bjorn Andersson --- .../bindings/clock/qcom,qca8k-nsscc.yaml | 86 ++++++++++++++++++ include/dt-bindings/clock/qcom,qca8k-nsscc.h | 101 +++++++++++++++++++++ include/dt-bindings/reset/qcom,qca8k-nsscc.h | 76 ++++++++++++++++ 3 files changed, 263 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml create mode 100644 include/dt-bindings/clock/qcom,qca8k-nsscc.h create mode 100644 include/dt-bindings/reset/qcom,qca8k-nsscc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml b/Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml new file mode 100644 index 000000000000..61473385da2d --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,qca8k-nsscc.yaml @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,qca8k-nsscc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm NSS Clock & Reset Controller on QCA8386/QCA8084 + +maintainers: + - Bjorn Andersson + - Luo Jie + +description: | + Qualcomm NSS clock control module provides the clocks and resets + on QCA8386(switch mode)/QCA8084(PHY mode) + + See also:: + include/dt-bindings/clock/qcom,qca8k-nsscc.h + include/dt-bindings/reset/qcom,qca8k-nsscc.h + +properties: + compatible: + oneOf: + - const: qcom,qca8084-nsscc + - items: + - enum: + - qcom,qca8082-nsscc + - qcom,qca8085-nsscc + - qcom,qca8384-nsscc + - qcom,qca8385-nsscc + - qcom,qca8386-nsscc + - const: qcom,qca8084-nsscc + + clocks: + items: + - description: Chip reference clock source + - description: UNIPHY0 RX 312P5M/125M clock source + - description: UNIPHY0 TX 312P5M/125M clock source + - description: UNIPHY1 RX 312P5M/125M clock source + - description: UNIPHY1 TX 312P5M/125M clock source + - description: UNIPHY1 RX 312P5M clock source + - description: UNIPHY1 TX 312P5M clock source + + reg: + items: + - description: MDIO bus address for Clock & Reset Controller register + + reset-gpios: + description: GPIO connected to the chip + maxItems: 1 + +required: + - compatible + - clocks + - reg + - reset-gpios + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + mdio { + #address-cells = <1>; + #size-cells = <0>; + + clock-controller@18 { + compatible = "qcom,qca8084-nsscc"; + reg = <0x18>; + reset-gpios = <&tlmm 51 GPIO_ACTIVE_LOW>; + clocks = <&pcs0_pll>, + <&qca8k_uniphy0_rx>, + <&qca8k_uniphy0_tx>, + <&qca8k_uniphy1_rx>, + <&qca8k_uniphy1_tx>, + <&qca8k_uniphy1_rx312p5m>, + <&qca8k_uniphy1_tx312p5m>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; + }; +... diff --git a/include/dt-bindings/clock/qcom,qca8k-nsscc.h b/include/dt-bindings/clock/qcom,qca8k-nsscc.h new file mode 100644 index 000000000000..0ac3e4c69a1a --- /dev/null +++ b/include/dt-bindings/clock/qcom,qca8k-nsscc.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_QCA8K_NSS_CC_H +#define _DT_BINDINGS_CLK_QCOM_QCA8K_NSS_CC_H + +#define NSS_CC_SWITCH_CORE_CLK_SRC 0 +#define NSS_CC_SWITCH_CORE_CLK 1 +#define NSS_CC_APB_BRIDGE_CLK 2 +#define NSS_CC_MAC0_TX_CLK_SRC 3 +#define NSS_CC_MAC0_TX_DIV_CLK_SRC 4 +#define NSS_CC_MAC0_TX_CLK 5 +#define NSS_CC_MAC0_TX_SRDS1_CLK 6 +#define NSS_CC_MAC0_RX_CLK_SRC 7 +#define NSS_CC_MAC0_RX_DIV_CLK_SRC 8 +#define NSS_CC_MAC0_RX_CLK 9 +#define NSS_CC_MAC0_RX_SRDS1_CLK 10 +#define NSS_CC_MAC1_TX_CLK_SRC 11 +#define NSS_CC_MAC1_TX_DIV_CLK_SRC 12 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_DIV_CLK_SRC 13 +#define NSS_CC_MAC1_SRDS1_CH0_RX_CLK 14 +#define NSS_CC_MAC1_TX_CLK 15 +#define NSS_CC_MAC1_GEPHY0_TX_CLK 16 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_CLK 17 +#define NSS_CC_MAC1_RX_CLK_SRC 18 +#define NSS_CC_MAC1_RX_DIV_CLK_SRC 19 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_DIV_CLK_SRC 20 +#define NSS_CC_MAC1_SRDS1_CH0_TX_CLK 21 +#define NSS_CC_MAC1_RX_CLK 22 +#define NSS_CC_MAC1_GEPHY0_RX_CLK 23 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_CLK 24 +#define NSS_CC_MAC2_TX_CLK_SRC 25 +#define NSS_CC_MAC2_TX_DIV_CLK_SRC 26 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_DIV_CLK_SRC 27 +#define NSS_CC_MAC2_SRDS1_CH1_RX_CLK 28 +#define NSS_CC_MAC2_TX_CLK 29 +#define NSS_CC_MAC2_GEPHY1_TX_CLK 30 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_CLK 31 +#define NSS_CC_MAC2_RX_CLK_SRC 32 +#define NSS_CC_MAC2_RX_DIV_CLK_SRC 33 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_DIV_CLK_SRC 34 +#define NSS_CC_MAC2_SRDS1_CH1_TX_CLK 35 +#define NSS_CC_MAC2_RX_CLK 36 +#define NSS_CC_MAC2_GEPHY1_RX_CLK 37 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_CLK 38 +#define NSS_CC_MAC3_TX_CLK_SRC 39 +#define NSS_CC_MAC3_TX_DIV_CLK_SRC 40 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_DIV_CLK_SRC 41 +#define NSS_CC_MAC3_SRDS1_CH2_RX_CLK 42 +#define NSS_CC_MAC3_TX_CLK 43 +#define NSS_CC_MAC3_GEPHY2_TX_CLK 44 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_CLK 45 +#define NSS_CC_MAC3_RX_CLK_SRC 46 +#define NSS_CC_MAC3_RX_DIV_CLK_SRC 47 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_DIV_CLK_SRC 48 +#define NSS_CC_MAC3_SRDS1_CH2_TX_CLK 49 +#define NSS_CC_MAC3_RX_CLK 50 +#define NSS_CC_MAC3_GEPHY2_RX_CLK 51 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_CLK 52 +#define NSS_CC_MAC4_TX_CLK_SRC 53 +#define NSS_CC_MAC4_TX_DIV_CLK_SRC 54 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_DIV_CLK_SRC 55 +#define NSS_CC_MAC4_SRDS1_CH3_RX_CLK 56 +#define NSS_CC_MAC4_TX_CLK 57 +#define NSS_CC_MAC4_GEPHY3_TX_CLK 58 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_CLK 59 +#define NSS_CC_MAC4_RX_CLK_SRC 60 +#define NSS_CC_MAC4_RX_DIV_CLK_SRC 61 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_DIV_CLK_SRC 62 +#define NSS_CC_MAC4_SRDS1_CH3_TX_CLK 63 +#define NSS_CC_MAC4_RX_CLK 64 +#define NSS_CC_MAC4_GEPHY3_RX_CLK 65 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_CLK 66 +#define NSS_CC_MAC5_TX_CLK_SRC 67 +#define NSS_CC_MAC5_TX_DIV_CLK_SRC 68 +#define NSS_CC_MAC5_TX_SRDS0_CLK 69 +#define NSS_CC_MAC5_TX_CLK 70 +#define NSS_CC_MAC5_RX_CLK_SRC 71 +#define NSS_CC_MAC5_RX_DIV_CLK_SRC 72 +#define NSS_CC_MAC5_RX_SRDS0_CLK 73 +#define NSS_CC_MAC5_RX_CLK 74 +#define NSS_CC_MAC5_TX_SRDS0_CLK_SRC 75 +#define NSS_CC_MAC5_RX_SRDS0_CLK_SRC 76 +#define NSS_CC_AHB_CLK_SRC 77 +#define NSS_CC_AHB_CLK 78 +#define NSS_CC_SEC_CTRL_AHB_CLK 79 +#define NSS_CC_TLMM_CLK 80 +#define NSS_CC_TLMM_AHB_CLK 81 +#define NSS_CC_CNOC_AHB_CLK 82 +#define NSS_CC_MDIO_AHB_CLK 83 +#define NSS_CC_MDIO_MASTER_AHB_CLK 84 +#define NSS_CC_SYS_CLK_SRC 85 +#define NSS_CC_SRDS0_SYS_CLK 86 +#define NSS_CC_SRDS1_SYS_CLK 87 +#define NSS_CC_GEPHY0_SYS_CLK 88 +#define NSS_CC_GEPHY1_SYS_CLK 89 +#define NSS_CC_GEPHY2_SYS_CLK 90 +#define NSS_CC_GEPHY3_SYS_CLK 91 +#endif diff --git a/include/dt-bindings/reset/qcom,qca8k-nsscc.h b/include/dt-bindings/reset/qcom,qca8k-nsscc.h new file mode 100644 index 000000000000..c71167a3bd41 --- /dev/null +++ b/include/dt-bindings/reset/qcom,qca8k-nsscc.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_RESET_QCOM_QCA8K_NSS_CC_H +#define _DT_BINDINGS_RESET_QCOM_QCA8K_NSS_CC_H + +#define NSS_CC_SWITCH_CORE_ARES 1 +#define NSS_CC_APB_BRIDGE_ARES 2 +#define NSS_CC_MAC0_TX_ARES 3 +#define NSS_CC_MAC0_TX_SRDS1_ARES 4 +#define NSS_CC_MAC0_RX_ARES 5 +#define NSS_CC_MAC0_RX_SRDS1_ARES 6 +#define NSS_CC_MAC1_SRDS1_CH0_RX_ARES 7 +#define NSS_CC_MAC1_TX_ARES 8 +#define NSS_CC_MAC1_GEPHY0_TX_ARES 9 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_RX_ARES 10 +#define NSS_CC_MAC1_SRDS1_CH0_TX_ARES 11 +#define NSS_CC_MAC1_RX_ARES 12 +#define NSS_CC_MAC1_GEPHY0_RX_ARES 13 +#define NSS_CC_MAC1_SRDS1_CH0_XGMII_TX_ARES 14 +#define NSS_CC_MAC2_SRDS1_CH1_RX_ARES 15 +#define NSS_CC_MAC2_TX_ARES 16 +#define NSS_CC_MAC2_GEPHY1_TX_ARES 17 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_RX_ARES 18 +#define NSS_CC_MAC2_SRDS1_CH1_TX_ARES 19 +#define NSS_CC_MAC2_RX_ARES 20 +#define NSS_CC_MAC2_GEPHY1_RX_ARES 21 +#define NSS_CC_MAC2_SRDS1_CH1_XGMII_TX_ARES 22 +#define NSS_CC_MAC3_SRDS1_CH2_RX_ARES 23 +#define NSS_CC_MAC3_TX_ARES 24 +#define NSS_CC_MAC3_GEPHY2_TX_ARES 25 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_RX_ARES 26 +#define NSS_CC_MAC3_SRDS1_CH2_TX_ARES 27 +#define NSS_CC_MAC3_RX_ARES 28 +#define NSS_CC_MAC3_GEPHY2_RX_ARES 29 +#define NSS_CC_MAC3_SRDS1_CH2_XGMII_TX_ARES 30 +#define NSS_CC_MAC4_SRDS1_CH3_RX_ARES 31 +#define NSS_CC_MAC4_TX_ARES 32 +#define NSS_CC_MAC4_GEPHY3_TX_ARES 33 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_RX_ARES 34 +#define NSS_CC_MAC4_SRDS1_CH3_TX_ARES 35 +#define NSS_CC_MAC4_RX_ARES 36 +#define NSS_CC_MAC4_GEPHY3_RX_ARES 37 +#define NSS_CC_MAC4_SRDS1_CH3_XGMII_TX_ARES 38 +#define NSS_CC_MAC5_TX_ARES 39 +#define NSS_CC_MAC5_TX_SRDS0_ARES 40 +#define NSS_CC_MAC5_RX_ARES 41 +#define NSS_CC_MAC5_RX_SRDS0_ARES 42 +#define NSS_CC_AHB_ARES 43 +#define NSS_CC_SEC_CTRL_AHB_ARES 44 +#define NSS_CC_TLMM_ARES 45 +#define NSS_CC_TLMM_AHB_ARES 46 +#define NSS_CC_CNOC_AHB_ARES 47 +#define NSS_CC_MDIO_AHB_ARES 48 +#define NSS_CC_MDIO_MASTER_AHB_ARES 49 +#define NSS_CC_SRDS0_SYS_ARES 50 +#define NSS_CC_SRDS1_SYS_ARES 51 +#define NSS_CC_GEPHY0_SYS_ARES 52 +#define NSS_CC_GEPHY1_SYS_ARES 53 +#define NSS_CC_GEPHY2_SYS_ARES 54 +#define NSS_CC_GEPHY3_SYS_ARES 55 +#define NSS_CC_SEC_CTRL_ARES 56 +#define NSS_CC_SEC_CTRL_SENSE_ARES 57 +#define NSS_CC_SLEEP_ARES 58 +#define NSS_CC_DEBUG_ARES 59 +#define NSS_CC_GEPHY0_ARES 60 +#define NSS_CC_GEPHY1_ARES 61 +#define NSS_CC_GEPHY2_ARES 62 +#define NSS_CC_GEPHY3_ARES 63 +#define NSS_CC_DSP_ARES 64 +#define NSS_CC_GEPHY_FULL_ARES 65 +#define NSS_CC_GLOBAL_ARES 66 +#define NSS_CC_XPCS_ARES 67 +#endif -- cgit v1.2.3 From 525b42832bd333e3e7ccb0efceb41b47347beab5 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Thu, 6 Jun 2024 13:36:00 +0200 Subject: dt-bindings: clock: Add Qcom QCM2290 GPUCC Add device tree bindings for graphics clock controller for Qualcomm Technology Inc's QCM2290 SoCs. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Konrad Dybcio Link: https://lore.kernel.org/r/20240606-topic-rb1_gpu-v4-1-4bc0c19da4af@linaro.org Signed-off-by: Bjorn Andersson --- .../bindings/clock/qcom,qcm2290-gpucc.yaml | 77 ++++++++++++++++++++++ include/dt-bindings/clock/qcom,qcm2290-gpucc.h | 32 +++++++++ 2 files changed, 109 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml create mode 100644 include/dt-bindings/clock/qcom,qcm2290-gpucc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml new file mode 100644 index 000000000000..734880805c1b --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,qcm2290-gpucc.yaml @@ -0,0 +1,77 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,qcm2290-gpucc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Graphics Clock & Reset Controller on QCM2290 + +maintainers: + - Konrad Dybcio + +description: | + Qualcomm graphics clock control module provides the clocks, resets and power + domains on Qualcomm SoCs. + + See also:: + include/dt-bindings/clock/qcom,qcm2290-gpucc.h + +properties: + compatible: + const: qcom,qcm2290-gpucc + + reg: + maxItems: 1 + + clocks: + items: + - description: AHB interface clock, + - description: SoC CXO clock + - description: GPLL0 main branch source + - description: GPLL0 div branch source + + power-domains: + description: + A phandle and PM domain specifier for the CX power domain. + maxItems: 1 + + required-opps: + description: + A phandle to an OPP node describing required CX performance point. + maxItems: 1 + +required: + - compatible + - clocks + - power-domains + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + soc { + #address-cells = <2>; + #size-cells = <2>; + + clock-controller@5990000 { + compatible = "qcom,qcm2290-gpucc"; + reg = <0x0 0x05990000 0x0 0x9000>; + clocks = <&gcc GCC_GPU_CFG_AHB_CLK>, + <&rpmcc RPM_SMD_XO_CLK_SRC>, + <&gcc GCC_GPU_GPLL0_CLK_SRC>, + <&gcc GCC_GPU_GPLL0_DIV_CLK_SRC>; + power-domains = <&rpmpd QCM2290_VDDCX>; + required-opps = <&rpmpd_opp_low_svs>; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; + }; +... diff --git a/include/dt-bindings/clock/qcom,qcm2290-gpucc.h b/include/dt-bindings/clock/qcom,qcm2290-gpucc.h new file mode 100644 index 000000000000..7c76dd05278f --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcm2290-gpucc.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2018-2019, The Linux Foundation. All rights reserved. + * Copyright (c) 2024, Linaro Limited + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GPU_CC_QCM2290_H +#define _DT_BINDINGS_CLK_QCOM_GPU_CC_QCM2290_H + +/* GPU_CC clocks */ +#define GPU_CC_AHB_CLK 0 +#define GPU_CC_CRC_AHB_CLK 1 +#define GPU_CC_CX_GFX3D_CLK 2 +#define GPU_CC_CX_GMU_CLK 3 +#define GPU_CC_CX_SNOC_DVM_CLK 4 +#define GPU_CC_CXO_AON_CLK 5 +#define GPU_CC_CXO_CLK 6 +#define GPU_CC_GMU_CLK_SRC 7 +#define GPU_CC_GX_GFX3D_CLK 8 +#define GPU_CC_GX_GFX3D_CLK_SRC 9 +#define GPU_CC_PLL0 10 +#define GPU_CC_SLEEP_CLK 11 +#define GPU_CC_HLOS1_VOTE_GPU_SMMU_CLK 12 + +/* Resets */ +#define GPU_GX_BCR 0 + +/* GDSCs */ +#define GPU_CX_GDSC 0 +#define GPU_GX_GDSC 1 + +#endif -- cgit v1.2.3 From b5c29fba72a6c950655d1cb0f6aa16b60dc83be7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 28 May 2024 12:54:58 +0800 Subject: iommu: Make iommu_sva_domain_alloc() static iommu_sva_domain_alloc() is only called in iommu-sva.c, hence make it static. On the other hand, iommu_sva_domain_alloc() should not return NULL anymore after commit <80af5a452024> ("iommu: Add ops->domain_alloc_sva()"), the removal of inline code avoids potential confusion. Fixes: 80af5a452024 ("iommu: Add ops->domain_alloc_sva()") Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20240528045458.81458-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-sva.c | 6 ++++-- include/linux/iommu.h | 8 -------- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 18a35e798b72..25e581299226 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -10,6 +10,8 @@ #include "iommu-priv.h" static DEFINE_MUTEX(iommu_sva_lock); +static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm); /* Allocate a PASID for the mm within range (inclusive) */ static struct iommu_mm_data *iommu_alloc_mm_data(struct mm_struct *mm, struct device *dev) @@ -277,8 +279,8 @@ static int iommu_sva_iopf_handler(struct iopf_group *group) return 0; } -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm) +static struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) { const struct iommu_ops *ops = dev_iommu_ops(dev); struct iommu_domain *domain; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 17b3f36ad843..1cd19b903354 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1527,8 +1527,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm); void iommu_sva_unbind_device(struct iommu_sva *handle); u32 iommu_sva_get_pasid(struct iommu_sva *handle); -struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, - struct mm_struct *mm); #else static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) @@ -1553,12 +1551,6 @@ static inline u32 mm_get_enqcmd_pasid(struct mm_struct *mm) } static inline void mm_pasid_drop(struct mm_struct *mm) {} - -static inline struct iommu_domain * -iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm) -{ - return NULL; -} #endif /* CONFIG_IOMMU_SVA */ #ifdef CONFIG_IOMMU_IOPF -- cgit v1.2.3 From 0e6b6dedf16800df0ff73ffe2bb5066514db29c2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 12 Jun 2024 16:15:55 +0200 Subject: ACPI: EC: Evaluate orphan _REG under EC device After starting to install the EC address space handler at the ACPI namespace root, if there is an "orphan" _REG method in the EC device's scope, it will not be evaluated any more. This breaks EC operation regions on some systems, like Asus gu605. To address this, use a wrapper around an existing ACPICA function to look for an "orphan" _REG method in the EC device scope and evaluate it if present. Fixes: 60fa6ae6e6d0 ("ACPI: EC: Install address space handler at the namespace root") Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218945 Reported-by: VitaliiT Tested-by: VitaliiT Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpica/acevents.h | 4 ++++ drivers/acpi/acpica/evregion.c | 6 +---- drivers/acpi/acpica/evxfregn.c | 54 ++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/ec.c | 3 +++ include/acpi/acpixf.h | 4 ++++ 5 files changed, 66 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index ddd072cbc738..2133085deda7 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -191,6 +191,10 @@ void acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, acpi_adr_space_type space_id, u32 function); +void +acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *node, + acpi_adr_space_type space_id); + acpi_status acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function); diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index 18fdf2bc2d49..dc6004daf624 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -20,10 +20,6 @@ extern u8 acpi_gbl_default_address_spaces[]; /* Local prototypes */ -static void -acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, - acpi_adr_space_type space_id); - static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -818,7 +814,7 @@ acpi_ev_reg_run(acpi_handle obj_handle, * ******************************************************************************/ -static void +void acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, acpi_adr_space_type space_id) { diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 3197e6303c5b..624361a5f34d 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -306,3 +306,57 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) } ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) + +/******************************************************************************* + * + * FUNCTION: acpi_execute_orphan_reg_method + * + * PARAMETERS: device - Handle for the device + * space_id - The address space ID + * + * RETURN: Status + * + * DESCRIPTION: Execute an "orphan" _REG method that appears under an ACPI + * device. This is a _REG method that has no corresponding region + * within the device's scope. + * + ******************************************************************************/ +acpi_status +acpi_execute_orphan_reg_method(acpi_handle device, acpi_adr_space_type space_id) +{ + struct acpi_namespace_node *node; + acpi_status status; + + ACPI_FUNCTION_TRACE(acpi_execute_orphan_reg_method); + + /* Parameter validation */ + + if (!device) { + return_ACPI_STATUS(AE_BAD_PARAMETER); + } + + status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Convert and validate the device handle */ + + node = acpi_ns_validate_handle(device); + if (node) { + + /* + * If an "orphan" _REG method is present in the device's scope + * for the given address space ID, run it. + */ + + acpi_ev_execute_orphan_reg_method(node, space_id); + } else { + status = AE_BAD_PARAMETER; + } + + (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + return_ACPI_STATUS(status); +} + +ACPI_EXPORT_SYMBOL(acpi_execute_orphan_reg_method) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 68dd17f96f63..299ec653388c 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1507,6 +1507,9 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC); + if (scope_handle != ec->handle) + acpi_execute_orphan_reg_method(ec->handle, ACPI_ADR_SPACE_EC); + set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 94d0fc3bd412..80dc36f9d527 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -662,6 +662,10 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)) +ACPI_EXTERNAL_RETURN_STATUS(acpi_status + acpi_execute_orphan_reg_method(acpi_handle device, + acpi_adr_space_type + space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, -- cgit v1.2.3 From 1a8009e108382848d149a24dd3fc67607d054a05 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 10 Jun 2024 10:57:30 +0200 Subject: dt-bindings: interconnect: Add MediaTek EMI Interconnect bindings Add bindings for the MediaTek External Memory Interface Interconnect, which providers support system bandwidth requirements through Dynamic Voltage Frequency Scaling Resource Collector (DVFSRC) hardware. This adds bindings for MediaTek MT8183 and MT8195 SoCs. Note that this is modeled as a subnode of DVFSRC for multiple reasons: - Some SoCs have more than one interconnect on the DVFSRC (and two different kinds of EMI interconnect, and also a SMI interconnect); - Some boards will want to not enable the interconnect driver because some of those are not battery powered (so they just keep the knobs at full thrust from the bootloader and never care scaling busses); - Some DVFSRC interconnect features may depend on firmware. Reviewed-by: Rob Herring Signed-off-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20240610085735.147134-3-angelogioacchino.delregno@collabora.com Signed-off-by: Georgi Djakov --- .../bindings/interconnect/mediatek,mt8183-emi.yaml | 51 ++++++++++++++++++++++ include/dt-bindings/interconnect/mediatek,mt8183.h | 23 ++++++++++ include/dt-bindings/interconnect/mediatek,mt8195.h | 44 +++++++++++++++++++ 3 files changed, 118 insertions(+) create mode 100644 Documentation/devicetree/bindings/interconnect/mediatek,mt8183-emi.yaml create mode 100644 include/dt-bindings/interconnect/mediatek,mt8183.h create mode 100644 include/dt-bindings/interconnect/mediatek,mt8195.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/interconnect/mediatek,mt8183-emi.yaml b/Documentation/devicetree/bindings/interconnect/mediatek,mt8183-emi.yaml new file mode 100644 index 000000000000..017c8478b2a7 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/mediatek,mt8183-emi.yaml @@ -0,0 +1,51 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/mediatek,mt8183-emi.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek External Memory Interface (EMI) Interconnect + +maintainers: + - AngeloGioacchino Del Regno + +description: | + EMI interconnect providers support system bandwidth requirements through + Dynamic Voltage Frequency Scaling Resource Collector (DVFSRC) hardware. + The provider is able to communicate with the DVFSRC through Secure Monitor + Call (SMC). + + ICC provider ICC Nodes + ---- ---- + _________ |CPU | |--- |VPU | + _____ | |----- ---- | ---- + | |->| DRAM | ---- | ---- + |DRAM |->|scheduler|----- |GPU | |--- |DISP| + | |->| (EMI) | ---- | ---- + |_____|->|_________|---. ----- | ---- + /|\ `-|MMSYS|--|--- |VDEC| + | ----- | ---- + | | ---- + | change DRAM freq |--- |VENC| + -------- | ---- + SMC --> | DVFSRC | | ---- + -------- |--- |IMG | + | ---- + | ---- + |--- |CAM | + ---- + +properties: + compatible: + enum: + - mediatek,mt8183-emi + - mediatek,mt8195-emi + + '#interconnect-cells': + const: 1 + +required: + - compatible + - '#interconnect-cells' + +unevaluatedProperties: false diff --git a/include/dt-bindings/interconnect/mediatek,mt8183.h b/include/dt-bindings/interconnect/mediatek,mt8183.h new file mode 100644 index 000000000000..1088c350258d --- /dev/null +++ b/include/dt-bindings/interconnect/mediatek,mt8183.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2021 MediaTek Inc. + * Copyright (c) 2024 Collabora Ltd. + * AngeloGioacchino Del Regno + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8183_H +#define __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8183_H + +#define SLAVE_DDR_EMI 0 +#define MASTER_MCUSYS 1 +#define MASTER_MFG 2 +#define MASTER_MMSYS 3 +#define MASTER_MM_VPU 4 +#define MASTER_MM_DISP 5 +#define MASTER_MM_VDEC 6 +#define MASTER_MM_VENC 7 +#define MASTER_MM_CAM 8 +#define MASTER_MM_IMG 9 +#define MASTER_MM_MDP 10 + +#endif diff --git a/include/dt-bindings/interconnect/mediatek,mt8195.h b/include/dt-bindings/interconnect/mediatek,mt8195.h new file mode 100644 index 000000000000..33e0e6cde732 --- /dev/null +++ b/include/dt-bindings/interconnect/mediatek,mt8195.h @@ -0,0 +1,44 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2020 MediaTek Inc. + * Copyright (c) 2024 Collabora Ltd. + * AngeloGioacchino Del Regno + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8195_H +#define __DT_BINDINGS_INTERCONNECT_MEDIATEK_MT8195_H + +#define SLAVE_DDR_EMI 0 +#define MASTER_MCUSYS 1 +#define MASTER_GPUSYS 2 +#define MASTER_MMSYS 3 +#define MASTER_MM_VPU 4 +#define MASTER_MM_DISP 5 +#define MASTER_MM_VDEC 6 +#define MASTER_MM_VENC 7 +#define MASTER_MM_CAM 8 +#define MASTER_MM_IMG 9 +#define MASTER_MM_MDP 10 +#define MASTER_VPUSYS 11 +#define MASTER_VPU_0 12 +#define MASTER_VPU_1 13 +#define MASTER_MDLASYS 14 +#define MASTER_MDLA_0 15 +#define MASTER_UFS 16 +#define MASTER_PCIE_0 17 +#define MASTER_PCIE_1 18 +#define MASTER_USB 19 +#define MASTER_DBGIF 20 +#define SLAVE_HRT_DDR_EMI 21 +#define MASTER_HRT_MMSYS 22 +#define MASTER_HRT_MM_DISP 23 +#define MASTER_HRT_MM_VDEC 24 +#define MASTER_HRT_MM_VENC 25 +#define MASTER_HRT_MM_CAM 26 +#define MASTER_HRT_MM_IMG 27 +#define MASTER_HRT_MM_MDP 28 +#define MASTER_HRT_DBGIF 29 +#define MASTER_WIFI 30 +#define MASTER_BT 31 +#define MASTER_NETSYS 32 +#endif -- cgit v1.2.3 From 163f10b2935362f0e8ef8d7fadd0b5aa33e9130f Mon Sep 17 00:00:00 2001 From: Pierre-Louis Bossart Date: Wed, 12 Jun 2024 08:47:07 +0200 Subject: PCI: Add INTEL_HDA_PTL to pci_ids.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More PCI ids for Intel audio. Signed-off-by: Pierre-Louis Bossart Reviewed-by: Ranjani Sridharan Reviewed-by: Bard Liao Reviewed-by: Péter Ujfalusi Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20240612064709.51141-2-pierre-louis.bossart@linux.intel.com Signed-off-by: Takashi Iwai --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 942a587bb97e..0168c6a60148 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3112,6 +3112,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_LNL_P 0xa828 #define PCI_DEVICE_ID_INTEL_S21152BB 0xb152 #define PCI_DEVICE_ID_INTEL_HDA_BMG 0xe2f7 +#define PCI_DEVICE_ID_INTEL_HDA_PTL 0xe428 #define PCI_DEVICE_ID_INTEL_HDA_CML_R 0xf0c8 #define PCI_DEVICE_ID_INTEL_HDA_RKL_S 0xf1c8 -- cgit v1.2.3 From a3cfe84cca28f205761a0450016593b0d728165e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 6 Jun 2024 07:58:50 -0700 Subject: bpf: Add CHECKSUM_COMPLETE to bpf test progs Add special flag to validate that TC BPF program properly updates checksum information in skb. Signed-off-by: Vadim Fedorenko Signed-off-by: Daniel Borkmann Reviewed-by: Jakub Kicinski Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20240606145851.229116-1-vadfed@meta.com --- include/uapi/linux/bpf.h | 2 ++ net/bpf/test_run.c | 28 +++++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 2 ++ 3 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 25ea393cf084..35bcf52dbc65 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 36ae54f57bf5..3c965e32fc33 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -983,7 +983,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, void *data; int ret; - if (kattr->test.flags || kattr->test.cpu || kattr->test.batch_size) + if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) || + kattr->test.cpu || kattr->test.batch_size) return -EINVAL; data = bpf_test_init(kattr, kattr->test.data_size_in, @@ -1031,6 +1032,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); __skb_put(skb, size); + if (ctx && ctx->ifindex > 1) { dev = dev_get_by_index(net, ctx->ifindex); if (!dev) { @@ -1066,9 +1068,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, __skb_push(skb, hh_len); if (is_direct_pkt_access) bpf_compute_data_pointers(skb); + ret = convert___skb_to_skb(skb, ctx); if (ret) goto out; + + if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { + const int off = skb_network_offset(skb); + int len = skb->len - off; + + skb->csum = skb_checksum(skb, off, len, 0); + skb->ip_summed = CHECKSUM_COMPLETE; + } + ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false); if (ret) goto out; @@ -1083,6 +1095,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, } memset(__skb_push(skb, hh_len), 0, hh_len); } + + if (kattr->test.flags & BPF_F_TEST_SKB_CHECKSUM_COMPLETE) { + const int off = skb_network_offset(skb); + int len = skb->len - off; + __wsum csum; + + csum = skb_checksum(skb, off, len, 0); + + if (csum_fold(skb->csum) != csum_fold(csum)) { + ret = -EBADMSG; + goto out; + } + } + convert_skb_to___skb(skb, ctx); size = skb->len; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 25ea393cf084..35bcf52dbc65 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1425,6 +1425,8 @@ enum { #define BPF_F_TEST_RUN_ON_CPU (1U << 0) /* If set, XDP frames will be transmitted after processing */ #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1) +/* If set, apply CHECKSUM_COMPLETE to skb and validate the checksum */ +#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2) /* type for BPF_ENABLE_STATS */ enum bpf_stats_type { -- cgit v1.2.3 From 2755d1f46aa25f65179964bf315d8a16b3540eab Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 10 Jun 2024 13:12:00 +0200 Subject: drm/connector: hdmi: Fix kerneldoc warnings It looks like the documentation for the HDMI-related fields recently added to both the drm_connector and drm_connector_state structures trigger some warnings because of their use of anonymous structures: $ scripts/kernel-doc -none include/drm/drm_connector.h include/drm/drm_connector.h:1138: warning: Excess struct member 'broadcast_rgb' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'infoframes' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'avi' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'hdr_drm' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'spd' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'vendor' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'is_limited_range' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'output_bpc' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'output_format' description in 'drm_connector_state' include/drm/drm_connector.h:1138: warning: Excess struct member 'tmds_char_rate' description in 'drm_connector_state' include/drm/drm_connector.h:2112: warning: Excess struct member 'vendor' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'product' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'supported_formats' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'infoframes' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'lock' description in 'drm_connector' include/drm/drm_connector.h:2112: warning: Excess struct member 'audio' description in 'drm_connector' Create some intermediate structures instead of anonymous ones to silence the warnings. Reported-by: Jani Nikula Suggested-by: Jani Nikula Fixes: 54cb39e2293b ("drm/connector: hdmi: Create an HDMI sub-state") Fixes: 948f01d5e559 ("drm/connector: hdmi: Add support for output format") Reviewed-by: Dmitry Baryshkov Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240610111200.428224-1-mripard@kernel.org --- include/drm/drm_connector.h | 206 +++++++++++++++++++++++--------------------- 1 file changed, 108 insertions(+), 98 deletions(-) (limited to 'include') diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index e04a8a0d1bbd..f750765d8fbc 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -929,6 +929,67 @@ struct drm_connector_hdmi_infoframe { bool set; }; +/* + * struct drm_connector_hdmi_state - HDMI state container + */ +struct drm_connector_hdmi_state { + /** + * @broadcast_rgb: Connector property to pass the + * Broadcast RGB selection value. + */ + enum drm_hdmi_broadcast_rgb broadcast_rgb; + + /** + * @infoframes: HDMI Infoframes matching that state + */ + struct { + /** + * @avi: AVI Infoframes structure matching our + * state. + */ + struct drm_connector_hdmi_infoframe avi; + + /** + * @hdr_drm: DRM (Dynamic Range and Mastering) + * Infoframes structure matching our state. + */ + struct drm_connector_hdmi_infoframe hdr_drm; + + /** + * @spd: SPD Infoframes structure matching our + * state. + */ + struct drm_connector_hdmi_infoframe spd; + + /** + * @vendor: HDMI Vendor Infoframes structure + * matching our state. + */ + struct drm_connector_hdmi_infoframe hdmi; + } infoframes; + + /** + * @is_limited_range: Is the output supposed to use a limited + * RGB Quantization Range or not? + */ + bool is_limited_range; + + /** + * @output_bpc: Bits per color channel to output. + */ + unsigned int output_bpc; + + /** + * @output_format: Pixel format to output in. + */ + enum hdmi_colorspace output_format; + + /** + * @tmds_char_rate: TMDS Character Rate, in Hz. + */ + unsigned long long tmds_char_rate; +}; + /** * struct drm_connector_state - mutable connector state */ @@ -1078,63 +1139,7 @@ struct drm_connector_state { * @hdmi: HDMI-related variable and properties. Filled by * @drm_atomic_helper_connector_hdmi_check(). */ - struct { - /** - * @broadcast_rgb: Connector property to pass the - * Broadcast RGB selection value. - */ - enum drm_hdmi_broadcast_rgb broadcast_rgb; - - /** - * @infoframes: HDMI Infoframes matching that state - */ - struct { - /** - * @avi: AVI Infoframes structure matching our - * state. - */ - struct drm_connector_hdmi_infoframe avi; - - /** - * @hdr_drm: DRM (Dynamic Range and Mastering) - * Infoframes structure matching our state. - */ - struct drm_connector_hdmi_infoframe hdr_drm; - - /** - * @spd: SPD Infoframes structure matching our - * state. - */ - struct drm_connector_hdmi_infoframe spd; - - /** - * @vendor: HDMI Vendor Infoframes structure - * matching our state. - */ - struct drm_connector_hdmi_infoframe hdmi; - } infoframes; - - /** - * @is_limited_range: Is the output supposed to use a limited - * RGB Quantization Range or not? - */ - bool is_limited_range; - - /** - * @output_bpc: Bits per color channel to output. - */ - unsigned int output_bpc; - - /** - * @output_format: Pixel format to output in. - */ - enum hdmi_colorspace output_format; - - /** - * @tmds_char_rate: TMDS Character Rate, in Hz. - */ - unsigned long long tmds_char_rate; - } hdmi; + struct drm_connector_hdmi_state hdmi; }; /** @@ -1656,6 +1661,51 @@ struct drm_cmdline_mode { bool tv_mode_specified; }; +/* + * struct drm_connector_hdmi - DRM Connector HDMI-related structure + */ +struct drm_connector_hdmi { +#define DRM_CONNECTOR_HDMI_VENDOR_LEN 8 + /** + * @vendor: HDMI Controller Vendor Name + */ + unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring; + +#define DRM_CONNECTOR_HDMI_PRODUCT_LEN 16 + /** + * @product: HDMI Controller Product Name + */ + unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring; + + /** + * @supported_formats: Bitmask of @hdmi_colorspace + * supported by the controller. + */ + unsigned long supported_formats; + + /** + * @funcs: HDMI connector Control Functions + */ + const struct drm_connector_hdmi_funcs *funcs; + + /** + * @infoframes: Current Infoframes output by the connector + */ + struct { + /** + * @lock: Mutex protecting against concurrent access to + * the infoframes, most notably between KMS and ALSA. + */ + struct mutex lock; + + /** + * @audio: Current Audio Infoframes structure. Protected + * by @lock. + */ + struct drm_connector_hdmi_infoframe audio; + } infoframes; +}; + /** * struct drm_connector - central DRM connector control structure * @@ -2068,47 +2118,7 @@ struct drm_connector { /** * @hdmi: HDMI-related variable and properties. */ - struct { -#define DRM_CONNECTOR_HDMI_VENDOR_LEN 8 - /** - * @vendor: HDMI Controller Vendor Name - */ - unsigned char vendor[DRM_CONNECTOR_HDMI_VENDOR_LEN] __nonstring; - -#define DRM_CONNECTOR_HDMI_PRODUCT_LEN 16 - /** - * @product: HDMI Controller Product Name - */ - unsigned char product[DRM_CONNECTOR_HDMI_PRODUCT_LEN] __nonstring; - - /** - * @supported_formats: Bitmask of @hdmi_colorspace - * supported by the controller. - */ - unsigned long supported_formats; - - /** - * @funcs: HDMI connector Control Functions - */ - const struct drm_connector_hdmi_funcs *funcs; - - /** - * @infoframes: Current Infoframes output by the connector - */ - struct { - /** - * @lock: Mutex protecting against concurrent access to - * the infoframes, most notably between KMS and ALSA. - */ - struct mutex lock; - - /** - * @audio: Current Audio Infoframes structure. Protected - * by @lock. - */ - struct drm_connector_hdmi_infoframe audio; - } infoframes; - } hdmi; + struct drm_connector_hdmi hdmi; }; #define obj_to_connector(x) container_of(x, struct drm_connector, base) -- cgit v1.2.3 From ff985c759778986f55cbc557055fbeb84ee833eb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 11 Jun 2024 15:01:04 +0200 Subject: auxbus: make to_auxiliary_drv accept and return a constant pointer In the quest to make struct device constant, start by making to_auxiliary_drv() return a constant pointer so that drivers that call this can be fixed up before the driver core changes. As the return type previously was not constant, also fix up all callers that were assuming that the pointer was not going to be a constant one in order to not break the build. Cc: Dave Ertman Cc: Ira Weiny Cc: Rafael J. Wysocki Cc: Bingbu Cao Cc: Tianshu Qiu Cc: Mauro Carvalho Chehab Cc: Michael Chan Cc: David S. Miller Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Jesse Brandeburg Cc: Tony Nguyen Cc: Saeed Mahameed Cc: Leon Romanovsky Cc: Tariq Toukan Cc: Pierre-Louis Bossart Cc: Liam Girdwood Cc: Peter Ujfalusi Cc: Bard Liao Cc: Ranjani Sridharan Cc: Daniel Baluta Cc: Kai Vehmanen Cc: Jaroslav Kysela Cc: Takashi Iwai Cc: Richard Cochran Cc: linux-media@vger.kernel.org Cc: netdev@vger.kernel.org Cc: intel-wired-lan@lists.osuosl.org Cc: linux-rdma@vger.kernel.org Cc: sound-open-firmware@alsa-project.org Cc: linux-sound@vger.kernel.org Acked-by: Sakari Ailus # drivers/media/pci/intel/ipu6 Acked-by: Mark Brown Reviewed-by: Martin Habets Link: https://lore.kernel.org/r/20240611130103.3262749-7-gregkh@linuxfoundation.org Signed-off-by: Greg Kroah-Hartman --- drivers/base/auxiliary.c | 8 ++++---- drivers/media/pci/intel/ipu6/ipu6-bus.h | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_ptp.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 4 ++-- include/linux/auxiliary_bus.h | 2 +- sound/soc/sof/sof-client.c | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index d3a2c40c2f12..5832e31bb77b 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -180,7 +180,7 @@ static const struct auxiliary_device_id *auxiliary_match_id(const struct auxilia static int auxiliary_match(struct device *dev, struct device_driver *drv) { struct auxiliary_device *auxdev = to_auxiliary_dev(dev); - struct auxiliary_driver *auxdrv = to_auxiliary_drv(drv); + const struct auxiliary_driver *auxdrv = to_auxiliary_drv(drv); return !!auxiliary_match_id(auxdrv->id_table, auxdev); } @@ -203,7 +203,7 @@ static const struct dev_pm_ops auxiliary_dev_pm_ops = { static int auxiliary_bus_probe(struct device *dev) { - struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); + const struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); struct auxiliary_device *auxdev = to_auxiliary_dev(dev); int ret; @@ -222,7 +222,7 @@ static int auxiliary_bus_probe(struct device *dev) static void auxiliary_bus_remove(struct device *dev) { - struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); + const struct auxiliary_driver *auxdrv = to_auxiliary_drv(dev->driver); struct auxiliary_device *auxdev = to_auxiliary_dev(dev); if (auxdrv->remove) @@ -232,7 +232,7 @@ static void auxiliary_bus_remove(struct device *dev) static void auxiliary_bus_shutdown(struct device *dev) { - struct auxiliary_driver *auxdrv = NULL; + const struct auxiliary_driver *auxdrv = NULL; struct auxiliary_device *auxdev; if (dev->driver) { diff --git a/drivers/media/pci/intel/ipu6/ipu6-bus.h b/drivers/media/pci/intel/ipu6/ipu6-bus.h index b26c6aee1621..bb4926dfdf08 100644 --- a/drivers/media/pci/intel/ipu6/ipu6-bus.h +++ b/drivers/media/pci/intel/ipu6/ipu6-bus.h @@ -21,7 +21,7 @@ struct ipu6_buttress_ctrl; struct ipu6_bus_device { struct auxiliary_device auxdev; - struct auxiliary_driver *auxdrv; + const struct auxiliary_driver *auxdrv; const struct ipu6_auxdrv_data *auxdrv_data; struct list_head list; void *pdata; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index ba3fa1c2e5d9..b9e7d3e7b15d 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -239,7 +239,7 @@ void bnxt_ulp_stop(struct bnxt *bp) adev = &aux_priv->aux_dev; if (adev->dev.driver) { - struct auxiliary_driver *adrv; + const struct auxiliary_driver *adrv; pm_message_t pm = {}; adrv = to_auxiliary_drv(adev->dev.driver); @@ -277,7 +277,7 @@ void bnxt_ulp_start(struct bnxt *bp, int err) adev = &aux_priv->aux_dev; if (adev->dev.driver) { - struct auxiliary_driver *adrv; + const struct auxiliary_driver *adrv; adrv = to_auxiliary_drv(adev->dev.driver); edev->en_state = bp->state; diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 0f17fc1181d2..7341e7c4ef24 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2784,7 +2784,7 @@ static struct ice_pf * ice_ptp_aux_dev_to_owner_pf(struct auxiliary_device *aux_dev) { struct ice_ptp_port_owner *ports_owner; - struct auxiliary_driver *aux_drv; + const struct auxiliary_driver *aux_drv; struct ice_ptp *owner_ptp; if (!aux_dev->dev.driver) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 47e7c2639774..9a79674d27f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -349,7 +349,7 @@ int mlx5_attach_device(struct mlx5_core_dev *dev) { struct mlx5_priv *priv = &dev->priv; struct auxiliary_device *adev; - struct auxiliary_driver *adrv; + const struct auxiliary_driver *adrv; int ret = 0, i; devl_assert_locked(priv_to_devlink(dev)); @@ -406,7 +406,7 @@ void mlx5_detach_device(struct mlx5_core_dev *dev, bool suspend) { struct mlx5_priv *priv = &dev->priv; struct auxiliary_device *adev; - struct auxiliary_driver *adrv; + const struct auxiliary_driver *adrv; pm_message_t pm = {}; int i; diff --git a/include/linux/auxiliary_bus.h b/include/linux/auxiliary_bus.h index de21d9d24a95..bdff7b85f2ae 100644 --- a/include/linux/auxiliary_bus.h +++ b/include/linux/auxiliary_bus.h @@ -203,7 +203,7 @@ static inline struct auxiliary_device *to_auxiliary_dev(struct device *dev) return container_of(dev, struct auxiliary_device, dev); } -static inline struct auxiliary_driver *to_auxiliary_drv(struct device_driver *drv) +static inline const struct auxiliary_driver *to_auxiliary_drv(const struct device_driver *drv) { return container_of(drv, struct auxiliary_driver, driver); } diff --git a/sound/soc/sof/sof-client.c b/sound/soc/sof/sof-client.c index 99f74def4ab6..5d6005a88e79 100644 --- a/sound/soc/sof/sof-client.c +++ b/sound/soc/sof/sof-client.c @@ -357,7 +357,7 @@ EXPORT_SYMBOL_NS_GPL(sof_client_ipc4_find_module, SND_SOC_SOF_CLIENT); int sof_suspend_clients(struct snd_sof_dev *sdev, pm_message_t state) { - struct auxiliary_driver *adrv; + const struct auxiliary_driver *adrv; struct sof_client_dev *cdev; mutex_lock(&sdev->ipc_client_mutex); @@ -380,7 +380,7 @@ EXPORT_SYMBOL_NS_GPL(sof_suspend_clients, SND_SOC_SOF_CLIENT); int sof_resume_clients(struct snd_sof_dev *sdev) { - struct auxiliary_driver *adrv; + const struct auxiliary_driver *adrv; struct sof_client_dev *cdev; mutex_lock(&sdev->ipc_client_mutex); -- cgit v1.2.3 From 288b550463cf5dd21ad34f736b8c5ccb7ff69ceb Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Sat, 8 Jun 2024 17:55:24 +0200 Subject: mfd: pm8008: Rework to match new DT binding Rework the pm8008 driver to match the new devicetree binding which no longer describes internal details like interrupts and register offsets (including which of the two consecutive I2C addresses the registers belong to). Instead make the interrupt controller implementation internal and pass interrupts to the subdrivers using MFD cell resources. Note that subdrivers may either get their resources, like register block offsets, from the parent MFD or this can be included in the subdrivers directly. In the current implementation, the temperature alarm driver is generic enough to just get its base address and alarm interrupt from the parent driver, which already uses this information to implement the interrupt controller. The regulator driver, however, needs additional information like parent supplies and regulator characteristics so in that case it is easier to just augment its table with the regulator register base addresses. Similarly, the current GPIO driver already holds the number of pins and that lookup table can therefore also be extended with register offsets. Note that subdrivers can now access the two regmaps by name, even if the primary regmap is registered last so that it is returned by default when no name is provided in lookups. Finally, note that the temperature alarm and GPIO subdrivers need some minor rework before they can be used with non-SPMI devices like the PM8008. The temperature alarm MFD cell name specifically uses a "qpnp" rather than "spmi" prefix to prevent binding until the driver has been updated. Tested-by: Bryan O'Donoghue Signed-off-by: Johan Hovold Link: https://lore.kernel.org/r/20240608155526.12996-11-johan+linaro@kernel.org Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 1 + drivers/mfd/qcom-pm8008.c | 97 ++++++++++++++++++++++++++++++----- include/dt-bindings/mfd/qcom-pm8008.h | 19 ------- 3 files changed, 86 insertions(+), 31 deletions(-) delete mode 100644 include/dt-bindings/mfd/qcom-pm8008.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 266b4f54af60..6b220dfea0a4 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -2208,6 +2208,7 @@ config MFD_ACER_A500_EC config MFD_QCOM_PM8008 tristate "QCOM PM8008 Power Management IC" depends on I2C && OF + select MFD_CORE select REGMAP_I2C select REGMAP_IRQ help diff --git a/drivers/mfd/qcom-pm8008.c b/drivers/mfd/qcom-pm8008.c index 72199840231e..246b5fe9819d 100644 --- a/drivers/mfd/qcom-pm8008.c +++ b/drivers/mfd/qcom-pm8008.c @@ -7,8 +7,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -16,8 +18,6 @@ #include #include -#include - #define I2C_INTR_STATUS_BASE 0x0550 #define INT_RT_STS_OFFSET 0x10 #define INT_SET_TYPE_OFFSET 0x11 @@ -45,6 +45,16 @@ enum { #define PM8008_GPIO1_ADDR PM8008_PERIPH_2_BASE #define PM8008_GPIO2_ADDR PM8008_PERIPH_3_BASE +/* PM8008 IRQ numbers */ +#define PM8008_IRQ_MISC_UVLO 0 +#define PM8008_IRQ_MISC_OVLO 1 +#define PM8008_IRQ_MISC_OTST2 2 +#define PM8008_IRQ_MISC_OTST3 3 +#define PM8008_IRQ_MISC_LDO_OCP 4 +#define PM8008_IRQ_TEMP_ALARM 5 +#define PM8008_IRQ_GPIO1 6 +#define PM8008_IRQ_GPIO2 7 + enum { SET_TYPE_INDEX, POLARITY_HI_INDEX, @@ -148,21 +158,65 @@ static const struct regmap_irq_chip pm8008_irq_chip = { .get_irq_reg = pm8008_get_irq_reg, }; -static struct regmap_config qcom_mfd_regmap_cfg = { +static const struct regmap_config qcom_mfd_regmap_cfg = { + .name = "primary", + .reg_bits = 16, + .val_bits = 8, + .max_register = 0xffff, +}; + +static const struct regmap_config pm8008_regmap_cfg_2 = { + .name = "secondary", .reg_bits = 16, .val_bits = 8, .max_register = 0xffff, }; +static const struct resource pm8008_temp_res[] = { + DEFINE_RES_MEM(PM8008_TEMP_ALARM_ADDR, 0x100), + DEFINE_RES_IRQ(PM8008_IRQ_TEMP_ALARM), +}; + +static const struct mfd_cell pm8008_cells[] = { + MFD_CELL_NAME("pm8008-regulator"), + MFD_CELL_RES("qpnp-temp-alarm", pm8008_temp_res), + MFD_CELL_NAME("pm8008-gpio"), +}; + +static void devm_irq_domain_fwnode_release(void *data) +{ + struct fwnode_handle *fwnode = data; + + irq_domain_free_fwnode(fwnode); +} + static int pm8008_probe(struct i2c_client *client) { struct regmap_irq_chip_data *irq_data; + struct device *dev = &client->dev; + struct regmap *regmap, *regmap2; + struct fwnode_handle *fwnode; + struct i2c_client *dummy; struct gpio_desc *reset; - int rc; - struct device *dev; - struct regmap *regmap; + char *name; + int ret; + + dummy = devm_i2c_new_dummy_device(dev, client->adapter, client->addr + 1); + if (IS_ERR(dummy)) { + ret = PTR_ERR(dummy); + dev_err(dev, "failed to claim second address: %d\n", ret); + return ret; + } + + regmap2 = devm_regmap_init_i2c(dummy, &qcom_mfd_regmap_cfg); + if (IS_ERR(regmap2)) + return PTR_ERR(regmap2); - dev = &client->dev; + ret = regmap_attach_dev(dev, regmap2, &pm8008_regmap_cfg_2); + if (ret) + return ret; + + /* Default regmap must be attached last. */ regmap = devm_regmap_init_i2c(client, &qcom_mfd_regmap_cfg); if (IS_ERR(regmap)) return PTR_ERR(regmap); @@ -177,14 +231,33 @@ static int pm8008_probe(struct i2c_client *client) */ usleep_range(1000, 2000); - if (of_property_read_bool(dev->of_node, "interrupt-controller")) { - rc = devm_regmap_add_irq_chip(dev, regmap, client->irq, + name = devm_kasprintf(dev, GFP_KERNEL, "%pOF-internal", dev->of_node); + if (!name) + return -ENOMEM; + + name = strreplace(name, '/', ':'); + + fwnode = irq_domain_alloc_named_fwnode(name); + if (!fwnode) + return -ENOMEM; + + ret = devm_add_action_or_reset(dev, devm_irq_domain_fwnode_release, fwnode); + if (ret) + return ret; + + ret = devm_regmap_add_irq_chip_fwnode(dev, fwnode, regmap, client->irq, IRQF_SHARED, 0, &pm8008_irq_chip, &irq_data); - if (rc) - dev_err(dev, "failed to add IRQ chip: %d\n", rc); + if (ret) { + dev_err(dev, "failed to add IRQ chip: %d\n", ret); + return ret; } - return devm_of_platform_populate(dev); + /* Needed by GPIO driver. */ + dev_set_drvdata(dev, regmap_irq_get_domain(irq_data)); + + return devm_mfd_add_devices(dev, PLATFORM_DEVID_AUTO, pm8008_cells, + ARRAY_SIZE(pm8008_cells), NULL, 0, + regmap_irq_get_domain(irq_data)); } static const struct of_device_id pm8008_match[] = { diff --git a/include/dt-bindings/mfd/qcom-pm8008.h b/include/dt-bindings/mfd/qcom-pm8008.h deleted file mode 100644 index eca9448df228..000000000000 --- a/include/dt-bindings/mfd/qcom-pm8008.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2021 The Linux Foundation. All rights reserved. - */ - -#ifndef __DT_BINDINGS_MFD_QCOM_PM8008_H -#define __DT_BINDINGS_MFD_QCOM_PM8008_H - -/* PM8008 IRQ numbers */ -#define PM8008_IRQ_MISC_UVLO 0 -#define PM8008_IRQ_MISC_OVLO 1 -#define PM8008_IRQ_MISC_OTST2 2 -#define PM8008_IRQ_MISC_OTST3 3 -#define PM8008_IRQ_MISC_LDO_OCP 4 -#define PM8008_IRQ_TEMP_ALARM 5 -#define PM8008_IRQ_GPIO1 6 -#define PM8008_IRQ_GPIO2 7 - -#endif -- cgit v1.2.3 From 92424801261d1564a0bb759da3cf3ccd69fdf5a2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 13 Jun 2024 13:53:08 +0200 Subject: bpf: Fix reg_set_min_max corruption of fake_reg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Juan reported that after doing some changes to buzzer [0] and implementing a new fuzzing strategy guided by coverage, they noticed the following in one of the probes: [...] 13: (79) r6 = *(u64 *)(r0 +0) ; R0=map_value(ks=4,vs=8) R6_w=scalar() 14: (b7) r0 = 0 ; R0_w=0 15: (b4) w0 = -1 ; R0_w=0xffffffff 16: (74) w0 >>= 1 ; R0_w=0x7fffffff 17: (5c) w6 &= w0 ; R0_w=0x7fffffff R6_w=scalar(smin=smin32=0,smax=umax=umax32=0x7fffffff,var_off=(0x0; 0x7fffffff)) 18: (44) w6 |= 2 ; R6_w=scalar(smin=umin=smin32=umin32=2,smax=umax=umax32=0x7fffffff,var_off=(0x2; 0x7ffffffd)) 19: (56) if w6 != 0x7ffffffd goto pc+1 REG INVARIANTS VIOLATION (true_reg2): range bounds violation u64=[0x7fffffff, 0x7ffffffd] s64=[0x7fffffff, 0x7ffffffd] u32=[0x7fffffff, 0x7ffffffd] s32=[0x7fffffff, 0x7ffffffd] var_off=(0x7fffffff, 0x0) REG INVARIANTS VIOLATION (false_reg1): range bounds violation u64=[0x7fffffff, 0x7ffffffd] s64=[0x7fffffff, 0x7ffffffd] u32=[0x7fffffff, 0x7ffffffd] s32=[0x7fffffff, 0x7ffffffd] var_off=(0x7fffffff, 0x0) REG INVARIANTS VIOLATION (false_reg2): const tnum out of sync with range bounds u64=[0x0, 0xffffffffffffffff] s64=[0x8000000000000000, 0x7fffffffffffffff] u32=[0x0, 0xffffffff] s32=[0x80000000, 0x7fffffff] var_off=(0x7fffffff, 0x0) 19: R6_w=0x7fffffff 20: (95) exit from 19 to 21: R0=0x7fffffff R6=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x7ffffffe,var_off=(0x2; 0x7ffffffd)) R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 21: R0=0x7fffffff R6=scalar(smin=umin=smin32=umin32=2,smax=umax=smax32=umax32=0x7ffffffe,var_off=(0x2; 0x7ffffffd)) R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 21: (14) w6 -= 2147483632 ; R6_w=scalar(smin=umin=umin32=2,smax=umax=0xffffffff,smin32=0x80000012,smax32=14,var_off=(0x2; 0xfffffffd)) 22: (76) if w6 s>= 0xe goto pc+1 ; R6_w=scalar(smin=umin=umin32=2,smax=umax=0xffffffff,smin32=0x80000012,smax32=13,var_off=(0x2; 0xfffffffd)) 23: (95) exit from 22 to 24: R0=0x7fffffff R6_w=14 R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 24: R0=0x7fffffff R6_w=14 R7=map_ptr(ks=4,vs=8) R9=ctx() R10=fp0 fp-24=map_ptr(ks=4,vs=8) fp-40=mmmmmmmm 24: (14) w6 -= 14 ; R6_w=0 [...] What can be seen here is a register invariant violation on line 19. After the binary-or in line 18, the verifier knows that bit 2 is set but knows nothing about the rest of the content which was loaded from a map value, meaning, range is [2,0x7fffffff] with var_off=(0x2; 0x7ffffffd). When in line 19 the verifier analyzes the branch, it splits the register states in reg_set_min_max() into the registers of the true branch (true_reg1, true_reg2) and the registers of the false branch (false_reg1, false_reg2). Since the test is w6 != 0x7ffffffd, the src_reg is a known constant. Internally, the verifier creates a "fake" register initialized as scalar to the value of 0x7ffffffd, and then passes it onto reg_set_min_max(). Now, for line 19, it is mathematically impossible to take the false branch of this program, yet the verifier analyzes it. It is impossible because the second bit of r6 will be set due to the prior or operation and the constant in the condition has that bit unset (hex(fd) == binary(1111 1101). When the verifier first analyzes the false / fall-through branch, it will compute an intersection between the var_off of r6 and of the constant. This is because the verifier creates a "fake" register initialized to the value of the constant. The intersection result later refines both registers in regs_refine_cond_op(): [...] t = tnum_intersect(tnum_subreg(reg1->var_off), tnum_subreg(reg2->var_off)); reg1->var_off = tnum_with_subreg(reg1->var_off, t); reg2->var_off = tnum_with_subreg(reg2->var_off, t); [...] Since the verifier is analyzing the false branch of the conditional jump, reg1 is equal to false_reg1 and reg2 is equal to false_reg2, i.e. the reg2 is the "fake" register that was meant to hold a constant value. The resulting var_off of the intersection says that both registers now hold a known value of var_off=(0x7fffffff, 0x0) or in other words: this operation manages to make the verifier think that the "constant" value that was passed in the jump operation now holds a different value. Normally this would not be an issue since it should not influence the true branch, however, false_reg2 and true_reg2 are pointers to the same "fake" register. Meaning, the false branch can influence the results of the true branch. In line 24, the verifier assumes R6_w=0, but the actual runtime value in this case is 1. The fix is simply not passing in the same "fake" register location as inputs to reg_set_min_max(), but instead making a copy. Moving the fake_reg into the env also reduces stack consumption by 120 bytes. With this, the verifier successfully rejects invalid accesses from the test program. [0] https://github.com/google/buzzer Fixes: 67420501e868 ("bpf: generalize reg_set_min_max() to handle non-const register comparisons") Reported-by: Juan José López Jaimez Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Link: https://lore.kernel.org/r/20240613115310.25383-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 ++ kernel/bpf/verifier.c | 14 ++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 50aa87f8d77f..e4070fb02b11 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -746,6 +746,8 @@ struct bpf_verifier_env { /* Same as scratched_regs but for stack slots */ u64 scratched_stack_slots; u64 prev_log_pos, prev_insn_print_pos; + /* buffer used to temporary hold constants as scalar registers */ + struct bpf_reg_state fake_reg[2]; /* buffer used to generate temporary string representations, * e.g., in reg_type_str() to generate reg_type string */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 36ef8e96787e..f455548ba46c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -15113,7 +15113,6 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL; struct bpf_reg_state *eq_branch_regs; - struct bpf_reg_state fake_reg = {}; u8 opcode = BPF_OP(insn->code); bool is_jmp32; int pred = -1; @@ -15179,7 +15178,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, verbose(env, "BPF_JMP/JMP32 uses reserved fields\n"); return -EINVAL; } - src_reg = &fake_reg; + src_reg = &env->fake_reg[0]; + memset(src_reg, 0, sizeof(*src_reg)); src_reg->type = SCALAR_VALUE; __mark_reg_known(src_reg, insn->imm); } @@ -15239,10 +15239,16 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, &other_branch_regs[insn->src_reg], dst_reg, src_reg, opcode, is_jmp32); } else /* BPF_SRC(insn->code) == BPF_K */ { + /* reg_set_min_max() can mangle the fake_reg. Make a copy + * so that these are two different memory locations. The + * src_reg is not used beyond here in context of K. + */ + memcpy(&env->fake_reg[1], &env->fake_reg[0], + sizeof(env->fake_reg[0])); err = reg_set_min_max(env, &other_branch_regs[insn->dst_reg], - src_reg /* fake one */, - dst_reg, src_reg /* same fake one */, + &env->fake_reg[0], + dst_reg, &env->fake_reg[1], opcode, is_jmp32); } if (err) -- cgit v1.2.3 From 9a95c5bfbf02a0a7f5983280fe284a0ff0836c34 Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Tue, 7 May 2024 01:25:41 +0000 Subject: ima: Avoid blocking in RCU read-side critical section A panic happens in ima_match_policy: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 PGD 42f873067 P4D 0 Oops: 0000 [#1] SMP NOPTI CPU: 5 PID: 1286325 Comm: kubeletmonit.sh Kdump: loaded Tainted: P Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015 RIP: 0010:ima_match_policy+0x84/0x450 Code: 49 89 fc 41 89 cf 31 ed 89 44 24 14 eb 1c 44 39 7b 18 74 26 41 83 ff 05 74 20 48 8b 1b 48 3b 1d f2 b9 f4 00 0f 84 9c 01 00 00 <44> 85 73 10 74 ea 44 8b 6b 14 41 f6 c5 01 75 d4 41 f6 c5 02 74 0f RSP: 0018:ff71570009e07a80 EFLAGS: 00010207 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000200 RDX: ffffffffad8dc7c0 RSI: 0000000024924925 RDI: ff3e27850dea2000 RBP: 0000000000000000 R08: 0000000000000000 R09: ffffffffabfce739 R10: ff3e27810cc42400 R11: 0000000000000000 R12: ff3e2781825ef970 R13: 00000000ff3e2785 R14: 000000000000000c R15: 0000000000000001 FS: 00007f5195b51740(0000) GS:ff3e278b12d40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000010 CR3: 0000000626d24002 CR4: 0000000000361ee0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ima_get_action+0x22/0x30 process_measurement+0xb0/0x830 ? page_add_file_rmap+0x15/0x170 ? alloc_set_pte+0x269/0x4c0 ? prep_new_page+0x81/0x140 ? simple_xattr_get+0x75/0xa0 ? selinux_file_open+0x9d/0xf0 ima_file_check+0x64/0x90 path_openat+0x571/0x1720 do_filp_open+0x9b/0x110 ? page_counter_try_charge+0x57/0xc0 ? files_cgroup_alloc_fd+0x38/0x60 ? __alloc_fd+0xd4/0x250 ? do_sys_open+0x1bd/0x250 do_sys_open+0x1bd/0x250 do_syscall_64+0x5d/0x1d0 entry_SYSCALL_64_after_hwframe+0x65/0xca Commit c7423dbdbc9e ("ima: Handle -ESTALE returned by ima_filter_rule_match()") introduced call to ima_lsm_copy_rule within a RCU read-side critical section which contains kmalloc with GFP_KERNEL. This implies a possible sleep and violates limitations of RCU read-side critical sections on non-PREEMPT systems. Sleeping within RCU read-side critical section might cause synchronize_rcu() returning early and break RCU protection, allowing a UAF to happen. The root cause of this issue could be described as follows: | Thread A | Thread B | | |ima_match_policy | | | rcu_read_lock | |ima_lsm_update_rule | | | synchronize_rcu | | | | kmalloc(GFP_KERNEL)| | | sleep | ==> synchronize_rcu returns early | kfree(entry) | | | | entry = entry->next| ==> UAF happens and entry now becomes NULL (or could be anything). | | entry->action | ==> Accessing entry might cause panic. To fix this issue, we are converting all kmalloc that is called within RCU read-side critical section to use GFP_ATOMIC. Fixes: c7423dbdbc9e ("ima: Handle -ESTALE returned by ima_filter_rule_match()") Cc: stable@vger.kernel.org Signed-off-by: GUO Zihua Acked-by: John Johansen Reviewed-by: Mimi Zohar Reviewed-by: Casey Schaufler [PM: fixed missing comment, long lines, !CONFIG_IMA_LSM_RULES case] Signed-off-by: Paul Moore --- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 5 +++-- kernel/auditfilter.c | 5 +++-- security/apparmor/audit.c | 6 +++--- security/apparmor/include/audit.h | 2 +- security/integrity/ima/ima.h | 2 +- security/integrity/ima/ima_policy.c | 15 +++++++++------ security/security.c | 6 ++++-- security/selinux/include/audit.h | 4 +++- security/selinux/ss/services.c | 5 +++-- security/smack/smack_lsm.c | 4 +++- 11 files changed, 34 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index f804b76cde44..44488b1ab9a9 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -413,7 +413,7 @@ LSM_HOOK(void, LSM_RET_VOID, key_post_create_or_update, struct key *keyring, #ifdef CONFIG_AUDIT LSM_HOOK(int, 0, audit_rule_init, u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) LSM_HOOK(int, 0, audit_rule_known, struct audit_krule *krule) LSM_HOOK(int, 0, audit_rule_match, u32 secid, u32 field, u32 op, void *lsmrule) LSM_HOOK(void, LSM_RET_VOID, audit_rule_free, void *lsmrule) diff --git a/include/linux/security.h b/include/linux/security.h index 21cf70346b33..de3af33e6ff5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2048,7 +2048,8 @@ static inline void security_key_post_create_or_update(struct key *keyring, #ifdef CONFIG_AUDIT #ifdef CONFIG_SECURITY -int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule); +int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, + gfp_t gfp); int security_audit_rule_known(struct audit_krule *krule); int security_audit_rule_match(u32 secid, u32 field, u32 op, void *lsmrule); void security_audit_rule_free(void *lsmrule); @@ -2056,7 +2057,7 @@ void security_audit_rule_free(void *lsmrule); #else static inline int security_audit_rule_init(u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) { return 0; } diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index be8c680121e4..d6ef4f4f9cba 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -529,7 +529,8 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data, entry->rule.buflen += f_val; f->lsm_str = str; err = security_audit_rule_init(f->type, f->op, str, - (void **)&f->lsm_rule); + (void **)&f->lsm_rule, + GFP_KERNEL); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (err == -EINVAL) { @@ -799,7 +800,7 @@ static inline int audit_dupe_lsm_field(struct audit_field *df, /* our own (refreshed) copy of lsm_rule */ ret = security_audit_rule_init(df->type, df->op, df->lsm_str, - (void **)&df->lsm_rule); + (void **)&df->lsm_rule, GFP_KERNEL); /* Keep currently invalid fields around in case they * become valid after a policy reload. */ if (ret == -EINVAL) { diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c index 45beb1c5f747..6b5181c668b5 100644 --- a/security/apparmor/audit.c +++ b/security/apparmor/audit.c @@ -217,7 +217,7 @@ void aa_audit_rule_free(void *vrule) } } -int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp) { struct aa_audit_rule *rule; @@ -230,14 +230,14 @@ int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) return -EINVAL; } - rule = kzalloc(sizeof(struct aa_audit_rule), GFP_KERNEL); + rule = kzalloc(sizeof(struct aa_audit_rule), gfp); if (!rule) return -ENOMEM; /* Currently rules are treated as coming from the root ns */ rule->label = aa_label_parse(&root_ns->unconfined->label, rulestr, - GFP_KERNEL, true, false); + gfp, true, false); if (IS_ERR(rule->label)) { int err = PTR_ERR(rule->label); aa_audit_rule_free(rule); diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h index acbb03b9bd25..0c8cc86b417b 100644 --- a/security/apparmor/include/audit.h +++ b/security/apparmor/include/audit.h @@ -200,7 +200,7 @@ static inline int complain_error(int error) } void aa_audit_rule_free(void *vrule); -int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule); +int aa_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, gfp_t gfp); int aa_audit_rule_known(struct audit_krule *rule); int aa_audit_rule_match(u32 sid, u32 field, u32 op, void *vrule); diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 3e568126cd48..c51e24d24d1e 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -546,7 +546,7 @@ static inline void ima_free_modsig(struct modsig *modsig) #else static inline int ima_filter_rule_init(u32 field, u32 op, char *rulestr, - void **lsmrule) + void **lsmrule, gfp_t gfp) { return -EINVAL; } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index c0556907c2e6..09da8e639239 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -401,7 +401,8 @@ static void ima_free_rule(struct ima_rule_entry *entry) kfree(entry); } -static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) +static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry, + gfp_t gfp) { struct ima_rule_entry *nentry; int i; @@ -410,7 +411,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) * Immutable elements are copied over as pointers and data; only * lsm rules can change */ - nentry = kmemdup(entry, sizeof(*nentry), GFP_KERNEL); + nentry = kmemdup(entry, sizeof(*nentry), gfp); if (!nentry) return NULL; @@ -425,7 +426,8 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry) ima_filter_rule_init(nentry->lsm[i].type, Audit_equal, nentry->lsm[i].args_p, - &nentry->lsm[i].rule); + &nentry->lsm[i].rule, + gfp); if (!nentry->lsm[i].rule) pr_warn("rule for LSM \'%s\' is undefined\n", nentry->lsm[i].args_p); @@ -438,7 +440,7 @@ static int ima_lsm_update_rule(struct ima_rule_entry *entry) int i; struct ima_rule_entry *nentry; - nentry = ima_lsm_copy_rule(entry); + nentry = ima_lsm_copy_rule(entry, GFP_KERNEL); if (!nentry) return -ENOMEM; @@ -664,7 +666,7 @@ retry: } if (rc == -ESTALE && !rule_reinitialized) { - lsm_rule = ima_lsm_copy_rule(rule); + lsm_rule = ima_lsm_copy_rule(rule, GFP_ATOMIC); if (lsm_rule) { rule_reinitialized = true; goto retry; @@ -1140,7 +1142,8 @@ static int ima_lsm_rule_init(struct ima_rule_entry *entry, entry->lsm[lsm_rule].type = audit_type; result = ima_filter_rule_init(entry->lsm[lsm_rule].type, Audit_equal, entry->lsm[lsm_rule].args_p, - &entry->lsm[lsm_rule].rule); + &entry->lsm[lsm_rule].rule, + GFP_KERNEL); if (!entry->lsm[lsm_rule].rule) { pr_warn("rule for LSM \'%s\' is undefined\n", entry->lsm[lsm_rule].args_p); diff --git a/security/security.c b/security/security.c index e5da848c50b9..e5ca08789f74 100644 --- a/security/security.c +++ b/security/security.c @@ -5332,15 +5332,17 @@ void security_key_post_create_or_update(struct key *keyring, struct key *key, * @op: rule operator * @rulestr: rule context * @lsmrule: receive buffer for audit rule struct + * @gfp: GFP flag used for kmalloc * * Allocate and initialize an LSM audit rule structure. * * Return: Return 0 if @lsmrule has been successfully set, -EINVAL in case of * an invalid rule. */ -int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule) +int security_audit_rule_init(u32 field, u32 op, char *rulestr, void **lsmrule, + gfp_t gfp) { - return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule); + return call_int_hook(audit_rule_init, field, op, rulestr, lsmrule, gfp); } /** diff --git a/security/selinux/include/audit.h b/security/selinux/include/audit.h index 52aca71210b4..29c7d4c86f6d 100644 --- a/security/selinux/include/audit.h +++ b/security/selinux/include/audit.h @@ -21,12 +21,14 @@ * @op: the operator the rule uses * @rulestr: the text "target" of the rule * @rule: pointer to the new rule structure returned via this + * @gfp: GFP flag used for kmalloc * * Returns 0 if successful, -errno if not. On success, the rule structure * will be allocated internally. The caller must free this structure with * selinux_audit_rule_free() after use. */ -int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **rule); +int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **rule, + gfp_t gfp); /** * selinux_audit_rule_free - free an selinux audit rule structure. diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index f20e1968b7f7..e33e55384b75 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -3507,7 +3507,8 @@ void selinux_audit_rule_free(void *vrule) } } -int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, + gfp_t gfp) { struct selinux_state *state = &selinux_state; struct selinux_policy *policy; @@ -3548,7 +3549,7 @@ int selinux_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) return -EINVAL; } - tmprule = kzalloc(sizeof(struct selinux_audit_rule), GFP_KERNEL); + tmprule = kzalloc(sizeof(struct selinux_audit_rule), gfp); if (!tmprule) return -ENOMEM; context_init(&tmprule->au_ctxt); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 70ba2841e181..f5cbec1e6a92 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4693,11 +4693,13 @@ static int smack_post_notification(const struct cred *w_cred, * @op: required testing operator (=, !=, >, <, ...) * @rulestr: smack label to be audited * @vrule: pointer to save our own audit rule representation + * @gfp: type of the memory for the allocation * * Prepare to audit cases where (@field @op @rulestr) is true. * The label to be audited is created if necessay. */ -static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule) +static int smack_audit_rule_init(u32 field, u32 op, char *rulestr, void **vrule, + gfp_t gfp) { struct smack_known *skp; char **rule = (char **)vrule; -- cgit v1.2.3 From 79a947bd54348d4f63120aacc30505b3ad81fa59 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Thu, 6 Jun 2024 20:52:02 +0200 Subject: ACPI: NUMA: Consolidate header includes The header file acpi/acpi_numa.h is included whether CONFIG_ACPI is defined or not. Include it only once before the #ifdef/#else/#endif preprocessor directives and fix the following make includecheck warning: acpi/acpi_numa.h is included more than once Signed-off-by: Thorsten Blum Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 28c3fb2bef0d..bb18e7bf8826 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -24,6 +24,7 @@ struct irq_domain_ops; #define _LINUX #endif #include +#include #ifdef CONFIG_ACPI @@ -35,7 +36,6 @@ struct irq_domain_ops; #include #include -#include #include #include @@ -777,8 +777,6 @@ const char *acpi_get_subsystem_id(acpi_handle handle); #define acpi_dev_uid_match(adev, uid2) (adev && false) #define acpi_dev_hid_uid_match(adev, hid2, uid2) (adev && false) -#include - struct fwnode_handle; static inline bool acpi_dev_found(const char *hid) -- cgit v1.2.3 From d6161b7d8b33aa8756ab2f7eed8cb371c2e7e8ed Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 12 Jun 2024 15:42:29 +0200 Subject: video/logo: Remove linux_serial_image comments The last user of the serial_console ASCII image was removed in v2.1.115. Signed-off-by: Geert Uytterhoeven Signed-off-by: Helge Deller --- include/linux/linux_logo.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/linux_logo.h b/include/linux/linux_logo.h index d4d5b93efe84..e37699b7e839 100644 --- a/include/linux/linux_logo.h +++ b/include/linux/linux_logo.h @@ -10,9 +10,6 @@ * Copyright (C) 2001 Greg Banks * Copyright (C) 2001 Jan-Benedict Glaw * Copyright (C) 2003 Geert Uytterhoeven - * - * Serial_console ascii image can be any size, - * but should contain %s to display the version */ #include -- cgit v1.2.3 From 633aeefafc9c2a07a76a62be6aac1d73c3e3defa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 13 Jun 2024 14:18:26 -0700 Subject: scsi: core: Introduce the BLIST_SKIP_IO_HINTS flag Prepare for skipping the IO Advice Hints Grouping mode page for USB storage devices. Cc: Alan Stern Cc: Joao Machado Cc: Andy Shevchenko Cc: Christian Heusel Cc: stable@vger.kernel.org Fixes: 4f53138fffc2 ("scsi: sd: Translate data lifetime information") Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20240613211828.2077477-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 4 ++++ include/scsi/scsi_devinfo.h | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index fbc11046bbf6..fe82baa924f8 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include @@ -3118,6 +3119,9 @@ static void sd_read_io_hints(struct scsi_disk *sdkp, unsigned char *buffer) struct scsi_mode_data data; int res; + if (sdp->sdev_bflags & BLIST_SKIP_IO_HINTS) + return; + res = scsi_mode_sense(sdp, /*dbd=*/0x8, /*modepage=*/0x0a, /*subpage=*/0x05, buffer, SD_BUF_SIZE, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr); diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 6b548dc2c496..1d79a3b536ce 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -69,8 +69,10 @@ #define BLIST_RETRY_ITF ((__force blist_flags_t)(1ULL << 32)) /* Always retry ABORTED_COMMAND with ASC 0xc1 */ #define BLIST_RETRY_ASC_C1 ((__force blist_flags_t)(1ULL << 33)) +/* Do not query the IO Advice Hints Grouping mode page */ +#define BLIST_SKIP_IO_HINTS ((__force blist_flags_t)(1ULL << 34)) -#define __BLIST_LAST_USED BLIST_RETRY_ASC_C1 +#define __BLIST_LAST_USED BLIST_SKIP_IO_HINTS #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \ (__force blist_flags_t) \ -- cgit v1.2.3 From f4a1254f2a076afb0edd473589bf40f9b4d36b41 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 14 Jun 2024 01:04:29 +0100 Subject: io_uring: fix cancellation overwriting req->flags Only the current owner of a request is allowed to write into req->flags. Hence, the cancellation path should never touch it. Add a new field instead of the flag, move it into the 3rd cache line because it should always be initialised. poll_refs can move further as polling is an involved process anyway. It's a minimal patch, in the future we can and should find a better place for it and remove now unused REQ_F_CANCEL_SEQ. Fixes: 521223d7c229f ("io_uring/cancel: don't default to setting req->work.cancel_seq") Cc: stable@vger.kernel.org Reported-by: Li Shi Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/6827b129f8f0ad76fa9d1f0a773de938b240ffab.1718323430.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 3 ++- io_uring/cancel.h | 4 ++-- io_uring/io_uring.c | 1 + 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 7a6b190c7da7..b48570eaa449 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -648,7 +648,7 @@ struct io_kiocb { struct io_rsrc_node *rsrc_node; atomic_t refs; - atomic_t poll_refs; + bool cancel_seq_set; struct io_task_work io_task_work; /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ struct hlist_node hash_node; @@ -657,6 +657,7 @@ struct io_kiocb { /* opcode allocated if it needs to store data for async defer */ void *async_data; /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ + atomic_t poll_refs; struct io_kiocb *link; /* custom credentials, valid IFF REQ_F_CREDS is set */ const struct cred *creds; diff --git a/io_uring/cancel.h b/io_uring/cancel.h index 76b32e65c03c..b33995e00ba9 100644 --- a/io_uring/cancel.h +++ b/io_uring/cancel.h @@ -27,10 +27,10 @@ bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd); static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence) { - if ((req->flags & REQ_F_CANCEL_SEQ) && sequence == req->work.cancel_seq) + if (req->cancel_seq_set && sequence == req->work.cancel_seq) return true; - req->flags |= REQ_F_CANCEL_SEQ; + req->cancel_seq_set = true; req->work.cancel_seq = sequence; return false; } diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 816e93e7f949..154b25b8a613 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -2058,6 +2058,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, req->file = NULL; req->rsrc_node = NULL; req->task = current; + req->cancel_seq_set = false; if (unlikely(opcode >= IORING_OP_LAST)) { req->opcode = 0; -- cgit v1.2.3 From 88a26c3c2405626e0083a49609c5e8cd6c453d87 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Mon, 15 Jan 2024 17:58:46 +0800 Subject: dt-bindings: clock: sophgo: add pll clocks for SG2042 Add bindings for the pll clocks for Sophgo SG2042. Signed-off-by: Chen Wang Reviewed-by: Rob Herring Reviewed-by: Guo Ren --- .../bindings/clock/sophgo,sg2042-pll.yaml | 53 ++++++++++++++++++++++ include/dt-bindings/clock/sophgo,sg2042-pll.h | 14 ++++++ 2 files changed, 67 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml create mode 100644 include/dt-bindings/clock/sophgo,sg2042-pll.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml b/Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml new file mode 100644 index 000000000000..1a417a627dd2 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/sophgo,sg2042-pll.yaml @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/sophgo,sg2042-pll.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo SG2042 PLL Clock Generator + +maintainers: + - Chen Wang + +properties: + compatible: + const: sophgo,sg2042-pll + + reg: + maxItems: 1 + + clocks: + items: + - description: Oscillator(Clock Generation IC) for Main/Fixed PLL (25 MHz) + - description: Oscillator(Clock Generation IC) for DDR PLL 0 (25 MHz) + - description: Oscillator(Clock Generation IC) for DDR PLL 1 (25 MHz) + + clock-names: + items: + - const: cgi_main + - const: cgi_dpll0 + - const: cgi_dpll1 + + '#clock-cells': + const: 1 + description: + See for valid indices. + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + +additionalProperties: false + +examples: + - | + clock-controller@10000000 { + compatible = "sophgo,sg2042-pll"; + reg = <0x10000000 0x10000>; + clocks = <&cgi_main>, <&cgi_dpll0>, <&cgi_dpll1>; + clock-names = "cgi_main", "cgi_dpll0", "cgi_dpll1"; + #clock-cells = <1>; + }; diff --git a/include/dt-bindings/clock/sophgo,sg2042-pll.h b/include/dt-bindings/clock/sophgo,sg2042-pll.h new file mode 100644 index 000000000000..2d519b3bf51c --- /dev/null +++ b/include/dt-bindings/clock/sophgo,sg2042-pll.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_SOPHGO_SG2042_PLL_H__ +#define __DT_BINDINGS_SOPHGO_SG2042_PLL_H__ + +#define MPLL_CLK 0 +#define FPLL_CLK 1 +#define DPLL0_CLK 2 +#define DPLL1_CLK 3 + +#endif /* __DT_BINDINGS_SOPHGO_SG2042_PLL_H__ */ -- cgit v1.2.3 From 5a7144d61d73d801540f8846d8e1b9fa52a5559c Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Wed, 31 Jan 2024 09:57:01 +0800 Subject: dt-bindings: clock: sophgo: add RP gate clocks for SG2042 Add bindings for the gate clocks of RP subsystem for Sophgo SG2042. Signed-off-by: Chen Wang Reviewed-by: Rob Herring --- .../bindings/clock/sophgo,sg2042-rpgate.yaml | 49 ++++++++++++++++++ include/dt-bindings/clock/sophgo,sg2042-rpgate.h | 58 ++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml create mode 100644 include/dt-bindings/clock/sophgo,sg2042-rpgate.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml b/Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml new file mode 100644 index 000000000000..1491fb8ef6a3 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/sophgo,sg2042-rpgate.yaml @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/sophgo,sg2042-rpgate.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo SG2042 Gate Clock Generator for RP(riscv processors) subsystem + +maintainers: + - Chen Wang + +properties: + compatible: + const: sophgo,sg2042-rpgate + + reg: + maxItems: 1 + + clocks: + items: + - description: Gate clock for RP subsystem + + clock-names: + items: + - const: rpgate + + '#clock-cells': + const: 1 + description: + See for valid indices. + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + +additionalProperties: false + +examples: + - | + clock-controller@20000000 { + compatible = "sophgo,sg2042-rpgate"; + reg = <0x20000000 0x10000>; + clocks = <&clkgen 85>; + clock-names = "rpgate"; + #clock-cells = <1>; + }; diff --git a/include/dt-bindings/clock/sophgo,sg2042-rpgate.h b/include/dt-bindings/clock/sophgo,sg2042-rpgate.h new file mode 100644 index 000000000000..8b4522d5f559 --- /dev/null +++ b/include/dt-bindings/clock/sophgo,sg2042-rpgate.h @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__ +#define __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__ + +#define GATE_CLK_RXU0 0 +#define GATE_CLK_RXU1 1 +#define GATE_CLK_RXU2 2 +#define GATE_CLK_RXU3 3 +#define GATE_CLK_RXU4 4 +#define GATE_CLK_RXU5 5 +#define GATE_CLK_RXU6 6 +#define GATE_CLK_RXU7 7 +#define GATE_CLK_RXU8 8 +#define GATE_CLK_RXU9 9 +#define GATE_CLK_RXU10 10 +#define GATE_CLK_RXU11 11 +#define GATE_CLK_RXU12 12 +#define GATE_CLK_RXU13 13 +#define GATE_CLK_RXU14 14 +#define GATE_CLK_RXU15 15 +#define GATE_CLK_RXU16 16 +#define GATE_CLK_RXU17 17 +#define GATE_CLK_RXU18 18 +#define GATE_CLK_RXU19 19 +#define GATE_CLK_RXU20 20 +#define GATE_CLK_RXU21 21 +#define GATE_CLK_RXU22 22 +#define GATE_CLK_RXU23 23 +#define GATE_CLK_RXU24 24 +#define GATE_CLK_RXU25 25 +#define GATE_CLK_RXU26 26 +#define GATE_CLK_RXU27 27 +#define GATE_CLK_RXU28 28 +#define GATE_CLK_RXU29 29 +#define GATE_CLK_RXU30 30 +#define GATE_CLK_RXU31 31 +#define GATE_CLK_MP0 32 +#define GATE_CLK_MP1 33 +#define GATE_CLK_MP2 34 +#define GATE_CLK_MP3 35 +#define GATE_CLK_MP4 36 +#define GATE_CLK_MP5 37 +#define GATE_CLK_MP6 38 +#define GATE_CLK_MP7 39 +#define GATE_CLK_MP8 40 +#define GATE_CLK_MP9 41 +#define GATE_CLK_MP10 42 +#define GATE_CLK_MP11 43 +#define GATE_CLK_MP12 44 +#define GATE_CLK_MP13 45 +#define GATE_CLK_MP14 46 +#define GATE_CLK_MP15 47 + +#endif /* __DT_BINDINGS_SOPHGO_SG2042_RPGATE_H__ */ -- cgit v1.2.3 From 5911423798b2eba65d6ea0aff6505280b3eb55e6 Mon Sep 17 00:00:00 2001 From: Chen Wang Date: Fri, 24 Nov 2023 14:02:43 +0800 Subject: dt-bindings: clock: sophgo: add clkgen for SG2042 Add bindings for the clock generator of divider/mux and gates working for other subsystem than RP subsystem for Sophgo SG2042. Signed-off-by: Chen Wang Reviewed-by: Rob Herring --- .../bindings/clock/sophgo,sg2042-clkgen.yaml | 61 +++++++++++ include/dt-bindings/clock/sophgo,sg2042-clkgen.h | 111 +++++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml create mode 100644 include/dt-bindings/clock/sophgo,sg2042-clkgen.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml b/Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml new file mode 100644 index 000000000000..e7a9255bcb58 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/sophgo,sg2042-clkgen.yaml @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/sophgo,sg2042-clkgen.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Sophgo SG2042 Clock Generator for divider/mux/gate + +maintainers: + - Chen Wang + +properties: + compatible: + const: sophgo,sg2042-clkgen + + reg: + maxItems: 1 + + clocks: + items: + - description: Main PLL + - description: Fixed PLL + - description: DDR PLL 0 + - description: DDR PLL 1 + + clock-names: + items: + - const: mpll + - const: fpll + - const: dpll0 + - const: dpll1 + + '#clock-cells': + const: 1 + description: + See for valid indices. + +required: + - compatible + - reg + - clocks + - clock-names + - '#clock-cells' + +additionalProperties: false + +examples: + - | + clock-controller@30012000 { + compatible = "sophgo,sg2042-clkgen"; + reg = <0x30012000 0x1000>; + clocks = <&pllclk 0>, + <&pllclk 1>, + <&pllclk 2>, + <&pllclk 3>; + clock-names = "mpll", + "fpll", + "dpll0", + "dpll1"; + #clock-cells = <1>; + }; diff --git a/include/dt-bindings/clock/sophgo,sg2042-clkgen.h b/include/dt-bindings/clock/sophgo,sg2042-clkgen.h new file mode 100644 index 000000000000..84f7857317a2 --- /dev/null +++ b/include/dt-bindings/clock/sophgo,sg2042-clkgen.h @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright (C) 2023 Sophgo Technology Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__ +#define __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__ + +#define DIV_CLK_MPLL_RP_CPU_NORMAL_0 0 +#define DIV_CLK_MPLL_AXI_DDR_0 1 +#define DIV_CLK_FPLL_DDR01_1 2 +#define DIV_CLK_FPLL_DDR23_1 3 +#define DIV_CLK_FPLL_RP_CPU_NORMAL_1 4 +#define DIV_CLK_FPLL_50M_A53 5 +#define DIV_CLK_FPLL_TOP_RP_CMN_DIV2 6 +#define DIV_CLK_FPLL_UART_500M 7 +#define DIV_CLK_FPLL_AHB_LPC 8 +#define DIV_CLK_FPLL_EFUSE 9 +#define DIV_CLK_FPLL_TX_ETH0 10 +#define DIV_CLK_FPLL_PTP_REF_I_ETH0 11 +#define DIV_CLK_FPLL_REF_ETH0 12 +#define DIV_CLK_FPLL_EMMC 13 +#define DIV_CLK_FPLL_SD 14 +#define DIV_CLK_FPLL_TOP_AXI0 15 +#define DIV_CLK_FPLL_TOP_AXI_HSPERI 16 +#define DIV_CLK_FPLL_AXI_DDR_1 17 +#define DIV_CLK_FPLL_DIV_TIMER1 18 +#define DIV_CLK_FPLL_DIV_TIMER2 19 +#define DIV_CLK_FPLL_DIV_TIMER3 20 +#define DIV_CLK_FPLL_DIV_TIMER4 21 +#define DIV_CLK_FPLL_DIV_TIMER5 22 +#define DIV_CLK_FPLL_DIV_TIMER6 23 +#define DIV_CLK_FPLL_DIV_TIMER7 24 +#define DIV_CLK_FPLL_DIV_TIMER8 25 +#define DIV_CLK_FPLL_100K_EMMC 26 +#define DIV_CLK_FPLL_100K_SD 27 +#define DIV_CLK_FPLL_GPIO_DB 28 +#define DIV_CLK_DPLL0_DDR01_0 29 +#define DIV_CLK_DPLL1_DDR23_0 30 + +#define GATE_CLK_RP_CPU_NORMAL_DIV0 31 +#define GATE_CLK_AXI_DDR_DIV0 32 + +#define GATE_CLK_RP_CPU_NORMAL_DIV1 33 +#define GATE_CLK_A53_50M 34 +#define GATE_CLK_TOP_RP_CMN_DIV2 35 +#define GATE_CLK_HSDMA 36 +#define GATE_CLK_EMMC_100M 37 +#define GATE_CLK_SD_100M 38 +#define GATE_CLK_TX_ETH0 39 +#define GATE_CLK_PTP_REF_I_ETH0 40 +#define GATE_CLK_REF_ETH0 41 +#define GATE_CLK_UART_500M 42 +#define GATE_CLK_EFUSE 43 + +#define GATE_CLK_AHB_LPC 44 +#define GATE_CLK_AHB_ROM 45 +#define GATE_CLK_AHB_SF 46 + +#define GATE_CLK_APB_UART 47 +#define GATE_CLK_APB_TIMER 48 +#define GATE_CLK_APB_EFUSE 49 +#define GATE_CLK_APB_GPIO 50 +#define GATE_CLK_APB_GPIO_INTR 51 +#define GATE_CLK_APB_SPI 52 +#define GATE_CLK_APB_I2C 53 +#define GATE_CLK_APB_WDT 54 +#define GATE_CLK_APB_PWM 55 +#define GATE_CLK_APB_RTC 56 + +#define GATE_CLK_AXI_PCIE0 57 +#define GATE_CLK_AXI_PCIE1 58 +#define GATE_CLK_SYSDMA_AXI 59 +#define GATE_CLK_AXI_DBG_I2C 60 +#define GATE_CLK_AXI_SRAM 61 +#define GATE_CLK_AXI_ETH0 62 +#define GATE_CLK_AXI_EMMC 63 +#define GATE_CLK_AXI_SD 64 +#define GATE_CLK_TOP_AXI0 65 +#define GATE_CLK_TOP_AXI_HSPERI 66 + +#define GATE_CLK_TIMER1 67 +#define GATE_CLK_TIMER2 68 +#define GATE_CLK_TIMER3 69 +#define GATE_CLK_TIMER4 70 +#define GATE_CLK_TIMER5 71 +#define GATE_CLK_TIMER6 72 +#define GATE_CLK_TIMER7 73 +#define GATE_CLK_TIMER8 74 +#define GATE_CLK_100K_EMMC 75 +#define GATE_CLK_100K_SD 76 +#define GATE_CLK_GPIO_DB 77 + +#define GATE_CLK_AXI_DDR_DIV1 78 +#define GATE_CLK_DDR01_DIV1 79 +#define GATE_CLK_DDR23_DIV1 80 + +#define GATE_CLK_DDR01_DIV0 81 +#define GATE_CLK_DDR23_DIV0 82 + +#define GATE_CLK_DDR01 83 +#define GATE_CLK_DDR23 84 +#define GATE_CLK_RP_CPU_NORMAL 85 +#define GATE_CLK_AXI_DDR 86 + +#define MUX_CLK_DDR01 87 +#define MUX_CLK_DDR23 88 +#define MUX_CLK_RP_CPU_NORMAL 89 +#define MUX_CLK_AXI_DDR 90 + +#endif /* __DT_BINDINGS_SOPHGO_SG2042_CLKGEN_H__ */ -- cgit v1.2.3 From 6b0d3355e5a58fd73529fa6f930a877caca3f75d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 26 May 2024 20:17:16 +0200 Subject: leds: class: Add flag to avoid automatic renaming of LED devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a mechanism for drivers to opt-out of the automatic device renaming on conflicts. Those drivers will provide their own conflict resolution. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20240526-cros_ec-kbd-led-framework-v3-2-ee577415a521@weissschuh.net Signed-off-by: Lee Jones --- drivers/leds/led-class.c | 2 ++ include/linux/leds.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index c298355d5b7d..2f08c20702f3 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -503,6 +503,8 @@ int led_classdev_register_ext(struct device *parent, ret = led_classdev_next_name(proposed_name, final_name, sizeof(final_name)); if (ret < 0) return ret; + else if (ret && led_cdev->flags & LED_REJECT_NAME_CONFLICT) + return -EEXIST; else if (ret) dev_warn(parent, "Led %s renamed to %s due to name collision\n", proposed_name, final_name); diff --git a/include/linux/leds.h b/include/linux/leds.h index 6300313c46b7..36663ac6c58a 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -107,6 +107,7 @@ struct led_classdev { #define LED_BRIGHT_HW_CHANGED BIT(21) #define LED_RETAIN_AT_SHUTDOWN BIT(22) #define LED_INIT_DEFAULT_TRIGGER BIT(23) +#define LED_REJECT_NAME_CONFLICT BIT(24) /* set_brightness_work / blink_timer flags, atomic, private. */ unsigned long work_flags; -- cgit v1.2.3 From 146a06a0d225cae240065233fd168fb0b95a10ff Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:13 +0200 Subject: HID: rename struct hid_bpf_ops into hid_ops Those operations are the ones from HID, not HID-BPF, and I'd like to reuse hid_bpf_ops as the user facing struct_ops API. Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-1-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_dispatch.c | 22 +++++++++++----------- drivers/hid/hid-core.c | 6 +++--- include/linux/hid_bpf.h | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 10289f44d0cc..55c9f82fdef0 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -21,8 +21,8 @@ #include "hid_bpf_dispatch.h" #include "entrypoints/entrypoints.lskel.h" -struct hid_bpf_ops *hid_bpf_ops; -EXPORT_SYMBOL(hid_bpf_ops); +struct hid_ops *hid_ops; +EXPORT_SYMBOL(hid_ops); /** * hid_bpf_device_event - Called whenever an event is coming in from the device @@ -284,13 +284,13 @@ hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) struct device *dev; int err, fd; - if (!hid_bpf_ops) + if (!hid_ops) return -EINVAL; if ((flags & ~HID_BPF_FLAG_MASK)) return -EINVAL; - dev = bus_find_device(hid_bpf_ops->bus_type, NULL, &hid_id, device_match_id); + dev = bus_find_device(hid_ops->bus_type, NULL, &hid_id, device_match_id); if (!dev) return -EINVAL; @@ -335,10 +335,10 @@ hid_bpf_allocate_context(unsigned int hid_id) struct hid_bpf_ctx_kern *ctx_kern = NULL; struct device *dev; - if (!hid_bpf_ops) + if (!hid_ops) return NULL; - dev = bus_find_device(hid_bpf_ops->bus_type, NULL, &hid_id, device_match_id); + dev = bus_find_device(hid_ops->bus_type, NULL, &hid_id, device_match_id); if (!dev) return NULL; @@ -386,7 +386,7 @@ __hid_bpf_hw_check_params(struct hid_bpf_ctx *ctx, __u8 *buf, size_t *buf__sz, u32 report_len; /* check arguments */ - if (!ctx || !hid_bpf_ops || !buf) + if (!ctx || !hid_ops || !buf) return -EINVAL; switch (rtype) { @@ -404,7 +404,7 @@ __hid_bpf_hw_check_params(struct hid_bpf_ctx *ctx, __u8 *buf, size_t *buf__sz, hdev = (struct hid_device *)ctx->hid; /* discard const */ report_enum = hdev->report_enum + rtype; - report = hid_bpf_ops->hid_get_report(report_enum, buf); + report = hid_ops->hid_get_report(report_enum, buf); if (!report) return -EINVAL; @@ -459,7 +459,7 @@ hid_bpf_hw_request(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz, if (!dma_data) return -ENOMEM; - ret = hid_bpf_ops->hid_hw_raw_request(hdev, + ret = hid_ops->hid_hw_raw_request(hdev, dma_data[0], dma_data, size, @@ -501,7 +501,7 @@ hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx, __u8 *buf, size_t buf__sz) if (!dma_data) return -ENOMEM; - ret = hid_bpf_ops->hid_hw_output_report(hdev, + ret = hid_ops->hid_hw_output_report(hdev, dma_data, size); @@ -534,7 +534,7 @@ hid_bpf_input_report(struct hid_bpf_ctx *ctx, enum hid_report_type type, u8 *buf hdev = (struct hid_device *)ctx->hid; /* discard const */ - return hid_bpf_ops->hid_input_report(hdev, type, buf, size, 0); + return hid_ops->hid_input_report(hdev, type, buf, size, 0); } __bpf_kfunc_end_defs(); diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 74efda212c55..aed8850a4d01 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2970,7 +2970,7 @@ int hid_check_keys_pressed(struct hid_device *hid) EXPORT_SYMBOL_GPL(hid_check_keys_pressed); #ifdef CONFIG_HID_BPF -static struct hid_bpf_ops hid_ops = { +static struct hid_ops __hid_ops = { .hid_get_report = hid_get_report, .hid_hw_raw_request = hid_hw_raw_request, .hid_hw_output_report = hid_hw_output_report, @@ -2991,7 +2991,7 @@ static int __init hid_init(void) } #ifdef CONFIG_HID_BPF - hid_bpf_ops = &hid_ops; + hid_ops = &__hid_ops; #endif ret = hidraw_init(); @@ -3010,7 +3010,7 @@ err: static void __exit hid_exit(void) { #ifdef CONFIG_HID_BPF - hid_bpf_ops = NULL; + hid_ops = NULL; #endif hid_debug_exit(); hidraw_exit(); diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index eec2592dec12..a66103618e6e 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -97,7 +97,7 @@ enum hid_bpf_prog_type { struct hid_report_enum; -struct hid_bpf_ops { +struct hid_ops { struct hid_report *(*hid_get_report)(struct hid_report_enum *report_enum, const u8 *data); int (*hid_hw_raw_request)(struct hid_device *hdev, unsigned char reportnum, __u8 *buf, @@ -110,7 +110,7 @@ struct hid_bpf_ops { const struct bus_type *bus_type; }; -extern struct hid_bpf_ops *hid_bpf_ops; +extern struct hid_ops *hid_ops; struct hid_bpf_prog_list { u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV]; -- cgit v1.2.3 From ebc0d8093e8c97de459615438edefad1a4ac352c Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:15 +0200 Subject: HID: bpf: implement HID-BPF through bpf_struct_ops We do this implementation in several steps to not have the CI failing: - first (this patch), we add struct_ops while keeping the existing infra available - then we change the selftests, the examples and the existing in-tree HID-BPF programs - then we remove the existing trace points making old HID-BPF obsolete There are a few advantages of struct_ops over tracing: - compatibility with sleepable programs (for hid_hw_raw_request() in a later patch) - a lot simpler in the kernel: it's a simple rcu protected list - we can add more parameters to the function called without much trouble - the "attach" is now generic through BPF-core: the caller just needs to set hid_id and flags before calling __load(). - all the BPF tough part is not handled in BPF-core through generic processing - hid_bpf_ctx is now only writable where it needs be Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-3-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/Makefile | 2 +- drivers/hid/bpf/hid_bpf_dispatch.c | 52 +++++++- drivers/hid/bpf/hid_bpf_dispatch.h | 4 + drivers/hid/bpf/hid_bpf_jmp_table.c | 3 + drivers/hid/bpf/hid_bpf_struct_ops.c | 252 +++++++++++++++++++++++++++++++++++ include/linux/hid_bpf.h | 61 ++++++++- 6 files changed, 365 insertions(+), 9 deletions(-) create mode 100644 drivers/hid/bpf/hid_bpf_struct_ops.c (limited to 'include') diff --git a/drivers/hid/bpf/Makefile b/drivers/hid/bpf/Makefile index cf55120cf7d6..1cb3f31e9335 100644 --- a/drivers/hid/bpf/Makefile +++ b/drivers/hid/bpf/Makefile @@ -8,4 +8,4 @@ LIBBPF_INCLUDE = $(srctree)/tools/lib obj-$(CONFIG_HID_BPF) += hid_bpf.o CFLAGS_hid_bpf_dispatch.o += -I$(LIBBPF_INCLUDE) CFLAGS_hid_bpf_jmp_table.o += -I$(LIBBPF_INCLUDE) -hid_bpf-objs += hid_bpf_dispatch.o hid_bpf_jmp_table.o +hid_bpf-objs += hid_bpf_dispatch.o hid_bpf_jmp_table.o hid_bpf_struct_ops.o diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index c8bb79ce2354..7216c3c7713d 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -58,6 +58,7 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type }, .data = hdev->bpf.device_data, }; + struct hid_bpf_ops *e; int ret; if (type >= HID_REPORT_TYPES) @@ -70,9 +71,25 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type memset(ctx_kern.data, 0, hdev->bpf.allocated_data); memcpy(ctx_kern.data, data, *size); + rcu_read_lock(); + list_for_each_entry_rcu(e, &hdev->bpf.prog_list, list) { + if (e->hid_device_event) { + ret = e->hid_device_event(&ctx_kern.ctx, type); + if (ret < 0) { + rcu_read_unlock(); + return ERR_PTR(ret); + } + + if (ret) + ctx_kern.ctx.retval = ret; + } + } + rcu_read_unlock(); + ret = hid_bpf_prog_run(hdev, HID_BPF_PROG_TYPE_DEVICE_EVENT, &ctx_kern); if (ret < 0) return ERR_PTR(ret); + ret = ctx_kern.ctx.retval; if (ret) { if (ret > ctx_kern.ctx.allocated_size) @@ -122,7 +139,10 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s memcpy(ctx_kern.data, rdesc, min_t(unsigned int, *size, HID_MAX_DESCRIPTOR_SIZE)); - ret = hid_bpf_prog_run(hdev, HID_BPF_PROG_TYPE_RDESC_FIXUP, &ctx_kern); + if (hdev->bpf.rdesc_ops) + ret = hdev->bpf.rdesc_ops->hid_rdesc_fixup(&ctx_kern.ctx); + else + ret = hid_bpf_prog_run(hdev, HID_BPF_PROG_TYPE_RDESC_FIXUP, &ctx_kern); if (ret < 0) goto ignore_bpf; @@ -150,7 +170,7 @@ static int device_match_id(struct device *dev, const void *id) return hdev->id == *(int *)id; } -static struct hid_device *hid_get_device(unsigned int hid_id) +struct hid_device *hid_get_device(unsigned int hid_id) { struct device *dev; @@ -164,7 +184,7 @@ static struct hid_device *hid_get_device(unsigned int hid_id) return to_hid_device(dev); } -static void hid_put_device(struct hid_device *hid) +void hid_put_device(struct hid_device *hid) { put_device(&hid->dev); } @@ -205,7 +225,7 @@ static int __hid_bpf_allocate_data(struct hid_device *hdev, u8 **data, u32 *size return 0; } -static int hid_bpf_allocate_event_data(struct hid_device *hdev) +int hid_bpf_allocate_event_data(struct hid_device *hdev) { /* hdev->bpf.device_data is already allocated, abort */ if (hdev->bpf.device_data) @@ -592,14 +612,22 @@ static const struct btf_kfunc_id_set hid_bpf_syscall_kfunc_set = { int hid_bpf_connect_device(struct hid_device *hdev) { - struct hid_bpf_prog_list *prog_list; + bool need_to_allocate = false; + struct hid_bpf_ops *e; rcu_read_lock(); - prog_list = rcu_dereference(hdev->bpf.progs[HID_BPF_PROG_TYPE_DEVICE_EVENT]); + list_for_each_entry_rcu(e, &hdev->bpf.prog_list, list) { + if (e->hid_device_event) { + need_to_allocate = true; + break; + } + } + if (rcu_dereference(hdev->bpf.progs[HID_BPF_PROG_TYPE_DEVICE_EVENT])) + need_to_allocate = true; rcu_read_unlock(); /* only allocate BPF data if there are programs attached */ - if (!prog_list) + if (!need_to_allocate) return 0; return hid_bpf_allocate_event_data(hdev); @@ -623,12 +651,15 @@ void hid_bpf_destroy_device(struct hid_device *hdev) hdev->bpf.destroyed = true; __hid_bpf_destroy_device(hdev); + __hid_bpf_ops_destroy_device(hdev); } EXPORT_SYMBOL_GPL(hid_bpf_destroy_device); void hid_bpf_device_init(struct hid_device *hdev) { spin_lock_init(&hdev->bpf.progs_lock); + INIT_LIST_HEAD(&hdev->bpf.prog_list); + mutex_init(&hdev->bpf.prog_list_lock); } EXPORT_SYMBOL_GPL(hid_bpf_device_init); @@ -662,6 +693,13 @@ static int __init hid_bpf_init(void) return 0; } + /* register struct_ops kfuncs after we are sure we can load our preloaded bpf program */ + err = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &hid_bpf_kfunc_set); + if (err) { + pr_warn("error while setting HID BPF tracing kfuncs: %d", err); + return 0; + } + /* register syscalls after we are sure we can load our preloaded bpf program */ err = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &hid_bpf_syscall_kfunc_set); if (err) { diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h index fbe0639d09f2..e52c43d81650 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.h +++ b/drivers/hid/bpf/hid_bpf_dispatch.h @@ -10,12 +10,16 @@ struct hid_bpf_ctx_kern { u8 *data; }; +struct hid_device *hid_get_device(unsigned int hid_id); +void hid_put_device(struct hid_device *hid); +int hid_bpf_allocate_event_data(struct hid_device *hdev); int hid_bpf_preload_skel(void); void hid_bpf_free_links_and_skel(void); int hid_bpf_get_prog_attach_type(struct bpf_prog *prog); int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd, struct bpf_prog *prog, __u32 flags); void __hid_bpf_destroy_device(struct hid_device *hdev); +void __hid_bpf_ops_destroy_device(struct hid_device *hdev); int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type, struct hid_bpf_ctx_kern *ctx_kern); int hid_bpf_reconnect(struct hid_device *hdev); diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c index aa8e1c79cdf5..8a54ba447718 100644 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ b/drivers/hid/bpf/hid_bpf_jmp_table.c @@ -81,6 +81,9 @@ static int hid_bpf_program_count(struct hid_device *hdev, if (type >= HID_BPF_PROG_TYPE_MAX) return -EINVAL; + if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && hdev->bpf.rdesc_ops) + n += 1; + FOR_ENTRIES(i, jmp_table.tail, jmp_table.head) { struct hid_bpf_prog_entry *entry = &jmp_table.entries[i]; diff --git a/drivers/hid/bpf/hid_bpf_struct_ops.c b/drivers/hid/bpf/hid_bpf_struct_ops.c new file mode 100644 index 000000000000..dde547e734ed --- /dev/null +++ b/drivers/hid/bpf/hid_bpf_struct_ops.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * HID-BPF support for Linux + * + * Copyright (c) 2024 Benjamin Tissoires + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "hid_bpf_dispatch.h" + +static struct btf *hid_bpf_ops_btf; + +static int hid_bpf_ops_init(struct btf *btf) +{ + hid_bpf_ops_btf = btf; + return 0; +} + +static bool hid_bpf_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static int hid_bpf_ops_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + u32 moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct hid_bpf_ops, hid_rdesc_fixup): + break; + default: + if (prog->sleepable) + return -EINVAL; + } + + return 0; +} + +static int hid_bpf_ops_btf_struct_access(struct bpf_verifier_log *log, + const struct bpf_reg_state *reg, + int off, int size) +{ + const struct btf_type *state; + const struct btf_type *t; + s32 type_id; + + type_id = btf_find_by_name_kind(reg->btf, "hid_bpf_ctx", + BTF_KIND_STRUCT); + if (type_id < 0) + return -EINVAL; + + t = btf_type_by_id(reg->btf, reg->btf_id); + state = btf_type_by_id(reg->btf, type_id); + if (t != state) { + bpf_log(log, "only access to hid_bpf_ctx is supported\n"); + return -EACCES; + } + + /* out-of-bound access in hid_bpf_ctx */ + if (off + size > sizeof(struct hid_bpf_ctx)) { + bpf_log(log, "write access at off %d with size %d\n", off, size); + return -EACCES; + } + + if (off < offsetof(struct hid_bpf_ctx, retval)) { + bpf_log(log, + "write access at off %d with size %d on read-only part of hid_bpf_ctx\n", + off, size); + return -EACCES; + } + + return NOT_INIT; +} + +static const struct bpf_verifier_ops hid_bpf_verifier_ops = { + .is_valid_access = hid_bpf_ops_is_valid_access, + .btf_struct_access = hid_bpf_ops_btf_struct_access, +}; + +static int hid_bpf_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + const struct hid_bpf_ops *uhid_bpf_ops; + struct hid_bpf_ops *khid_bpf_ops; + u32 moff; + + uhid_bpf_ops = (const struct hid_bpf_ops *)udata; + khid_bpf_ops = (struct hid_bpf_ops *)kdata; + + moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct hid_bpf_ops, hid_id): + /* For hid_id and flags fields, this function has to copy it + * and return 1 to indicate that the data has been handled by + * the struct_ops type, or the verifier will reject the map if + * the value of those fields is not zero. + */ + khid_bpf_ops->hid_id = uhid_bpf_ops->hid_id; + return 1; + case offsetof(struct hid_bpf_ops, flags): + if (uhid_bpf_ops->flags & ~BPF_F_BEFORE) + return -EINVAL; + khid_bpf_ops->flags = uhid_bpf_ops->flags; + return 1; + } + return 0; +} + +static int hid_bpf_reg(void *kdata) +{ + struct hid_bpf_ops *ops = kdata; + struct hid_device *hdev; + int count, err = 0; + + hdev = hid_get_device(ops->hid_id); + if (IS_ERR(hdev)) + return PTR_ERR(hdev); + + ops->hdev = hdev; + + mutex_lock(&hdev->bpf.prog_list_lock); + + count = list_count_nodes(&hdev->bpf.prog_list); + if (count >= HID_BPF_MAX_PROGS_PER_DEV) { + err = -E2BIG; + goto out_unlock; + } + + if (ops->hid_rdesc_fixup) { + if (hdev->bpf.rdesc_ops) { + err = -EINVAL; + goto out_unlock; + } + + hdev->bpf.rdesc_ops = ops; + } + + if (ops->hid_device_event) { + err = hid_bpf_allocate_event_data(hdev); + if (err) + goto out_unlock; + } + + if (ops->flags & BPF_F_BEFORE) + list_add_rcu(&ops->list, &hdev->bpf.prog_list); + else + list_add_tail_rcu(&ops->list, &hdev->bpf.prog_list); + +out_unlock: + mutex_unlock(&hdev->bpf.prog_list_lock); + + if (err) { + if (hdev->bpf.rdesc_ops == ops) + hdev->bpf.rdesc_ops = NULL; + hid_put_device(hdev); + } else if (ops->hid_rdesc_fixup) { + hid_bpf_reconnect(hdev); + } + + return err; +} + +static void hid_bpf_unreg(void *kdata) +{ + struct hid_bpf_ops *ops = kdata; + struct hid_device *hdev; + bool reconnect = false; + + hdev = ops->hdev; + + /* check if __hid_bpf_ops_destroy_device() has been called */ + if (!hdev) + return; + + mutex_lock(&hdev->bpf.prog_list_lock); + + list_del_rcu(&ops->list); + + reconnect = hdev->bpf.rdesc_ops == ops; + if (reconnect) + hdev->bpf.rdesc_ops = NULL; + + mutex_unlock(&hdev->bpf.prog_list_lock); + + if (reconnect) + hid_bpf_reconnect(hdev); + + hid_put_device(hdev); +} + +static int __hid_bpf_device_event(struct hid_bpf_ctx *ctx, enum hid_report_type type) +{ + return 0; +} + +static int __hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx) +{ + return 0; +} + +static struct hid_bpf_ops __bpf_hid_bpf_ops = { + .hid_device_event = __hid_bpf_device_event, + .hid_rdesc_fixup = __hid_bpf_rdesc_fixup, +}; + +static struct bpf_struct_ops bpf_hid_bpf_ops = { + .verifier_ops = &hid_bpf_verifier_ops, + .init = hid_bpf_ops_init, + .check_member = hid_bpf_ops_check_member, + .init_member = hid_bpf_ops_init_member, + .reg = hid_bpf_reg, + .unreg = hid_bpf_unreg, + .name = "hid_bpf_ops", + .cfi_stubs = &__bpf_hid_bpf_ops, + .owner = THIS_MODULE, +}; + +void __hid_bpf_ops_destroy_device(struct hid_device *hdev) +{ + struct hid_bpf_ops *e; + + rcu_read_lock(); + list_for_each_entry_rcu(e, &hdev->bpf.prog_list, list) { + hid_put_device(hdev); + e->hdev = NULL; + } + rcu_read_unlock(); +} + +static int __init hid_bpf_struct_ops_init(void) +{ + return register_bpf_struct_ops(&bpf_hid_bpf_ops, hid_bpf_ops); +} +late_initcall(hid_bpf_struct_ops_init); diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index a66103618e6e..c4f4ce10b7dd 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -65,11 +65,12 @@ struct hid_bpf_ctx { * @HID_BPF_FLAG_INSERT_HEAD: insert the given program before any other program * currently attached to the device. This doesn't * guarantee that this program will always be first - * @HID_BPF_FLAG_MAX: sentinel value, not to be used by the callers */ enum hid_bpf_attach_flags { HID_BPF_FLAG_NONE = 0, HID_BPF_FLAG_INSERT_HEAD = _BITUL(0), + + /* private: internal use only */ HID_BPF_FLAG_MAX, }; @@ -112,6 +113,60 @@ struct hid_ops { extern struct hid_ops *hid_ops; +/** + * struct hid_bpf_ops - A BPF struct_ops of callbacks allowing to attach HID-BPF + * programs to a HID device + * @hid_id: the HID uniq ID to attach to. This is writeable before ``load()``, and + * cannot be changed after + * @flags: flags used while attaching the struct_ops to the device. Currently only + * available value is %0 or ``BPF_F_BEFORE``. + * Writeable only before ``load()`` + */ +struct hid_bpf_ops { + /* hid_id needs to stay first so we can easily change it + * from userspace. + */ + int hid_id; + u32 flags; + + /* private: do not show up in the docs */ + struct list_head list; + + /* public: rest should show up in the docs */ + + /** + * @hid_device_event: called whenever an event is coming in from the device + * + * It has the following arguments: + * + * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx + * + * Return: %0 on success and keep processing; a positive + * value to change the incoming size buffer; a negative + * error code to interrupt the processing of this event + * + * Context: Interrupt context. + */ + int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type); + + /** + * @hid_rdesc_fixup: called when the probe function parses the report descriptor + * of the HID device + * + * It has the following arguments: + * + * ``ctx``: The HID-BPF context as &struct hid_bpf_ctx + * + * Return: %0 on success and keep processing; a positive + * value to change the incoming size buffer; a negative + * error code to interrupt the processing of this device + */ + int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx); + + /* private: do not show up in the docs */ + struct hid_device *hdev; +}; + struct hid_bpf_prog_list { u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV]; u8 prog_cnt; @@ -129,6 +184,10 @@ struct hid_bpf { bool destroyed; /* prevents the assignment of any progs */ spinlock_t progs_lock; /* protects RCU update of progs */ + + struct hid_bpf_ops *rdesc_ops; + struct list_head prog_list; + struct mutex prog_list_lock; /* protects prog_list update */ }; /* specific HID-BPF link when a program is attached to a device */ -- cgit v1.2.3 From 4a86220e046da009bef0948e9f51d1d26d68f93c Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:20 +0200 Subject: HID: bpf: remove tracing HID-BPF capability We can now rely on struct_ops as we cleared the users in-tree. Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-8-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/Makefile | 2 +- drivers/hid/bpf/entrypoints/Makefile | 93 ---- drivers/hid/bpf/entrypoints/README | 4 - drivers/hid/bpf/entrypoints/entrypoints.bpf.c | 25 -- drivers/hid/bpf/entrypoints/entrypoints.lskel.h | 248 ----------- drivers/hid/bpf/hid_bpf_dispatch.c | 193 +------- drivers/hid/bpf/hid_bpf_dispatch.h | 8 - drivers/hid/bpf/hid_bpf_jmp_table.c | 568 ------------------------ include/linux/hid_bpf.h | 54 +-- 9 files changed, 8 insertions(+), 1187 deletions(-) delete mode 100644 drivers/hid/bpf/entrypoints/Makefile delete mode 100644 drivers/hid/bpf/entrypoints/README delete mode 100644 drivers/hid/bpf/entrypoints/entrypoints.bpf.c delete mode 100644 drivers/hid/bpf/entrypoints/entrypoints.lskel.h delete mode 100644 drivers/hid/bpf/hid_bpf_jmp_table.c (limited to 'include') diff --git a/drivers/hid/bpf/Makefile b/drivers/hid/bpf/Makefile index 1cb3f31e9335..d1f2b81788ca 100644 --- a/drivers/hid/bpf/Makefile +++ b/drivers/hid/bpf/Makefile @@ -8,4 +8,4 @@ LIBBPF_INCLUDE = $(srctree)/tools/lib obj-$(CONFIG_HID_BPF) += hid_bpf.o CFLAGS_hid_bpf_dispatch.o += -I$(LIBBPF_INCLUDE) CFLAGS_hid_bpf_jmp_table.o += -I$(LIBBPF_INCLUDE) -hid_bpf-objs += hid_bpf_dispatch.o hid_bpf_jmp_table.o hid_bpf_struct_ops.o +hid_bpf-objs += hid_bpf_dispatch.o hid_bpf_struct_ops.o diff --git a/drivers/hid/bpf/entrypoints/Makefile b/drivers/hid/bpf/entrypoints/Makefile deleted file mode 100644 index 43b99b5575cf..000000000000 --- a/drivers/hid/bpf/entrypoints/Makefile +++ /dev/null @@ -1,93 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -OUTPUT := .output -abs_out := $(abspath $(OUTPUT)) - -CLANG ?= clang -LLC ?= llc -LLVM_STRIP ?= llvm-strip - -TOOLS_PATH := $(abspath ../../../../tools) -BPFTOOL_SRC := $(TOOLS_PATH)/bpf/bpftool -BPFTOOL_OUTPUT := $(abs_out)/bpftool -DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)/bootstrap/bpftool -BPFTOOL ?= $(DEFAULT_BPFTOOL) - -LIBBPF_SRC := $(TOOLS_PATH)/lib/bpf -LIBBPF_OUTPUT := $(abs_out)/libbpf -LIBBPF_DESTDIR := $(LIBBPF_OUTPUT) -LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)/include -BPFOBJ := $(LIBBPF_OUTPUT)/libbpf.a - -INCLUDES := -I$(OUTPUT) -I$(LIBBPF_INCLUDE) -I$(TOOLS_PATH)/include/uapi -CFLAGS := -g -Wall - -VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ - $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ - ../../../../vmlinux \ - /sys/kernel/btf/vmlinux \ - /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -ifeq ($(VMLINUX_BTF),) -$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") -endif - -ifeq ($(V),1) -Q = -msg = -else -Q = @ -msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; -MAKEFLAGS += --no-print-directory -submake_extras := feature_display=0 -endif - -.DELETE_ON_ERROR: - -.PHONY: all clean - -all: entrypoints.lskel.h - -clean: - $(call msg,CLEAN) - $(Q)rm -rf $(OUTPUT) entrypoints - -entrypoints.lskel.h: $(OUTPUT)/entrypoints.bpf.o | $(BPFTOOL) - $(call msg,GEN-SKEL,$@) - $(Q)$(BPFTOOL) gen skeleton -L $< > $@ - - -$(OUTPUT)/entrypoints.bpf.o: entrypoints.bpf.c $(OUTPUT)/vmlinux.h $(BPFOBJ) | $(OUTPUT) - $(call msg,BPF,$@) - $(Q)$(CLANG) -g -O2 --target=bpf $(INCLUDES) \ - -c $(filter %.c,$^) -o $@ && \ - $(LLVM_STRIP) -g $@ - -$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR) -ifeq ($(VMLINUX_H),) - $(call msg,GEN,,$@) - $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ -else - $(call msg,CP,,$@) - $(Q)cp "$(VMLINUX_H)" $@ -endif - -$(OUTPUT) $(LIBBPF_OUTPUT) $(BPFTOOL_OUTPUT): - $(call msg,MKDIR,$@) - $(Q)mkdir -p $@ - -$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ - OUTPUT=$(abspath $(dir $@))/ prefix= \ - DESTDIR=$(LIBBPF_DESTDIR) $(abspath $@) install_headers - -ifeq ($(CROSS_COMPILE),) -$(DEFAULT_BPFTOOL): $(BPFOBJ) | $(BPFTOOL_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \ - OUTPUT=$(BPFTOOL_OUTPUT)/ \ - LIBBPF_BOOTSTRAP_OUTPUT=$(LIBBPF_OUTPUT)/ \ - LIBBPF_BOOTSTRAP_DESTDIR=$(LIBBPF_DESTDIR)/ bootstrap -else -$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT) - $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOL_SRC) \ - OUTPUT=$(BPFTOOL_OUTPUT)/ bootstrap -endif diff --git a/drivers/hid/bpf/entrypoints/README b/drivers/hid/bpf/entrypoints/README deleted file mode 100644 index 147e0d41509f..000000000000 --- a/drivers/hid/bpf/entrypoints/README +++ /dev/null @@ -1,4 +0,0 @@ -WARNING: -If you change "entrypoints.bpf.c" do "make -j" in this directory to rebuild "entrypoints.skel.h". -Make sure to have clang 10 installed. -See Documentation/bpf/bpf_devel_QA.rst diff --git a/drivers/hid/bpf/entrypoints/entrypoints.bpf.c b/drivers/hid/bpf/entrypoints/entrypoints.bpf.c deleted file mode 100644 index c22921125a1a..000000000000 --- a/drivers/hid/bpf/entrypoints/entrypoints.bpf.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2022 Benjamin Tissoires */ - -#include ".output/vmlinux.h" -#include -#include - -#define HID_BPF_MAX_PROGS 1024 - -struct { - __uint(type, BPF_MAP_TYPE_PROG_ARRAY); - __uint(max_entries, HID_BPF_MAX_PROGS); - __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); -} hid_jmp_table SEC(".maps"); - -SEC("fmod_ret/__hid_bpf_tail_call") -int BPF_PROG(hid_tail_call, struct hid_bpf_ctx *hctx) -{ - bpf_tail_call(ctx, &hid_jmp_table, hctx->index); - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/drivers/hid/bpf/entrypoints/entrypoints.lskel.h b/drivers/hid/bpf/entrypoints/entrypoints.lskel.h deleted file mode 100644 index 35618051598c..000000000000 --- a/drivers/hid/bpf/entrypoints/entrypoints.lskel.h +++ /dev/null @@ -1,248 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -/* THIS FILE IS AUTOGENERATED BY BPFTOOL! */ -#ifndef __ENTRYPOINTS_BPF_SKEL_H__ -#define __ENTRYPOINTS_BPF_SKEL_H__ - -#include - -struct entrypoints_bpf { - struct bpf_loader_ctx ctx; - struct { - struct bpf_map_desc hid_jmp_table; - } maps; - struct { - struct bpf_prog_desc hid_tail_call; - } progs; - struct { - int hid_tail_call_fd; - } links; -}; - -static inline int -entrypoints_bpf__hid_tail_call__attach(struct entrypoints_bpf *skel) -{ - int prog_fd = skel->progs.hid_tail_call.prog_fd; - int fd = skel_raw_tracepoint_open(NULL, prog_fd); - - if (fd > 0) - skel->links.hid_tail_call_fd = fd; - return fd; -} - -static inline int -entrypoints_bpf__attach(struct entrypoints_bpf *skel) -{ - int ret = 0; - - ret = ret < 0 ? ret : entrypoints_bpf__hid_tail_call__attach(skel); - return ret < 0 ? ret : 0; -} - -static inline void -entrypoints_bpf__detach(struct entrypoints_bpf *skel) -{ - skel_closenz(skel->links.hid_tail_call_fd); -} -static void -entrypoints_bpf__destroy(struct entrypoints_bpf *skel) -{ - if (!skel) - return; - entrypoints_bpf__detach(skel); - skel_closenz(skel->progs.hid_tail_call.prog_fd); - skel_closenz(skel->maps.hid_jmp_table.map_fd); - skel_free(skel); -} -static inline struct entrypoints_bpf * -entrypoints_bpf__open(void) -{ - struct entrypoints_bpf *skel; - - skel = skel_alloc(sizeof(*skel)); - if (!skel) - goto cleanup; - skel->ctx.sz = (void *)&skel->links - (void *)skel; - return skel; -cleanup: - entrypoints_bpf__destroy(skel); - return NULL; -} - -static inline int -entrypoints_bpf__load(struct entrypoints_bpf *skel) -{ - struct bpf_load_and_run_opts opts = {}; - int err; - - opts.ctx = (struct bpf_loader_ctx *)skel; - opts.data_sz = 2856; - opts.data = (void *)"\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x9f\xeb\x01\0\ -\x18\0\0\0\0\0\0\0\x60\x02\0\0\x60\x02\0\0\x12\x02\0\0\0\0\0\0\0\0\0\x02\x03\0\ -\0\0\x01\0\0\0\0\0\0\x01\x04\0\0\0\x20\0\0\x01\0\0\0\0\0\0\0\x03\0\0\0\0\x02\0\ -\0\0\x04\0\0\0\x03\0\0\0\x05\0\0\0\0\0\0\x01\x04\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\ -\x02\x06\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x02\0\0\0\x04\0\0\0\0\x04\0\0\0\0\0\0\ -\0\0\0\x02\x08\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x02\0\0\0\x04\0\0\0\x04\0\0\0\0\ -\0\0\0\x04\0\0\x04\x20\0\0\0\x19\0\0\0\x01\0\0\0\0\0\0\0\x1e\0\0\0\x05\0\0\0\ -\x40\0\0\0\x2a\0\0\0\x07\0\0\0\x80\0\0\0\x33\0\0\0\x07\0\0\0\xc0\0\0\0\x3e\0\0\ -\0\0\0\0\x0e\x09\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\x02\x0c\0\0\0\x4c\0\0\0\0\0\0\ -\x01\x08\0\0\0\x40\0\0\0\0\0\0\0\x01\0\0\x0d\x02\0\0\0\x5f\0\0\0\x0b\0\0\0\x63\ -\0\0\0\x01\0\0\x0c\x0d\0\0\0\x09\x01\0\0\x05\0\0\x04\x20\0\0\0\x15\x01\0\0\x10\ -\0\0\0\0\0\0\0\x1b\x01\0\0\x12\0\0\0\x40\0\0\0\x1f\x01\0\0\x10\0\0\0\x80\0\0\0\ -\x2e\x01\0\0\x14\0\0\0\xa0\0\0\0\0\0\0\0\x15\0\0\0\xc0\0\0\0\x3a\x01\0\0\0\0\0\ -\x08\x11\0\0\0\x40\x01\0\0\0\0\0\x01\x04\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\x02\x13\ -\0\0\0\0\0\0\0\0\0\0\x0a\x1c\0\0\0\x4d\x01\0\0\x04\0\0\x06\x04\0\0\0\x5d\x01\0\ -\0\0\0\0\0\x6e\x01\0\0\x01\0\0\0\x80\x01\0\0\x02\0\0\0\x93\x01\0\0\x03\0\0\0\0\ -\0\0\0\x02\0\0\x05\x04\0\0\0\xa4\x01\0\0\x16\0\0\0\0\0\0\0\xab\x01\0\0\x16\0\0\ -\0\0\0\0\0\xb0\x01\0\0\0\0\0\x08\x02\0\0\0\xec\x01\0\0\0\0\0\x01\x01\0\0\0\x08\ -\0\0\x01\0\0\0\0\0\0\0\x03\0\0\0\0\x17\0\0\0\x04\0\0\0\x04\0\0\0\xf1\x01\0\0\0\ -\0\0\x0e\x18\0\0\0\x01\0\0\0\xf9\x01\0\0\x01\0\0\x0f\x20\0\0\0\x0a\0\0\0\0\0\0\ -\0\x20\0\0\0\xff\x01\0\0\x01\0\0\x0f\x04\0\0\0\x19\0\0\0\0\0\0\0\x04\0\0\0\x07\ -\x02\0\0\0\0\0\x07\0\0\0\0\0\x69\x6e\x74\0\x5f\x5f\x41\x52\x52\x41\x59\x5f\x53\ -\x49\x5a\x45\x5f\x54\x59\x50\x45\x5f\x5f\0\x74\x79\x70\x65\0\x6d\x61\x78\x5f\ -\x65\x6e\x74\x72\x69\x65\x73\0\x6b\x65\x79\x5f\x73\x69\x7a\x65\0\x76\x61\x6c\ -\x75\x65\x5f\x73\x69\x7a\x65\0\x68\x69\x64\x5f\x6a\x6d\x70\x5f\x74\x61\x62\x6c\ -\x65\0\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x6c\x6f\x6e\x67\x20\x6c\x6f\x6e\x67\ -\0\x63\x74\x78\0\x68\x69\x64\x5f\x74\x61\x69\x6c\x5f\x63\x61\x6c\x6c\0\x66\x6d\ -\x6f\x64\x5f\x72\x65\x74\x2f\x5f\x5f\x68\x69\x64\x5f\x62\x70\x66\x5f\x74\x61\ -\x69\x6c\x5f\x63\x61\x6c\x6c\0\x2f\x68\x6f\x6d\x65\x2f\x62\x74\x69\x73\x73\x6f\ -\x69\x72\x2f\x53\x72\x63\x2f\x68\x69\x64\x2f\x64\x72\x69\x76\x65\x72\x73\x2f\ -\x68\x69\x64\x2f\x62\x70\x66\x2f\x65\x6e\x74\x72\x79\x70\x6f\x69\x6e\x74\x73\ -\x2f\x65\x6e\x74\x72\x79\x70\x6f\x69\x6e\x74\x73\x2e\x62\x70\x66\x2e\x63\0\x69\ -\x6e\x74\x20\x42\x50\x46\x5f\x50\x52\x4f\x47\x28\x68\x69\x64\x5f\x74\x61\x69\ -\x6c\x5f\x63\x61\x6c\x6c\x2c\x20\x73\x74\x72\x75\x63\x74\x20\x68\x69\x64\x5f\ -\x62\x70\x66\x5f\x63\x74\x78\x20\x2a\x68\x63\x74\x78\x29\0\x68\x69\x64\x5f\x62\ -\x70\x66\x5f\x63\x74\x78\0\x69\x6e\x64\x65\x78\0\x68\x69\x64\0\x61\x6c\x6c\x6f\ -\x63\x61\x74\x65\x64\x5f\x73\x69\x7a\x65\0\x72\x65\x70\x6f\x72\x74\x5f\x74\x79\ -\x70\x65\0\x5f\x5f\x75\x33\x32\0\x75\x6e\x73\x69\x67\x6e\x65\x64\x20\x69\x6e\ -\x74\0\x68\x69\x64\x5f\x72\x65\x70\x6f\x72\x74\x5f\x74\x79\x70\x65\0\x48\x49\ -\x44\x5f\x49\x4e\x50\x55\x54\x5f\x52\x45\x50\x4f\x52\x54\0\x48\x49\x44\x5f\x4f\ -\x55\x54\x50\x55\x54\x5f\x52\x45\x50\x4f\x52\x54\0\x48\x49\x44\x5f\x46\x45\x41\ -\x54\x55\x52\x45\x5f\x52\x45\x50\x4f\x52\x54\0\x48\x49\x44\x5f\x52\x45\x50\x4f\ -\x52\x54\x5f\x54\x59\x50\x45\x53\0\x72\x65\x74\x76\x61\x6c\0\x73\x69\x7a\x65\0\ -\x5f\x5f\x73\x33\x32\0\x30\x3a\x30\0\x09\x62\x70\x66\x5f\x74\x61\x69\x6c\x5f\ -\x63\x61\x6c\x6c\x28\x63\x74\x78\x2c\x20\x26\x68\x69\x64\x5f\x6a\x6d\x70\x5f\ -\x74\x61\x62\x6c\x65\x2c\x20\x68\x63\x74\x78\x2d\x3e\x69\x6e\x64\x65\x78\x29\ -\x3b\0\x63\x68\x61\x72\0\x4c\x49\x43\x45\x4e\x53\x45\0\x2e\x6d\x61\x70\x73\0\ -\x6c\x69\x63\x65\x6e\x73\x65\0\x68\x69\x64\x5f\x64\x65\x76\x69\x63\x65\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x8a\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x03\ -\0\0\0\x04\0\0\0\x04\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x68\x69\x64\x5f\ -\x6a\x6d\x70\x5f\x74\x61\x62\x6c\x65\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\x47\x50\x4c\0\0\0\0\0\x79\x12\0\0\0\0\0\0\x61\x23\0\0\0\0\ -\0\0\x18\x52\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x85\0\0\0\x0c\0\0\0\xb7\0\0\0\0\0\0\0\ -\x95\0\0\0\0\0\0\0\0\0\0\0\x0e\0\0\0\0\0\0\0\x8e\0\0\0\xd3\0\0\0\x05\x48\0\0\ -\x01\0\0\0\x8e\0\0\0\xba\x01\0\0\x02\x50\0\0\x05\0\0\0\x8e\0\0\0\xd3\0\0\0\x05\ -\x48\0\0\x08\0\0\0\x0f\0\0\0\xb6\x01\0\0\0\0\0\0\x1a\0\0\0\x07\0\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x68\x69\ -\x64\x5f\x74\x61\x69\x6c\x5f\x63\x61\x6c\x6c\0\0\0\0\0\0\0\x1a\0\0\0\0\0\0\0\ -\x08\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x10\0\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\x01\0\ -\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\x5f\ -\x5f\x68\x69\x64\x5f\x62\x70\x66\x5f\x74\x61\x69\x6c\x5f\x63\x61\x6c\x6c\0\0\0\ -\0\0"; - opts.insns_sz = 1192; - opts.insns = (void *)"\ -\xbf\x16\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x78\xff\xff\xff\xb7\x02\0\ -\0\x88\0\0\0\xb7\x03\0\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x05\0\x11\0\0\0\0\0\x61\ -\xa1\x78\xff\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x7c\xff\ -\0\0\0\0\xd5\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa1\x80\xff\0\0\0\0\xd5\ -\x01\x01\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\ -\x01\0\0\0\0\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\ -\xbf\x70\0\0\0\0\0\0\x95\0\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\ -\0\0\0\0\0\xa8\x09\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\ -\0\0\0\0\0\0\0\xa4\x09\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\ -\0\0\0\0\0\0\0\0\0\x98\x09\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\ -\0\x05\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\x09\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\ -\0\0\x12\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x90\x09\0\0\xb7\x03\0\0\x1c\0\0\0\ -\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xd7\xff\0\0\0\0\x63\x7a\x78\ -\xff\0\0\0\0\x61\x60\x1c\0\0\0\0\0\x15\0\x03\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\ -\0\0\xbc\x09\0\0\x63\x01\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\ -\0\0\0\xb0\x09\0\0\xb7\x03\0\0\x48\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\ -\0\xc5\x07\xca\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x71\0\0\0\0\ -\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xf8\x09\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\ -\x0a\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x0a\0\0\x18\x61\0\0\ -\0\0\0\0\0\0\0\0\x88\x0a\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\ -\x38\x0a\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xd0\x0a\0\0\x7b\x01\0\0\0\0\0\0\x18\ -\x60\0\0\0\0\0\0\0\0\0\0\x40\x0a\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xe0\x0a\0\0\ -\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x70\x0a\0\0\x18\x61\0\0\0\0\0\ -\0\0\0\0\0\0\x0b\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\ -\x18\x61\0\0\0\0\0\0\0\0\0\0\xf8\x0a\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\0\ -\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x98\x0a\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\ -\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x9c\x0a\0\0\x63\x01\0\0\0\0\0\0\x79\x60\ -\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xa0\x0a\0\0\x7b\x01\0\0\0\0\0\0\x61\ -\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc8\x0a\0\0\x63\x01\0\0\0\0\0\ -\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x10\x0b\0\0\xb7\x02\0\0\x14\0\0\0\xb7\x03\0\0\ -\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\ -\x91\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x80\x0a\0\0\x63\x70\x6c\0\0\0\0\0\ -\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\ -\0\0\0\0\0\0\0\0\x80\x0a\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\ -\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xf0\x0a\0\0\x61\x01\0\0\0\0\0\0\xd5\ -\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x7f\xff\0\0\ -\0\0\x63\x7a\x80\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\xd5\x01\x02\0\0\0\0\0\xbf\ -\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\0\0\0\0\x63\x06\x28\0\0\0\ -\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\ -\0\0\xb7\0\0\0\0\0\0\0\x95\0\0\0\0\0\0\0"; - err = bpf_load_and_run(&opts); - if (err < 0) - return err; - return 0; -} - -static inline struct entrypoints_bpf * -entrypoints_bpf__open_and_load(void) -{ - struct entrypoints_bpf *skel; - - skel = entrypoints_bpf__open(); - if (!skel) - return NULL; - if (entrypoints_bpf__load(skel)) { - entrypoints_bpf__destroy(skel); - return NULL; - } - return skel; -} - -__attribute__((unused)) static void -entrypoints_bpf__assert(struct entrypoints_bpf *s __attribute__((unused))) -{ -#ifdef __cplusplus -#define _Static_assert static_assert -#endif -#ifdef __cplusplus -#undef _Static_assert -#endif -} - -#endif /* __ENTRYPOINTS_BPF_SKEL_H__ */ diff --git a/drivers/hid/bpf/hid_bpf_dispatch.c b/drivers/hid/bpf/hid_bpf_dispatch.c index 7216c3c7713d..06cc628e7bb4 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.c +++ b/drivers/hid/bpf/hid_bpf_dispatch.c @@ -3,7 +3,7 @@ /* * HID-BPF support for Linux * - * Copyright (c) 2022 Benjamin Tissoires + * Copyright (c) 2022-2024 Benjamin Tissoires */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -17,34 +17,11 @@ #include #include #include -#include #include "hid_bpf_dispatch.h" -#include "entrypoints/entrypoints.lskel.h" struct hid_ops *hid_ops; EXPORT_SYMBOL(hid_ops); -/** - * hid_bpf_device_event - Called whenever an event is coming in from the device - * - * @ctx: The HID-BPF context - * - * @return %0 on success and keep processing; a positive value to change the - * incoming size buffer; a negative error code to interrupt the processing - * of this event - * - * Declare an %fmod_ret tracing bpf program to this function and attach this - * program through hid_bpf_attach_prog() to have this helper called for - * any incoming event from the device itself. - * - * The function is called while on IRQ context, so we can not sleep. - */ -/* never used by the kernel but declared so we can load and attach a tracepoint */ -__weak noinline int hid_bpf_device_event(struct hid_bpf_ctx *ctx) -{ - return 0; -} - u8 * dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type, u8 *data, u32 *size, int interrupt) @@ -52,7 +29,6 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type struct hid_bpf_ctx_kern ctx_kern = { .ctx = { .hid = hdev, - .report_type = type, .allocated_size = hdev->bpf.allocated_data, .size = *size, }, @@ -86,11 +62,6 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type } rcu_read_unlock(); - ret = hid_bpf_prog_run(hdev, HID_BPF_PROG_TYPE_DEVICE_EVENT, &ctx_kern); - if (ret < 0) - return ERR_PTR(ret); - ret = ctx_kern.ctx.retval; - if (ret) { if (ret > ctx_kern.ctx.allocated_size) return ERR_PTR(-EINVAL); @@ -102,26 +73,6 @@ dispatch_hid_bpf_device_event(struct hid_device *hdev, enum hid_report_type type } EXPORT_SYMBOL_GPL(dispatch_hid_bpf_device_event); -/** - * hid_bpf_rdesc_fixup - Called when the probe function parses the report - * descriptor of the HID device - * - * @ctx: The HID-BPF context - * - * @return 0 on success and keep processing; a positive value to change the - * incoming size buffer; a negative error code to interrupt the processing - * of this event - * - * Declare an %fmod_ret tracing bpf program to this function and attach this - * program through hid_bpf_attach_prog() to have this helper called before any - * parsing of the report descriptor by HID. - */ -/* never used by the kernel but declared so we can load and attach a tracepoint */ -__weak noinline int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx) -{ - return 0; -} - u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *size) { int ret; @@ -133,16 +84,16 @@ u8 *call_hid_bpf_rdesc_fixup(struct hid_device *hdev, u8 *rdesc, unsigned int *s }, }; + if (!hdev->bpf.rdesc_ops) + goto ignore_bpf; + ctx_kern.data = kzalloc(ctx_kern.ctx.allocated_size, GFP_KERNEL); if (!ctx_kern.data) goto ignore_bpf; memcpy(ctx_kern.data, rdesc, min_t(unsigned int, *size, HID_MAX_DESCRIPTOR_SIZE)); - if (hdev->bpf.rdesc_ops) - ret = hdev->bpf.rdesc_ops->hid_rdesc_fixup(&ctx_kern.ctx); - else - ret = hid_bpf_prog_run(hdev, HID_BPF_PROG_TYPE_RDESC_FIXUP, &ctx_kern); + ret = hdev->bpf.rdesc_ops->hid_rdesc_fixup(&ctx_kern.ctx); if (ret < 0) goto ignore_bpf; @@ -242,39 +193,6 @@ int hid_bpf_reconnect(struct hid_device *hdev) return 0; } -static int do_hid_bpf_attach_prog(struct hid_device *hdev, int prog_fd, struct bpf_prog *prog, - __u32 flags) -{ - int fd, err, prog_type; - - prog_type = hid_bpf_get_prog_attach_type(prog); - if (prog_type < 0) - return prog_type; - - if (prog_type >= HID_BPF_PROG_TYPE_MAX) - return -EINVAL; - - if (prog_type == HID_BPF_PROG_TYPE_DEVICE_EVENT) { - err = hid_bpf_allocate_event_data(hdev); - if (err) - return err; - } - - fd = __hid_bpf_attach_prog(hdev, prog_type, prog_fd, prog, flags); - if (fd < 0) - return fd; - - if (prog_type == HID_BPF_PROG_TYPE_RDESC_FIXUP) { - err = hid_bpf_reconnect(hdev); - if (err) { - close_fd(fd); - return err; - } - } - - return fd; -} - /* Disables missing prototype warnings */ __bpf_kfunc_start_defs(); @@ -303,57 +221,6 @@ hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t rdwr return ctx_kern->data + offset; } -/** - * hid_bpf_attach_prog - Attach the given @prog_fd to the given HID device - * - * @hid_id: the system unique identifier of the HID device - * @prog_fd: an fd in the user process representing the program to attach - * @flags: any logical OR combination of &enum hid_bpf_attach_flags - * - * @returns an fd of a bpf_link object on success (> %0), an error code otherwise. - * Closing this fd will detach the program from the HID device (unless the bpf_link - * is pinned to the BPF file system). - */ -/* called from syscall */ -__bpf_kfunc int -hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, __u32 flags) -{ - struct hid_device *hdev; - struct bpf_prog *prog; - int err, fd; - - if ((flags & ~HID_BPF_FLAG_MASK)) - return -EINVAL; - - hdev = hid_get_device(hid_id); - if (IS_ERR(hdev)) - return PTR_ERR(hdev); - - /* - * take a ref on the prog itself, it will be released - * on errors or when it'll be detached - */ - prog = bpf_prog_get(prog_fd); - if (IS_ERR(prog)) { - err = PTR_ERR(prog); - goto out_dev_put; - } - - fd = do_hid_bpf_attach_prog(hdev, prog_fd, prog, flags); - if (fd < 0) { - err = fd; - goto out_prog_put; - } - - return fd; - - out_prog_put: - bpf_prog_put(prog); - out_dev_put: - hid_put_device(hdev); - return err; -} - /** * hid_bpf_allocate_context - Allocate a context to the given HID device * @@ -583,21 +450,8 @@ static const struct btf_kfunc_id_set hid_bpf_kfunc_set = { .set = &hid_bpf_kfunc_ids, }; -/* our HID-BPF entrypoints */ -BTF_SET8_START(hid_bpf_fmodret_ids) -BTF_ID_FLAGS(func, hid_bpf_device_event) -BTF_ID_FLAGS(func, hid_bpf_rdesc_fixup) -BTF_ID_FLAGS(func, __hid_bpf_tail_call) -BTF_SET8_END(hid_bpf_fmodret_ids) - -static const struct btf_kfunc_id_set hid_bpf_fmodret_set = { - .owner = THIS_MODULE, - .set = &hid_bpf_fmodret_ids, -}; - /* for syscall HID-BPF */ BTF_KFUNCS_START(hid_bpf_syscall_kfunc_ids) -BTF_ID_FLAGS(func, hid_bpf_attach_prog) BTF_ID_FLAGS(func, hid_bpf_allocate_context, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, hid_bpf_release_context, KF_RELEASE) BTF_ID_FLAGS(func, hid_bpf_hw_request) @@ -622,8 +476,6 @@ int hid_bpf_connect_device(struct hid_device *hdev) break; } } - if (rcu_dereference(hdev->bpf.progs[HID_BPF_PROG_TYPE_DEVICE_EVENT])) - need_to_allocate = true; rcu_read_unlock(); /* only allocate BPF data if there are programs attached */ @@ -650,14 +502,12 @@ void hid_bpf_destroy_device(struct hid_device *hdev) /* mark the device as destroyed in bpf so we don't reattach it */ hdev->bpf.destroyed = true; - __hid_bpf_destroy_device(hdev); __hid_bpf_ops_destroy_device(hdev); } EXPORT_SYMBOL_GPL(hid_bpf_destroy_device); void hid_bpf_device_init(struct hid_device *hdev) { - spin_lock_init(&hdev->bpf.progs_lock); INIT_LIST_HEAD(&hdev->bpf.prog_list); mutex_init(&hdev->bpf.prog_list_lock); } @@ -670,37 +520,15 @@ static int __init hid_bpf_init(void) /* Note: if we exit with an error any time here, we would entirely break HID, which * is probably not something we want. So we log an error and return success. * - * This is not a big deal: the syscall allowing to attach a BPF program to a HID device - * will not be available, so nobody will be able to use the functionality. + * This is not a big deal: nobody will be able to use the functionality. */ - err = register_btf_fmodret_id_set(&hid_bpf_fmodret_set); - if (err) { - pr_warn("error while registering fmodret entrypoints: %d", err); - return 0; - } - - err = hid_bpf_preload_skel(); - if (err) { - pr_warn("error while preloading HID BPF dispatcher: %d", err); - return 0; - } - - /* register tracing kfuncs after we are sure we can load our preloaded bpf program */ - err = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &hid_bpf_kfunc_set); - if (err) { - pr_warn("error while setting HID BPF tracing kfuncs: %d", err); - return 0; - } - - /* register struct_ops kfuncs after we are sure we can load our preloaded bpf program */ err = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &hid_bpf_kfunc_set); if (err) { pr_warn("error while setting HID BPF tracing kfuncs: %d", err); return 0; } - /* register syscalls after we are sure we can load our preloaded bpf program */ err = register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &hid_bpf_syscall_kfunc_set); if (err) { pr_warn("error while setting HID BPF syscall kfuncs: %d", err); @@ -710,15 +538,6 @@ static int __init hid_bpf_init(void) return 0; } -static void __exit hid_bpf_exit(void) -{ - /* HID depends on us, so if we hit that code, we are guaranteed that hid - * has been removed and thus we do not need to clear the HID devices - */ - hid_bpf_free_links_and_skel(); -} - late_initcall(hid_bpf_init); -module_exit(hid_bpf_exit); MODULE_AUTHOR("Benjamin Tissoires"); MODULE_LICENSE("GPL"); diff --git a/drivers/hid/bpf/hid_bpf_dispatch.h b/drivers/hid/bpf/hid_bpf_dispatch.h index e52c43d81650..835e6f69f479 100644 --- a/drivers/hid/bpf/hid_bpf_dispatch.h +++ b/drivers/hid/bpf/hid_bpf_dispatch.h @@ -13,15 +13,7 @@ struct hid_bpf_ctx_kern { struct hid_device *hid_get_device(unsigned int hid_id); void hid_put_device(struct hid_device *hid); int hid_bpf_allocate_event_data(struct hid_device *hdev); -int hid_bpf_preload_skel(void); -void hid_bpf_free_links_and_skel(void); -int hid_bpf_get_prog_attach_type(struct bpf_prog *prog); -int __hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, int prog_fd, - struct bpf_prog *prog, __u32 flags); -void __hid_bpf_destroy_device(struct hid_device *hdev); void __hid_bpf_ops_destroy_device(struct hid_device *hdev); -int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type, - struct hid_bpf_ctx_kern *ctx_kern); int hid_bpf_reconnect(struct hid_device *hdev); struct bpf_prog; diff --git a/drivers/hid/bpf/hid_bpf_jmp_table.c b/drivers/hid/bpf/hid_bpf_jmp_table.c deleted file mode 100644 index 8a54ba447718..000000000000 --- a/drivers/hid/bpf/hid_bpf_jmp_table.c +++ /dev/null @@ -1,568 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -/* - * HID-BPF support for Linux - * - * Copyright (c) 2022 Benjamin Tissoires - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "hid_bpf_dispatch.h" -#include "entrypoints/entrypoints.lskel.h" - -#define HID_BPF_MAX_PROGS 1024 /* keep this in sync with preloaded bpf, - * needs to be a power of 2 as we use it as - * a circular buffer - */ - -#define NEXT(idx) (((idx) + 1) & (HID_BPF_MAX_PROGS - 1)) -#define PREV(idx) (((idx) - 1) & (HID_BPF_MAX_PROGS - 1)) - -/* - * represents one attached program stored in the hid jump table - */ -struct hid_bpf_prog_entry { - struct bpf_prog *prog; - struct hid_device *hdev; - enum hid_bpf_prog_type type; - u16 idx; -}; - -struct hid_bpf_jmp_table { - struct bpf_map *map; - struct hid_bpf_prog_entry entries[HID_BPF_MAX_PROGS]; /* compacted list, circular buffer */ - int tail, head; - struct bpf_prog *progs[HID_BPF_MAX_PROGS]; /* idx -> progs mapping */ - unsigned long enabled[BITS_TO_LONGS(HID_BPF_MAX_PROGS)]; -}; - -#define FOR_ENTRIES(__i, __start, __end) \ - for (__i = __start; CIRC_CNT(__end, __i, HID_BPF_MAX_PROGS); __i = NEXT(__i)) - -static struct hid_bpf_jmp_table jmp_table; - -static DEFINE_MUTEX(hid_bpf_attach_lock); /* held when attaching/detaching programs */ - -static void hid_bpf_release_progs(struct work_struct *work); - -static DECLARE_WORK(release_work, hid_bpf_release_progs); - -BTF_ID_LIST(hid_bpf_btf_ids) -BTF_ID(func, hid_bpf_device_event) /* HID_BPF_PROG_TYPE_DEVICE_EVENT */ -BTF_ID(func, hid_bpf_rdesc_fixup) /* HID_BPF_PROG_TYPE_RDESC_FIXUP */ - -static int hid_bpf_max_programs(enum hid_bpf_prog_type type) -{ - switch (type) { - case HID_BPF_PROG_TYPE_DEVICE_EVENT: - return HID_BPF_MAX_PROGS_PER_DEV; - case HID_BPF_PROG_TYPE_RDESC_FIXUP: - return 1; - default: - return -EINVAL; - } -} - -static int hid_bpf_program_count(struct hid_device *hdev, - struct bpf_prog *prog, - enum hid_bpf_prog_type type) -{ - int i, n = 0; - - if (type >= HID_BPF_PROG_TYPE_MAX) - return -EINVAL; - - if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && hdev->bpf.rdesc_ops) - n += 1; - - FOR_ENTRIES(i, jmp_table.tail, jmp_table.head) { - struct hid_bpf_prog_entry *entry = &jmp_table.entries[i]; - - if (type != HID_BPF_PROG_TYPE_UNDEF && entry->type != type) - continue; - - if (hdev && entry->hdev != hdev) - continue; - - if (prog && entry->prog != prog) - continue; - - n++; - } - - return n; -} - -__weak noinline int __hid_bpf_tail_call(struct hid_bpf_ctx *ctx) -{ - return 0; -} - -int hid_bpf_prog_run(struct hid_device *hdev, enum hid_bpf_prog_type type, - struct hid_bpf_ctx_kern *ctx_kern) -{ - struct hid_bpf_prog_list *prog_list; - int i, idx, err = 0; - - rcu_read_lock(); - prog_list = rcu_dereference(hdev->bpf.progs[type]); - - if (!prog_list) - goto out_unlock; - - for (i = 0; i < prog_list->prog_cnt; i++) { - idx = prog_list->prog_idx[i]; - - if (!test_bit(idx, jmp_table.enabled)) - continue; - - ctx_kern->ctx.index = idx; - err = __hid_bpf_tail_call(&ctx_kern->ctx); - if (err < 0) - break; - if (err) - ctx_kern->ctx.retval = err; - } - - out_unlock: - rcu_read_unlock(); - - return err; -} - -/* - * assign the list of programs attached to a given hid device. - */ -static void __hid_bpf_set_hdev_progs(struct hid_device *hdev, struct hid_bpf_prog_list *new_list, - enum hid_bpf_prog_type type) -{ - struct hid_bpf_prog_list *old_list; - - spin_lock(&hdev->bpf.progs_lock); - old_list = rcu_dereference_protected(hdev->bpf.progs[type], - lockdep_is_held(&hdev->bpf.progs_lock)); - rcu_assign_pointer(hdev->bpf.progs[type], new_list); - spin_unlock(&hdev->bpf.progs_lock); - synchronize_rcu(); - - kfree(old_list); -} - -/* - * allocate and populate the list of programs attached to a given hid device. - * - * Must be called under lock. - */ -static int hid_bpf_populate_hdev(struct hid_device *hdev, enum hid_bpf_prog_type type) -{ - struct hid_bpf_prog_list *new_list; - int i; - - if (type >= HID_BPF_PROG_TYPE_MAX || !hdev) - return -EINVAL; - - if (hdev->bpf.destroyed) - return 0; - - new_list = kzalloc(sizeof(*new_list), GFP_KERNEL); - if (!new_list) - return -ENOMEM; - - FOR_ENTRIES(i, jmp_table.tail, jmp_table.head) { - struct hid_bpf_prog_entry *entry = &jmp_table.entries[i]; - - if (entry->type == type && entry->hdev == hdev && - test_bit(entry->idx, jmp_table.enabled)) - new_list->prog_idx[new_list->prog_cnt++] = entry->idx; - } - - __hid_bpf_set_hdev_progs(hdev, new_list, type); - - return 0; -} - -static void __hid_bpf_do_release_prog(int map_fd, unsigned int idx) -{ - skel_map_delete_elem(map_fd, &idx); - jmp_table.progs[idx] = NULL; -} - -static void hid_bpf_release_progs(struct work_struct *work) -{ - int i, j, n, map_fd = -1; - bool hdev_destroyed; - - if (!jmp_table.map) - return; - - /* retrieve a fd of our prog_array map in BPF */ - map_fd = skel_map_get_fd_by_id(jmp_table.map->id); - if (map_fd < 0) - return; - - mutex_lock(&hid_bpf_attach_lock); /* protects against attaching new programs */ - - /* detach unused progs from HID devices */ - FOR_ENTRIES(i, jmp_table.tail, jmp_table.head) { - struct hid_bpf_prog_entry *entry = &jmp_table.entries[i]; - enum hid_bpf_prog_type type; - struct hid_device *hdev; - - if (test_bit(entry->idx, jmp_table.enabled)) - continue; - - /* we have an attached prog */ - if (entry->hdev) { - hdev = entry->hdev; - type = entry->type; - /* - * hdev is still valid, even if we are called after hid_destroy_device(): - * when hid_bpf_attach() gets called, it takes a ref on the dev through - * bus_find_device() - */ - hdev_destroyed = hdev->bpf.destroyed; - - hid_bpf_populate_hdev(hdev, type); - - /* mark all other disabled progs from hdev of the given type as detached */ - FOR_ENTRIES(j, i, jmp_table.head) { - struct hid_bpf_prog_entry *next; - - next = &jmp_table.entries[j]; - - if (test_bit(next->idx, jmp_table.enabled)) - continue; - - if (next->hdev == hdev && next->type == type) { - /* - * clear the hdev reference and decrement the device ref - * that was taken during bus_find_device() while calling - * hid_bpf_attach() - */ - next->hdev = NULL; - put_device(&hdev->dev); - } - } - - /* if type was rdesc fixup and the device is not gone, reconnect device */ - if (type == HID_BPF_PROG_TYPE_RDESC_FIXUP && !hdev_destroyed) - hid_bpf_reconnect(hdev); - } - } - - /* remove all unused progs from the jump table */ - FOR_ENTRIES(i, jmp_table.tail, jmp_table.head) { - struct hid_bpf_prog_entry *entry = &jmp_table.entries[i]; - - if (test_bit(entry->idx, jmp_table.enabled)) - continue; - - if (entry->prog) - __hid_bpf_do_release_prog(map_fd, entry->idx); - } - - /* compact the entry list */ - n = jmp_table.tail; - FOR_ENTRIES(i, jmp_table.tail, jmp_table.head) { - struct hid_bpf_prog_entry *entry = &jmp_table.entries[i]; - - if (!test_bit(entry->idx, jmp_table.enabled)) - continue; - - jmp_table.entries[n] = jmp_table.entries[i]; - n = NEXT(n); - } - - jmp_table.head = n; - - mutex_unlock(&hid_bpf_attach_lock); - - if (map_fd >= 0) - close_fd(map_fd); -} - -static void hid_bpf_release_prog_at(int idx) -{ - int map_fd = -1; - - /* retrieve a fd of our prog_array map in BPF */ - map_fd = skel_map_get_fd_by_id(jmp_table.map->id); - if (map_fd < 0) - return; - - __hid_bpf_do_release_prog(map_fd, idx); - - close(map_fd); -} - -/* - * Insert the given BPF program represented by its fd in the jmp table. - * Returns the index in the jump table or a negative error. - */ -static int hid_bpf_insert_prog(int prog_fd, struct bpf_prog *prog) -{ - int i, index = -1, map_fd = -1, err = -EINVAL; - - /* retrieve a fd of our prog_array map in BPF */ - map_fd = skel_map_get_fd_by_id(jmp_table.map->id); - - if (map_fd < 0) { - err = -EINVAL; - goto out; - } - - /* find the first available index in the jmp_table */ - for (i = 0; i < HID_BPF_MAX_PROGS; i++) { - if (!jmp_table.progs[i] && index < 0) { - /* mark the index as used */ - jmp_table.progs[i] = prog; - index = i; - __set_bit(i, jmp_table.enabled); - } - } - if (index < 0) { - err = -ENOMEM; - goto out; - } - - /* insert the program in the jump table */ - err = skel_map_update_elem(map_fd, &index, &prog_fd, 0); - if (err) - goto out; - - /* return the index */ - err = index; - - out: - if (err < 0) - __hid_bpf_do_release_prog(map_fd, index); - if (map_fd >= 0) - close_fd(map_fd); - return err; -} - -int hid_bpf_get_prog_attach_type(struct bpf_prog *prog) -{ - int prog_type = HID_BPF_PROG_TYPE_UNDEF; - int i; - - for (i = 0; i < HID_BPF_PROG_TYPE_MAX; i++) { - if (hid_bpf_btf_ids[i] == prog->aux->attach_btf_id) { - prog_type = i; - break; - } - } - - return prog_type; -} - -static void hid_bpf_link_release(struct bpf_link *link) -{ - struct hid_bpf_link *hid_link = - container_of(link, struct hid_bpf_link, link); - - __clear_bit(hid_link->hid_table_index, jmp_table.enabled); - schedule_work(&release_work); -} - -static void hid_bpf_link_dealloc(struct bpf_link *link) -{ - struct hid_bpf_link *hid_link = - container_of(link, struct hid_bpf_link, link); - - kfree(hid_link); -} - -static void hid_bpf_link_show_fdinfo(const struct bpf_link *link, - struct seq_file *seq) -{ - seq_printf(seq, - "attach_type:\tHID-BPF\n"); -} - -static const struct bpf_link_ops hid_bpf_link_lops = { - .release = hid_bpf_link_release, - .dealloc = hid_bpf_link_dealloc, - .show_fdinfo = hid_bpf_link_show_fdinfo, -}; - -/* called from syscall */ -noinline int -__hid_bpf_attach_prog(struct hid_device *hdev, enum hid_bpf_prog_type prog_type, - int prog_fd, struct bpf_prog *prog, __u32 flags) -{ - struct bpf_link_primer link_primer; - struct hid_bpf_link *link; - struct hid_bpf_prog_entry *prog_entry; - int cnt, err = -EINVAL, prog_table_idx = -1; - - mutex_lock(&hid_bpf_attach_lock); - - link = kzalloc(sizeof(*link), GFP_USER); - if (!link) { - err = -ENOMEM; - goto err_unlock; - } - - bpf_link_init(&link->link, BPF_LINK_TYPE_UNSPEC, - &hid_bpf_link_lops, prog); - - /* do not attach too many programs to a given HID device */ - cnt = hid_bpf_program_count(hdev, NULL, prog_type); - if (cnt < 0) { - err = cnt; - goto err_unlock; - } - - if (cnt >= hid_bpf_max_programs(prog_type)) { - err = -E2BIG; - goto err_unlock; - } - - prog_table_idx = hid_bpf_insert_prog(prog_fd, prog); - /* if the jmp table is full, abort */ - if (prog_table_idx < 0) { - err = prog_table_idx; - goto err_unlock; - } - - if (flags & HID_BPF_FLAG_INSERT_HEAD) { - /* take the previous prog_entry slot */ - jmp_table.tail = PREV(jmp_table.tail); - prog_entry = &jmp_table.entries[jmp_table.tail]; - } else { - /* take the next prog_entry slot */ - prog_entry = &jmp_table.entries[jmp_table.head]; - jmp_table.head = NEXT(jmp_table.head); - } - - /* we steal the ref here */ - prog_entry->prog = prog; - prog_entry->idx = prog_table_idx; - prog_entry->hdev = hdev; - prog_entry->type = prog_type; - - /* finally store the index in the device list */ - err = hid_bpf_populate_hdev(hdev, prog_type); - if (err) { - hid_bpf_release_prog_at(prog_table_idx); - goto err_unlock; - } - - link->hid_table_index = prog_table_idx; - - err = bpf_link_prime(&link->link, &link_primer); - if (err) - goto err_unlock; - - mutex_unlock(&hid_bpf_attach_lock); - - return bpf_link_settle(&link_primer); - - err_unlock: - mutex_unlock(&hid_bpf_attach_lock); - - kfree(link); - - return err; -} - -void __hid_bpf_destroy_device(struct hid_device *hdev) -{ - int type, i; - struct hid_bpf_prog_list *prog_list; - - rcu_read_lock(); - - for (type = 0; type < HID_BPF_PROG_TYPE_MAX; type++) { - prog_list = rcu_dereference(hdev->bpf.progs[type]); - - if (!prog_list) - continue; - - for (i = 0; i < prog_list->prog_cnt; i++) - __clear_bit(prog_list->prog_idx[i], jmp_table.enabled); - } - - rcu_read_unlock(); - - for (type = 0; type < HID_BPF_PROG_TYPE_MAX; type++) - __hid_bpf_set_hdev_progs(hdev, NULL, type); - - /* schedule release of all detached progs */ - schedule_work(&release_work); -} - -#define HID_BPF_PROGS_COUNT 1 - -static struct bpf_link *links[HID_BPF_PROGS_COUNT]; -static struct entrypoints_bpf *skel; - -void hid_bpf_free_links_and_skel(void) -{ - int i; - - /* the following is enough to release all programs attached to hid */ - if (jmp_table.map) - bpf_map_put_with_uref(jmp_table.map); - - for (i = 0; i < ARRAY_SIZE(links); i++) { - if (!IS_ERR_OR_NULL(links[i])) - bpf_link_put(links[i]); - } - entrypoints_bpf__destroy(skel); -} - -#define ATTACH_AND_STORE_LINK(__name) do { \ - err = entrypoints_bpf__##__name##__attach(skel); \ - if (err) \ - goto out; \ - \ - links[idx] = bpf_link_get_from_fd(skel->links.__name##_fd); \ - if (IS_ERR(links[idx])) { \ - err = PTR_ERR(links[idx]); \ - goto out; \ - } \ - \ - /* Avoid taking over stdin/stdout/stderr of init process. Zeroing out \ - * makes skel_closenz() a no-op later in iterators_bpf__destroy(). \ - */ \ - close_fd(skel->links.__name##_fd); \ - skel->links.__name##_fd = 0; \ - idx++; \ -} while (0) - -int hid_bpf_preload_skel(void) -{ - int err, idx = 0; - - skel = entrypoints_bpf__open(); - if (!skel) - return -ENOMEM; - - err = entrypoints_bpf__load(skel); - if (err) - goto out; - - jmp_table.map = bpf_map_get_with_uref(skel->maps.hid_jmp_table.map_fd); - if (IS_ERR(jmp_table.map)) { - err = PTR_ERR(jmp_table.map); - goto out; - } - - ATTACH_AND_STORE_LINK(hid_tail_call); - - return 0; -out: - hid_bpf_free_links_and_skel(); - return err; -} diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index c4f4ce10b7dd..447b94aa99ab 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -4,7 +4,7 @@ #define __HID_BPF_H #include -#include +#include #include struct hid_device; @@ -24,11 +24,7 @@ struct hid_device; * * All of these fields are currently read-only. * - * @index: program index in the jump table. No special meaning (a smaller index - * doesn't mean the program will be executed before another program with - * a bigger index). * @hid: the ``struct hid_device`` representing the device itself - * @report_type: used for ``hid_bpf_device_event()`` * @allocated_size: Allocated size of data. * * This is how much memory is available and can be requested @@ -47,54 +43,21 @@ struct hid_device; * @retval: Return value of the previous program. */ struct hid_bpf_ctx { - __u32 index; const struct hid_device *hid; __u32 allocated_size; - enum hid_report_type report_type; union { __s32 retval; __s32 size; }; }; -/** - * enum hid_bpf_attach_flags - flags used when attaching a HIF-BPF program - * - * @HID_BPF_FLAG_NONE: no specific flag is used, the kernel choses where to - * insert the program - * @HID_BPF_FLAG_INSERT_HEAD: insert the given program before any other program - * currently attached to the device. This doesn't - * guarantee that this program will always be first - */ -enum hid_bpf_attach_flags { - HID_BPF_FLAG_NONE = 0, - HID_BPF_FLAG_INSERT_HEAD = _BITUL(0), - - /* private: internal use only */ - HID_BPF_FLAG_MAX, -}; - -/* Following functions are tracepoints that BPF programs can attach to */ -int hid_bpf_device_event(struct hid_bpf_ctx *ctx); -int hid_bpf_rdesc_fixup(struct hid_bpf_ctx *ctx); - /* * Below is HID internal */ -/* internal function to call eBPF programs, not to be used by anybody */ -int __hid_bpf_tail_call(struct hid_bpf_ctx *ctx); - #define HID_BPF_MAX_PROGS_PER_DEV 64 #define HID_BPF_FLAG_MASK (((HID_BPF_FLAG_MAX - 1) << 1) - 1) -/* types of HID programs to attach to */ -enum hid_bpf_prog_type { - HID_BPF_PROG_TYPE_UNDEF = -1, - HID_BPF_PROG_TYPE_DEVICE_EVENT, /* an event is emitted from the device */ - HID_BPF_PROG_TYPE_RDESC_FIXUP, - HID_BPF_PROG_TYPE_MAX, -}; struct hid_report_enum; @@ -167,11 +130,6 @@ struct hid_bpf_ops { struct hid_device *hdev; }; -struct hid_bpf_prog_list { - u16 prog_idx[HID_BPF_MAX_PROGS_PER_DEV]; - u8 prog_cnt; -}; - /* stored in each device */ struct hid_bpf { u8 *device_data; /* allocated when a bpf program of type @@ -179,23 +137,13 @@ struct hid_bpf { * to this HID device */ u32 allocated_data; - - struct hid_bpf_prog_list __rcu *progs[HID_BPF_PROG_TYPE_MAX]; /* attached BPF progs */ bool destroyed; /* prevents the assignment of any progs */ - spinlock_t progs_lock; /* protects RCU update of progs */ - struct hid_bpf_ops *rdesc_ops; struct list_head prog_list; struct mutex prog_list_lock; /* protects prog_list update */ }; -/* specific HID-BPF link when a program is attached to a device */ -struct hid_bpf_link { - struct bpf_link link; - int hid_table_index; -}; - #ifdef CONFIG_HID_BPF u8 *dispatch_hid_bpf_device_event(struct hid_device *hid, enum hid_report_type type, u8 *data, u32 *size, int interrupt); -- cgit v1.2.3 From c5958697a5fa29d3ba9332205a88725afe9ed912 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:22 +0200 Subject: Documentation: HID: amend HID-BPF for struct_ops Now that we are using struct_ops, the docs need to be changed. Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-10-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- Documentation/hid/hid-bpf.rst | 156 +++++++++++++++++++----------------------- include/linux/hid_bpf.h | 8 +-- 2 files changed, 76 insertions(+), 88 deletions(-) (limited to 'include') diff --git a/Documentation/hid/hid-bpf.rst b/Documentation/hid/hid-bpf.rst index 0765b3298ecf..456e15097d87 100644 --- a/Documentation/hid/hid-bpf.rst +++ b/Documentation/hid/hid-bpf.rst @@ -132,16 +132,17 @@ input events. Available types of programs =========================== -HID-BPF is built "on top" of BPF, meaning that we use tracing method to +HID-BPF is built "on top" of BPF, meaning that we use bpf struct_ops method to declare our programs. HID-BPF has the following attachment types available: -1. event processing/filtering with ``SEC("fmod_ret/hid_bpf_device_event")`` in libbpf +1. event processing/filtering with ``SEC("struct_ops/hid_device_event")`` in libbpf 2. actions coming from userspace with ``SEC("syscall")`` in libbpf -3. change of the report descriptor with ``SEC("fmod_ret/hid_bpf_rdesc_fixup")`` in libbpf +3. change of the report descriptor with ``SEC("struct_ops/hid_rdesc_fixup")`` or + ``SEC("struct_ops.s/hid_rdesc_fixup")`` in libbpf -A ``hid_bpf_device_event`` is calling a BPF program when an event is received from +A ``hid_device_event`` is calling a BPF program when an event is received from the device. Thus we are in IRQ context and can act on the data or notify userspace. And given that we are in IRQ context, we can not talk back to the device. @@ -149,37 +150,42 @@ A ``syscall`` means that userspace called the syscall ``BPF_PROG_RUN`` facility. This time, we can do any operations allowed by HID-BPF, and talking to the device is allowed. -Last, ``hid_bpf_rdesc_fixup`` is different from the others as there can be only one +Last, ``hid_rdesc_fixup`` is different from the others as there can be only one BPF program of this type. This is called on ``probe`` from the driver and allows to -change the report descriptor from the BPF program. Once a ``hid_bpf_rdesc_fixup`` +change the report descriptor from the BPF program. Once a ``hid_rdesc_fixup`` program has been loaded, it is not possible to overwrite it unless the program which inserted it allows us by pinning the program and closing all of its fds pointing to it. +Note that ``hid_rdesc_fixup`` can be declared as sleepable (``SEC("struct_ops.s/hid_rdesc_fixup")``). + + Developer API: ============== -User API data structures available in programs: ------------------------------------------------ +Available ``struct_ops`` for HID-BPF: +------------------------------------- .. kernel-doc:: include/linux/hid_bpf.h + :identifiers: hid_bpf_ops -Available tracing functions to attach a HID-BPF program: --------------------------------------------------------- -.. kernel-doc:: drivers/hid/bpf/hid_bpf_dispatch.c - :functions: hid_bpf_device_event hid_bpf_rdesc_fixup +User API data structures available in programs: +----------------------------------------------- + +.. kernel-doc:: include/linux/hid_bpf.h + :identifiers: hid_bpf_ctx -Available API that can be used in all HID-BPF programs: -------------------------------------------------------- +Available API that can be used in all HID-BPF struct_ops programs: +------------------------------------------------------------------ .. kernel-doc:: drivers/hid/bpf/hid_bpf_dispatch.c - :functions: hid_bpf_get_data + :identifiers: hid_bpf_get_data -Available API that can be used in syscall HID-BPF programs: ------------------------------------------------------------ +Available API that can be used in syscall HID-BPF programs or in sleepable HID-BPF struct_ops programs: +------------------------------------------------------------------------------------------------------- .. kernel-doc:: drivers/hid/bpf/hid_bpf_dispatch.c - :functions: hid_bpf_attach_prog hid_bpf_hw_request hid_bpf_hw_output_report hid_bpf_input_report hid_bpf_allocate_context hid_bpf_release_context + :identifiers: hid_bpf_hw_request hid_bpf_hw_output_report hid_bpf_input_report hid_bpf_allocate_context hid_bpf_release_context General overview of a HID-BPF program ===================================== @@ -222,20 +228,21 @@ This allows the following: Effect of a HID-BPF program --------------------------- -For all HID-BPF attachment types except for :c:func:`hid_bpf_rdesc_fixup`, several eBPF -programs can be attached to the same device. +For all HID-BPF attachment types except for :c:func:`hid_rdesc_fixup`, several eBPF +programs can be attached to the same device. If a HID-BPF struct_ops has a +:c:func:`hid_rdesc_fixup` while another is already attached to the device, the +kernel will return `-EINVAL` when attaching the struct_ops. -Unless ``HID_BPF_FLAG_INSERT_HEAD`` is added to the flags while attaching the -program, the new program is appended at the end of the list. -``HID_BPF_FLAG_INSERT_HEAD`` will insert the new program at the beginning of the -list which is useful for e.g. tracing where we need to get the unprocessed events -from the device. +Unless ``BPF_F_BEFORE`` is added to the flags while attaching the program, the new +program is appended at the end of the list. +``BPF_F_BEFORE`` will insert the new program at the beginning of the list which is +useful for e.g. tracing where we need to get the unprocessed events from the device. -Note that if there are multiple programs using the ``HID_BPF_FLAG_INSERT_HEAD`` flag, +Note that if there are multiple programs using the ``BPF_F_BEFORE`` flag, only the most recently loaded one is actually the first in the list. -``SEC("fmod_ret/hid_bpf_device_event")`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``SEC("struct_ops/hid_device_event")`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Whenever a matching event is raised, the eBPF programs are called one after the other and are working on the same data buffer. @@ -258,17 +265,17 @@ with, userspace needs to refer to the device by its unique system id (the last 4 in the sysfs path: ``/sys/bus/hid/devices/xxxx:yyyy:zzzz:0000``). To retrieve a context associated with the device, the program must call -:c:func:`hid_bpf_allocate_context` and must release it with :c:func:`hid_bpf_release_context` +hid_bpf_allocate_context() and must release it with hid_bpf_release_context() before returning. Once the context is retrieved, one can also request a pointer to kernel memory with -:c:func:`hid_bpf_get_data`. This memory is big enough to support all input/output/feature +hid_bpf_get_data(). This memory is big enough to support all input/output/feature reports of the given device. -``SEC("fmod_ret/hid_bpf_rdesc_fixup")`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +``SEC("struct_ops/hid_rdesc_fixup")`` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``hid_bpf_rdesc_fixup`` program works in a similar manner to -``.report_fixup`` of ``struct hid_driver``. +The ``hid_rdesc_fixup`` program works in a similar manner to ``.report_fixup`` +of ``struct hid_driver``. When the device is probed, the kernel sets the data buffer of the context with the content of the report descriptor. The memory associated with that buffer is @@ -277,33 +284,31 @@ content of the report descriptor. The memory associated with that buffer is The eBPF program can modify the data buffer at-will and the kernel uses the modified content and size as the report descriptor. -Whenever a ``SEC("fmod_ret/hid_bpf_rdesc_fixup")`` program is attached (if no -program was attached before), the kernel immediately disconnects the HID device -and does a reprobe. +Whenever a struct_ops containing a ``SEC("struct_ops/hid_rdesc_fixup")`` program +is attached (if no program was attached before), the kernel immediately disconnects +the HID device and does a reprobe. -In the same way, when the ``SEC("fmod_ret/hid_bpf_rdesc_fixup")`` program is -detached, the kernel issues a disconnect on the device. +In the same way, when this struct_ops is detached, the kernel issues a disconnect +on the device. There is no ``detach`` facility in HID-BPF. Detaching a program happens when -all the user space file descriptors pointing at a program are closed. +all the user space file descriptors pointing at a HID-BPF struct_ops link are closed. Thus, if we need to replace a report descriptor fixup, some cooperation is required from the owner of the original report descriptor fixup. -The previous owner will likely pin the program in the bpffs, and we can then +The previous owner will likely pin the struct_ops link in the bpffs, and we can then replace it through normal bpf operations. Attaching a bpf program to a device =================================== -``libbpf`` does not export any helper to attach a HID-BPF program. -Users need to use a dedicated ``syscall`` program which will call -``hid_bpf_attach_prog(hid_id, program_fd, flags)``. +We now use standard struct_ops attachment through ``bpf_map__attach_struct_ops()``. +But given that we need to attach a struct_ops to a dedicated HID device, the caller +must set ``hid_id`` in the struct_ops map before loading the program in the kernel. ``hid_id`` is the unique system ID of the HID device (the last 4 numbers in the sysfs path: ``/sys/bus/hid/devices/xxxx:yyyy:zzzz:0000``) -``progam_fd`` is the opened file descriptor of the program to attach. - -``flags`` is of type ``enum hid_bpf_attach_flags``. +One can also set ``flags``, which is of type ``enum hid_bpf_attach_flags``. We can not rely on hidraw to bind a BPF program to a HID device. hidraw is an artefact of the processing of the HID device, and is not stable. Some drivers @@ -358,32 +363,15 @@ For that, we can create a basic skeleton for our BPF program:: extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx, unsigned int offset, const size_t __sz) __ksym; - extern int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, u32 flags) __ksym; struct { __uint(type, BPF_MAP_TYPE_RINGBUF); __uint(max_entries, 4096 * 64); } ringbuf SEC(".maps"); - struct attach_prog_args { - int prog_fd; - unsigned int hid; - unsigned int flags; - int retval; - }; - - SEC("syscall") - int attach_prog(struct attach_prog_args *ctx) - { - ctx->retval = hid_bpf_attach_prog(ctx->hid, - ctx->prog_fd, - ctx->flags); - return 0; - } - __u8 current_value = 0; - SEC("?fmod_ret/hid_bpf_device_event") + SEC("struct_ops/hid_device_event") int BPF_PROG(filter_switch, struct hid_bpf_ctx *hid_ctx) { __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 192 /* size */); @@ -407,37 +395,37 @@ For that, we can create a basic skeleton for our BPF program:: return 0; } -To attach ``filter_switch``, userspace needs to call the ``attach_prog`` syscall -program first:: + SEC(".struct_ops.link") + struct hid_bpf_ops haptic_tablet = { + .hid_device_event = (void *)filter_switch, + }; + + +To attach ``haptic_tablet``, userspace needs to set ``hid_id`` first:: static int attach_filter(struct hid *hid_skel, int hid_id) { - int err, prog_fd; - int ret = -1; - struct attach_prog_args args = { - .hid = hid_id, - }; - DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs, - .ctx_in = &args, - .ctx_size_in = sizeof(args), - ); + int err, link_fd; - args.prog_fd = bpf_program__fd(hid_skel->progs.filter_switch); + hid_skel->struct_ops.haptic_tablet->hid_id = hid_id; + err = hid__load(skel); + if (err) + return err; - prog_fd = bpf_program__fd(hid_skel->progs.attach_prog); - - err = bpf_prog_test_run_opts(prog_fd, &tattrs); - if (err) - return err; + link_fd = bpf_map__attach_struct_ops(hid_skel->maps.haptic_tablet); + if (!link_fd) { + fprintf(stderr, "can not attach HID-BPF program: %m\n"); + return -1; + } - return args.retval; /* the fd of the created bpf_link */ + return link_fd; /* the fd of the created bpf_link */ } Our userspace program can now listen to notifications on the ring buffer, and is awaken only when the value changes. When the userspace program doesn't need to listen to events anymore, it can just -close the returned fd from :c:func:`attach_filter`, which will tell the kernel to +close the returned bpf link from :c:func:`attach_filter`, which will tell the kernel to detach the program from the HID device. Of course, in other use cases, the userspace program can also pin the fd to the diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 447b94aa99ab..1b4cc1b2c31d 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -20,11 +20,9 @@ struct hid_device; * struct hid_bpf_ctx - User accessible data for all HID programs * * ``data`` is not directly accessible from the context. We need to issue - * a call to ``hid_bpf_get_data()`` in order to get a pointer to that field. + * a call to hid_bpf_get_data() in order to get a pointer to that field. * - * All of these fields are currently read-only. - * - * @hid: the ``struct hid_device`` representing the device itself + * @hid: the &struct hid_device representing the device itself * @allocated_size: Allocated size of data. * * This is how much memory is available and can be requested @@ -41,6 +39,8 @@ struct hid_device; * ``size`` must always be less or equal than ``allocated_size`` (it is enforced * once all BPF programs have been run). * @retval: Return value of the previous program. + * + * ``hid`` and ``allocated_size`` are read-only, ``size`` and ``retval`` are read-write. */ struct hid_bpf_ctx { const struct hid_device *hid; -- cgit v1.2.3 From 33c0fb85b571b0f1bbdbf466e770eebeb29e6f41 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Sat, 8 Jun 2024 11:01:28 +0200 Subject: HID: bpf: make part of struct hid_device writable It is useful to change the name, the phys and/or the uniq of a struct hid_device during .rdesc_fixup(). For example, hid-uclogic.ko changes the uniq to store the firmware version to differentiate between 2 devices sharing the same PID. In the same way, changing the device name is useful when the device export 3 nodes, all with the same name. Link: https://lore.kernel.org/r/20240608-hid_bpf_struct_ops-v3-16-6ac6ade58329@kernel.org Signed-off-by: Benjamin Tissoires --- drivers/hid/bpf/hid_bpf_struct_ops.c | 3 +++ include/linux/hid_bpf.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/hid/bpf/hid_bpf_struct_ops.c b/drivers/hid/bpf/hid_bpf_struct_ops.c index 3ba46c3cd87b..5f200557ff12 100644 --- a/drivers/hid/bpf/hid_bpf_struct_ops.c +++ b/drivers/hid/bpf/hid_bpf_struct_ops.c @@ -74,6 +74,9 @@ static int hid_bpf_ops_btf_struct_access(struct bpf_verifier_log *log, const struct hid_bpf_offset_write_range write_ranges[] = { WRITE_RANGE(hid_bpf_ctx, retval, false), + WRITE_RANGE(hid_device, name, true), + WRITE_RANGE(hid_device, uniq, true), + WRITE_RANGE(hid_device, phys, true), }; #undef WRITE_RANGE const struct btf_type *state = NULL; diff --git a/include/linux/hid_bpf.h b/include/linux/hid_bpf.h index 1b4cc1b2c31d..65d7e0acc8c2 100644 --- a/include/linux/hid_bpf.h +++ b/include/linux/hid_bpf.h @@ -43,7 +43,7 @@ struct hid_device; * ``hid`` and ``allocated_size`` are read-only, ``size`` and ``retval`` are read-write. */ struct hid_bpf_ctx { - const struct hid_device *hid; + struct hid_device *hid; __u32 allocated_size; union { __s32 retval; -- cgit v1.2.3 From 5e5f2f92cccc29f356422d3cbc104f7f42430f22 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Fri, 14 Jun 2024 02:43:39 +0300 Subject: platform: arm64: add Lenovo Yoga C630 WOS EC driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lenovo Yoga C630 WOS is a laptop using Snapdragon 850 SoC. Like many laptops it uses an embedded controller (EC) to perform various platform operations, including, but not limited, to Type-C port control or power supply handlng. Add the driver for the EC, that creates devices for UCSI and power supply devices. Reviewed-by: Bryan O'Donoghue Signed-off-by: Dmitry Baryshkov Link: https://lore.kernel.org/r/20240614-yoga-ec-driver-v7-2-9f0b9b40ae76@linaro.org [ij: added #include ] Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/platform/arm64/Kconfig | 14 ++ drivers/platform/arm64/Makefile | 1 + drivers/platform/arm64/lenovo-yoga-c630.c | 291 +++++++++++++++++++++++++ include/linux/platform_data/lenovo-yoga-c630.h | 44 ++++ 4 files changed, 350 insertions(+) create mode 100644 drivers/platform/arm64/lenovo-yoga-c630.c create mode 100644 include/linux/platform_data/lenovo-yoga-c630.h (limited to 'include') diff --git a/drivers/platform/arm64/Kconfig b/drivers/platform/arm64/Kconfig index 8fdca0f8e909..8c103b3150d1 100644 --- a/drivers/platform/arm64/Kconfig +++ b/drivers/platform/arm64/Kconfig @@ -32,4 +32,18 @@ config EC_ACER_ASPIRE1 laptop where this information is not properly exposed via the standard ACPI devices. +config EC_LENOVO_YOGA_C630 + tristate "Lenovo Yoga C630 Embedded Controller driver" + depends on I2C + help + Driver for the Embedded Controller in the Qualcomm Snapdragon-based + Lenovo Yoga C630, which provides battery and power adapter + information. + + This driver provides battery and AC status support for the mentioned + laptop where this information is not properly exposed via the + standard ACPI devices. + + Say M or Y here to include this support. + endif # ARM64_PLATFORM_DEVICES diff --git a/drivers/platform/arm64/Makefile b/drivers/platform/arm64/Makefile index 4fcc9855579b..b2ae9114fdd8 100644 --- a/drivers/platform/arm64/Makefile +++ b/drivers/platform/arm64/Makefile @@ -6,3 +6,4 @@ # obj-$(CONFIG_EC_ACER_ASPIRE1) += acer-aspire1-ec.o +obj-$(CONFIG_EC_LENOVO_YOGA_C630) += lenovo-yoga-c630.o diff --git a/drivers/platform/arm64/lenovo-yoga-c630.c b/drivers/platform/arm64/lenovo-yoga-c630.c new file mode 100644 index 000000000000..1f05c9a6a89d --- /dev/null +++ b/drivers/platform/arm64/lenovo-yoga-c630.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022-2024, Linaro Ltd + * Authors: + * Bjorn Andersson + * Dmitry Baryshkov + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define LENOVO_EC_RESPONSE_REG 0x01 +#define LENOVO_EC_REQUEST_REG 0x02 + +#define LENOVO_EC_UCSI_WRITE 0x20 +#define LENOVO_EC_UCSI_READ 0x21 + +#define LENOVO_EC_READ_REG 0xb0 +#define LENOVO_EC_REQUEST_NEXT_EVENT 0x84 + +#define LENOVO_EC_UCSI_VERSION 0x20 + +struct yoga_c630_ec { + struct i2c_client *client; + struct mutex lock; + struct blocking_notifier_head notifier_list; +}; + +static int yoga_c630_ec_request(struct yoga_c630_ec *ec, u8 *req, size_t req_len, + u8 *resp, size_t resp_len) +{ + int ret; + + lockdep_assert_held(&ec->lock); + + ret = i2c_smbus_write_i2c_block_data(ec->client, LENOVO_EC_REQUEST_REG, + req_len, req); + if (ret < 0) + return ret; + + return i2c_smbus_read_i2c_block_data(ec->client, LENOVO_EC_RESPONSE_REG, + resp_len, resp); +} + +int yoga_c630_ec_read8(struct yoga_c630_ec *ec, u8 addr) +{ + u8 req[2] = { LENOVO_EC_READ_REG, }; + int ret; + u8 val; + + guard(mutex)(&ec->lock); + + req[1] = addr; + ret = yoga_c630_ec_request(ec, req, sizeof(req), &val, 1); + if (ret < 0) + return ret; + + return val; +} +EXPORT_SYMBOL_GPL(yoga_c630_ec_read8); + +int yoga_c630_ec_read16(struct yoga_c630_ec *ec, u8 addr) +{ + u8 req[2] = { LENOVO_EC_READ_REG, }; + int ret; + u8 msb; + u8 lsb; + + /* don't overflow the address */ + if (addr == 0xff) + return -EINVAL; + + guard(mutex)(&ec->lock); + + req[1] = addr; + ret = yoga_c630_ec_request(ec, req, sizeof(req), &lsb, 1); + if (ret < 0) + return ret; + + req[1] = addr + 1; + ret = yoga_c630_ec_request(ec, req, sizeof(req), &msb, 1); + if (ret < 0) + return ret; + + return msb << 8 | lsb; +} +EXPORT_SYMBOL_GPL(yoga_c630_ec_read16); + +u16 yoga_c630_ec_ucsi_get_version(struct yoga_c630_ec *ec) +{ + u8 req[3] = { 0xb3, 0xf2, }; + int ret; + u8 msb; + u8 lsb; + + guard(mutex)(&ec->lock); + + req[2] = LENOVO_EC_UCSI_VERSION; + ret = yoga_c630_ec_request(ec, req, sizeof(req), &lsb, 1); + if (ret < 0) + return ret; + + req[2] = LENOVO_EC_UCSI_VERSION + 1; + ret = yoga_c630_ec_request(ec, req, sizeof(req), &msb, 1); + if (ret < 0) + return ret; + + return msb << 8 | lsb; +} +EXPORT_SYMBOL_GPL(yoga_c630_ec_ucsi_get_version); + +int yoga_c630_ec_ucsi_write(struct yoga_c630_ec *ec, + const u8 req[YOGA_C630_UCSI_WRITE_SIZE]) +{ + int ret; + + mutex_lock(&ec->lock); + ret = i2c_smbus_write_i2c_block_data(ec->client, LENOVO_EC_UCSI_WRITE, + YOGA_C630_UCSI_WRITE_SIZE, req); + mutex_unlock(&ec->lock); + + return ret < 0 ? ret : 0; +} +EXPORT_SYMBOL_GPL(yoga_c630_ec_ucsi_write); + +int yoga_c630_ec_ucsi_read(struct yoga_c630_ec *ec, + u8 resp[YOGA_C630_UCSI_READ_SIZE]) +{ + int ret; + + mutex_lock(&ec->lock); + ret = i2c_smbus_read_i2c_block_data(ec->client, LENOVO_EC_UCSI_READ, + YOGA_C630_UCSI_READ_SIZE, resp); + mutex_unlock(&ec->lock); + + return ret < 0 ? ret : 0; +} +EXPORT_SYMBOL_GPL(yoga_c630_ec_ucsi_read); + +static irqreturn_t yoga_c630_ec_thread_intr(int irq, void *data) +{ + u8 req[] = { LENOVO_EC_REQUEST_NEXT_EVENT }; + struct yoga_c630_ec *ec = data; + u8 event; + int ret; + + mutex_lock(&ec->lock); + ret = yoga_c630_ec_request(ec, req, sizeof(req), &event, 1); + mutex_unlock(&ec->lock); + if (ret < 0) + return IRQ_HANDLED; + + blocking_notifier_call_chain(&ec->notifier_list, event, ec); + + return IRQ_HANDLED; +} + +/** + * yoga_c630_ec_register_notify - Register a notifier callback for EC events. + * @ec: Yoga C630 EC + * @nb: Notifier block pointer to register + * + * Return: 0 on success or negative error code. + */ +int yoga_c630_ec_register_notify(struct yoga_c630_ec *ec, struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&ec->notifier_list, nb); +} +EXPORT_SYMBOL_GPL(yoga_c630_ec_register_notify); + +/** + * yoga_c630_ec_unregister_notify - Unregister notifier callback for EC events. + * @ec: Yoga C630 EC + * @nb: Notifier block pointer to unregister + * + * Unregister a notifier callback that was previously registered with + * yoga_c630_ec_register_notify(). + */ +void yoga_c630_ec_unregister_notify(struct yoga_c630_ec *ec, struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&ec->notifier_list, nb); +} +EXPORT_SYMBOL_GPL(yoga_c630_ec_unregister_notify); + +static void yoga_c630_aux_release(struct device *dev) +{ + struct auxiliary_device *adev = to_auxiliary_dev(dev); + + kfree(adev); +} + +static void yoga_c630_aux_remove(void *data) +{ + struct auxiliary_device *adev = data; + + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); +} + +static int yoga_c630_aux_init(struct device *parent, const char *name, + struct yoga_c630_ec *ec) +{ + struct auxiliary_device *adev; + int ret; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + + adev->name = name; + adev->id = 0; + adev->dev.parent = parent; + adev->dev.release = yoga_c630_aux_release; + adev->dev.platform_data = ec; + + ret = auxiliary_device_init(adev); + if (ret) { + kfree(adev); + return ret; + } + + ret = auxiliary_device_add(adev); + if (ret) { + auxiliary_device_uninit(adev); + return ret; + } + + return devm_add_action_or_reset(parent, yoga_c630_aux_remove, adev); +} + +static int yoga_c630_ec_probe(struct i2c_client *client) +{ + struct device *dev = &client->dev; + struct yoga_c630_ec *ec; + int ret; + + ec = devm_kzalloc(dev, sizeof(*ec), GFP_KERNEL); + if (!ec) + return -ENOMEM; + + mutex_init(&ec->lock); + ec->client = client; + BLOCKING_INIT_NOTIFIER_HEAD(&ec->notifier_list); + + ret = devm_request_threaded_irq(dev, client->irq, + NULL, yoga_c630_ec_thread_intr, + IRQF_ONESHOT, "yoga_c630_ec", ec); + if (ret < 0) + return dev_err_probe(dev, ret, "unable to request irq\n"); + + ret = yoga_c630_aux_init(dev, YOGA_C630_DEV_PSY, ec); + if (ret) + return ret; + + return yoga_c630_aux_init(dev, YOGA_C630_DEV_UCSI, ec); +} + + +static const struct of_device_id yoga_c630_ec_of_match[] = { + { .compatible = "lenovo,yoga-c630-ec" }, + {} +}; +MODULE_DEVICE_TABLE(of, yoga_c630_ec_of_match); + +static const struct i2c_device_id yoga_c630_ec_i2c_id_table[] = { + { "yoga-c630-ec", }, + {} +}; +MODULE_DEVICE_TABLE(i2c, yoga_c630_ec_i2c_id_table); + +static struct i2c_driver yoga_c630_ec_i2c_driver = { + .driver = { + .name = "yoga-c630-ec", + .of_match_table = yoga_c630_ec_of_match + }, + .probe = yoga_c630_ec_probe, + .id_table = yoga_c630_ec_i2c_id_table, +}; +module_i2c_driver(yoga_c630_ec_i2c_driver); + +MODULE_DESCRIPTION("Lenovo Yoga C630 Embedded Controller"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/lenovo-yoga-c630.h b/include/linux/platform_data/lenovo-yoga-c630.h new file mode 100644 index 000000000000..5d1f9fb33cfc --- /dev/null +++ b/include/linux/platform_data/lenovo-yoga-c630.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022-2024, Linaro Ltd + * Authors: + * Bjorn Andersson + * Dmitry Baryshkov + */ + +#ifndef _LENOVO_YOGA_C630_DATA_H +#define _LENOVO_YOGA_C630_DATA_H + +struct yoga_c630_ec; +struct notifier_block; + +#define YOGA_C630_MOD_NAME "lenovo_yoga_c630" + +#define YOGA_C630_DEV_UCSI "ucsi" +#define YOGA_C630_DEV_PSY "psy" + +int yoga_c630_ec_read8(struct yoga_c630_ec *ec, u8 addr); +int yoga_c630_ec_read16(struct yoga_c630_ec *ec, u8 addr); + +int yoga_c630_ec_register_notify(struct yoga_c630_ec *ec, struct notifier_block *nb); +void yoga_c630_ec_unregister_notify(struct yoga_c630_ec *ec, struct notifier_block *nb); + +#define YOGA_C630_UCSI_WRITE_SIZE 8 +#define YOGA_C630_UCSI_CCI_SIZE 4 +#define YOGA_C630_UCSI_DATA_SIZE 16 +#define YOGA_C630_UCSI_READ_SIZE (YOGA_C630_UCSI_CCI_SIZE + YOGA_C630_UCSI_DATA_SIZE) + +u16 yoga_c630_ec_ucsi_get_version(struct yoga_c630_ec *ec); +int yoga_c630_ec_ucsi_write(struct yoga_c630_ec *ec, + const u8 req[YOGA_C630_UCSI_WRITE_SIZE]); +int yoga_c630_ec_ucsi_read(struct yoga_c630_ec *ec, + u8 resp[YOGA_C630_UCSI_READ_SIZE]); + +#define LENOVO_EC_EVENT_USB 0x20 +#define LENOVO_EC_EVENT_UCSI 0x21 +#define LENOVO_EC_EVENT_HPD 0x22 +#define LENOVO_EC_EVENT_BAT_STATUS 0x24 +#define LENOVO_EC_EVENT_BAT_INFO 0x25 +#define LENOVO_EC_EVENT_BAT_ADPT_STATUS 0x37 + +#endif -- cgit v1.2.3 From 1652b0bafeaa8281ca9a805d81e13d7647bd2f44 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:08 +0200 Subject: block: remove unused queue limits API Remove all APIs that are unused now that sd and sr have been converted to the atomic queue limits API. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: John Garry Reviewed-by: Nitesh Shetty Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-14-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-settings.c | 190 ------------------------------------------------- include/linux/blkdev.h | 24 ------- 2 files changed, 214 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index a49abdb35548..0b038729608f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -293,24 +293,6 @@ int queue_limits_set(struct request_queue *q, struct queue_limits *lim) } EXPORT_SYMBOL_GPL(queue_limits_set); -/** - * blk_queue_chunk_sectors - set size of the chunk for this queue - * @q: the request queue for the device - * @chunk_sectors: chunk sectors in the usual 512b unit - * - * Description: - * If a driver doesn't want IOs to cross a given chunk size, it can set - * this limit and prevent merging across chunks. Note that the block layer - * must accept a page worth of data at any offset. So if the crossing of - * chunks is a hard limitation in the driver, it must still be prepared - * to split single page bios. - **/ -void blk_queue_chunk_sectors(struct request_queue *q, unsigned int chunk_sectors) -{ - q->limits.chunk_sectors = chunk_sectors; -} -EXPORT_SYMBOL(blk_queue_chunk_sectors); - /** * blk_queue_max_discard_sectors - set max sectors for a single discard * @q: the request queue for the device @@ -352,139 +334,6 @@ void blk_queue_max_write_zeroes_sectors(struct request_queue *q, } EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors); -/** - * blk_queue_max_zone_append_sectors - set max sectors for a single zone append - * @q: the request queue for the device - * @max_zone_append_sectors: maximum number of sectors to write per command - * - * Sets the maximum number of sectors allowed for zone append commands. If - * Specifying 0 for @max_zone_append_sectors indicates that the queue does - * not natively support zone append operations and that the block layer must - * emulate these operations using regular writes. - **/ -void blk_queue_max_zone_append_sectors(struct request_queue *q, - unsigned int max_zone_append_sectors) -{ - unsigned int max_sectors = 0; - - if (WARN_ON(!blk_queue_is_zoned(q))) - return; - - if (max_zone_append_sectors) { - max_sectors = min(q->limits.max_hw_sectors, - max_zone_append_sectors); - max_sectors = min(q->limits.chunk_sectors, max_sectors); - - /* - * Signal eventual driver bugs resulting in the max_zone_append - * sectors limit being 0 due to the chunk_sectors limit (zone - * size) not set or the max_hw_sectors limit not set. - */ - WARN_ON_ONCE(!max_sectors); - } - - q->limits.max_zone_append_sectors = max_sectors; -} -EXPORT_SYMBOL_GPL(blk_queue_max_zone_append_sectors); - -/** - * blk_queue_logical_block_size - set logical block size for the queue - * @q: the request queue for the device - * @size: the logical block size, in bytes - * - * Description: - * This should be set to the lowest possible block size that the - * storage device can address. The default of 512 covers most - * hardware. - **/ -void blk_queue_logical_block_size(struct request_queue *q, unsigned int size) -{ - struct queue_limits *limits = &q->limits; - - limits->logical_block_size = size; - - if (limits->discard_granularity < limits->logical_block_size) - limits->discard_granularity = limits->logical_block_size; - - if (limits->physical_block_size < size) - limits->physical_block_size = size; - - if (limits->io_min < limits->physical_block_size) - limits->io_min = limits->physical_block_size; - - limits->max_hw_sectors = - round_down(limits->max_hw_sectors, size >> SECTOR_SHIFT); - limits->max_sectors = - round_down(limits->max_sectors, size >> SECTOR_SHIFT); -} -EXPORT_SYMBOL(blk_queue_logical_block_size); - -/** - * blk_queue_physical_block_size - set physical block size for the queue - * @q: the request queue for the device - * @size: the physical block size, in bytes - * - * Description: - * This should be set to the lowest possible sector size that the - * hardware can operate on without reverting to read-modify-write - * operations. - */ -void blk_queue_physical_block_size(struct request_queue *q, unsigned int size) -{ - q->limits.physical_block_size = size; - - if (q->limits.physical_block_size < q->limits.logical_block_size) - q->limits.physical_block_size = q->limits.logical_block_size; - - if (q->limits.discard_granularity < q->limits.physical_block_size) - q->limits.discard_granularity = q->limits.physical_block_size; - - if (q->limits.io_min < q->limits.physical_block_size) - q->limits.io_min = q->limits.physical_block_size; -} -EXPORT_SYMBOL(blk_queue_physical_block_size); - -/** - * blk_queue_zone_write_granularity - set zone write granularity for the queue - * @q: the request queue for the zoned device - * @size: the zone write granularity size, in bytes - * - * Description: - * This should be set to the lowest possible size allowing to write in - * sequential zones of a zoned block device. - */ -void blk_queue_zone_write_granularity(struct request_queue *q, - unsigned int size) -{ - if (WARN_ON_ONCE(!blk_queue_is_zoned(q))) - return; - - q->limits.zone_write_granularity = size; - - if (q->limits.zone_write_granularity < q->limits.logical_block_size) - q->limits.zone_write_granularity = q->limits.logical_block_size; -} -EXPORT_SYMBOL_GPL(blk_queue_zone_write_granularity); - -/** - * blk_queue_alignment_offset - set physical block alignment offset - * @q: the request queue for the device - * @offset: alignment offset in bytes - * - * Description: - * Some devices are naturally misaligned to compensate for things like - * the legacy DOS partition table 63-sector offset. Low-level drivers - * should call this function for devices whose first sector is not - * naturally aligned. - */ -void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) -{ - q->limits.alignment_offset = - offset & (q->limits.physical_block_size - 1); - q->limits.misaligned = 0; -} -EXPORT_SYMBOL(blk_queue_alignment_offset); - void disk_update_readahead(struct gendisk *disk) { blk_apply_bdi_limits(disk->bdi, &disk->queue->limits); @@ -514,26 +363,6 @@ void blk_limits_io_min(struct queue_limits *limits, unsigned int min) } EXPORT_SYMBOL(blk_limits_io_min); -/** - * blk_queue_io_min - set minimum request size for the queue - * @q: the request queue for the device - * @min: smallest I/O size in bytes - * - * Description: - * Storage devices may report a granularity or preferred minimum I/O - * size which is the smallest request the device can perform without - * incurring a performance penalty. For disk drives this is often the - * physical block size. For RAID arrays it is often the stripe chunk - * size. A properly aligned multiple of minimum_io_size is the - * preferred request size for workloads where a high number of I/O - * operations is desired. - */ -void blk_queue_io_min(struct request_queue *q, unsigned int min) -{ - blk_limits_io_min(&q->limits, min); -} -EXPORT_SYMBOL(blk_queue_io_min); - /** * blk_limits_io_opt - set optimal request size for a device * @limits: the queue limits @@ -841,25 +670,6 @@ void blk_queue_write_cache(struct request_queue *q, bool wc, bool fua) } EXPORT_SYMBOL_GPL(blk_queue_write_cache); -/** - * disk_set_zoned - inidicate a zoned device - * @disk: gendisk to configure - */ -void disk_set_zoned(struct gendisk *disk) -{ - struct request_queue *q = disk->queue; - - WARN_ON_ONCE(!IS_ENABLED(CONFIG_BLK_DEV_ZONED)); - - /* - * Set the zone write granularity to the device logical block - * size by default. The driver can change this value if needed. - */ - q->limits.zoned = true; - blk_queue_zone_write_granularity(q, queue_logical_block_size(q)); -} -EXPORT_SYMBOL_GPL(disk_set_zoned); - int bdev_alignment_offset(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24c36929920b..ad995e7a7698 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -332,8 +332,6 @@ struct queue_limits { typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, void *data); -void disk_set_zoned(struct gendisk *disk); - #define BLK_ALL_ZONES ((unsigned int)-1) int blkdev_report_zones(struct block_device *bdev, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); @@ -638,18 +636,6 @@ static inline unsigned int disk_zone_no(struct gendisk *disk, sector_t sector) return sector >> ilog2(disk->queue->limits.chunk_sectors); } -static inline void disk_set_max_open_zones(struct gendisk *disk, - unsigned int max_open_zones) -{ - disk->queue->limits.max_open_zones = max_open_zones; -} - -static inline void disk_set_max_active_zones(struct gendisk *disk, - unsigned int max_active_zones) -{ - disk->queue->limits.max_active_zones = max_active_zones; -} - static inline unsigned int bdev_max_open_zones(struct block_device *bdev) { return bdev->bd_disk->queue->limits.max_open_zones; @@ -929,24 +915,14 @@ static inline void queue_limits_cancel_update(struct request_queue *q) /* * Access functions for manipulating queue properties */ -extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int); void blk_queue_max_secure_erase_sectors(struct request_queue *q, unsigned int max_sectors); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, unsigned int max_write_same_sectors); -extern void blk_queue_logical_block_size(struct request_queue *, unsigned int); -extern void blk_queue_max_zone_append_sectors(struct request_queue *q, - unsigned int max_zone_append_sectors); -extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); -void blk_queue_zone_write_granularity(struct request_queue *q, - unsigned int size); -extern void blk_queue_alignment_offset(struct request_queue *q, - unsigned int alignment); void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); -extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth); extern void blk_set_stacking_limits(struct queue_limits *lim); -- cgit v1.2.3 From 73e3715ed14844067c5c598e72777641004a7f60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 31 May 2024 09:48:09 +0200 Subject: block: add special APIs for run-time disabling of discard and friends A few drivers optimistically try to support discard, write zeroes and secure erase and disable the features from the I/O completion handler if the hardware can't support them. This disable can't be done using the atomic queue limits API because the I/O completion handlers can't take sleeping locks or freeze the queue. Keep the existing clearing of the relevant field to zero, but replace the old blk_queue_max_* APIs with new disable APIs that force the value to 0. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Damien Le Moal Reviewed-by: John Garry Reviewed-by: Nitesh Shetty Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240531074837.1648501-15-hch@lst.de Signed-off-by: Jens Axboe --- arch/um/drivers/ubd_kern.c | 4 ++-- block/blk-settings.c | 41 ----------------------------------------- drivers/block/xen-blkfront.c | 4 ++-- drivers/scsi/sd.c | 4 ++-- include/linux/blkdev.h | 28 ++++++++++++++++++++++------ 5 files changed, 28 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 093c87879d08..cdcb75a68989 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -451,9 +451,9 @@ static void ubd_end_request(struct io_thread_req *io_req) { if (io_req->error == BLK_STS_NOTSUPP) { if (req_op(io_req->req) == REQ_OP_DISCARD) - blk_queue_max_discard_sectors(io_req->req->q, 0); + blk_queue_disable_discard(io_req->req->q); else if (req_op(io_req->req) == REQ_OP_WRITE_ZEROES) - blk_queue_max_write_zeroes_sectors(io_req->req->q, 0); + blk_queue_disable_write_zeroes(io_req->req->q); } blk_mq_end_request(io_req->req, io_req->error); kfree(io_req); diff --git a/block/blk-settings.c b/block/blk-settings.c index 0b038729608f..996f247fc98e 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -293,47 +293,6 @@ int queue_limits_set(struct request_queue *q, struct queue_limits *lim) } EXPORT_SYMBOL_GPL(queue_limits_set); -/** - * blk_queue_max_discard_sectors - set max sectors for a single discard - * @q: the request queue for the device - * @max_discard_sectors: maximum number of sectors to discard - **/ -void blk_queue_max_discard_sectors(struct request_queue *q, - unsigned int max_discard_sectors) -{ - struct queue_limits *lim = &q->limits; - - lim->max_hw_discard_sectors = max_discard_sectors; - lim->max_discard_sectors = - min(max_discard_sectors, lim->max_user_discard_sectors); -} -EXPORT_SYMBOL(blk_queue_max_discard_sectors); - -/** - * blk_queue_max_secure_erase_sectors - set max sectors for a secure erase - * @q: the request queue for the device - * @max_sectors: maximum number of sectors to secure_erase - **/ -void blk_queue_max_secure_erase_sectors(struct request_queue *q, - unsigned int max_sectors) -{ - q->limits.max_secure_erase_sectors = max_sectors; -} -EXPORT_SYMBOL(blk_queue_max_secure_erase_sectors); - -/** - * blk_queue_max_write_zeroes_sectors - set max sectors for a single - * write zeroes - * @q: the request queue for the device - * @max_write_zeroes_sectors: maximum number of sectors to write per command - **/ -void blk_queue_max_write_zeroes_sectors(struct request_queue *q, - unsigned int max_write_zeroes_sectors) -{ - q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors; -} -EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors); - void disk_update_readahead(struct gendisk *disk) { blk_apply_bdi_limits(disk->bdi, &disk->queue->limits); diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index fd7c0ff2139c..9b4ec3e4908c 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1605,8 +1605,8 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; - blk_queue_max_discard_sectors(rq, 0); - blk_queue_max_secure_erase_sectors(rq, 0); + blk_queue_disable_discard(rq); + blk_queue_disable_secure_erase(rq); } break; case BLKIF_OP_FLUSH_DISKCACHE: diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 049071b56819..d957e29b17a9 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -837,7 +837,7 @@ static unsigned char sd_setup_protect_cmnd(struct scsi_cmnd *scmd, static void sd_disable_discard(struct scsi_disk *sdkp) { sdkp->provisioning_mode = SD_LBP_DISABLE; - blk_queue_max_discard_sectors(sdkp->disk->queue, 0); + blk_queue_disable_discard(sdkp->disk->queue); } static void sd_config_discard(struct scsi_disk *sdkp, struct queue_limits *lim, @@ -1019,7 +1019,7 @@ static void sd_disable_write_same(struct scsi_disk *sdkp) { sdkp->device->no_write_same = 1; sdkp->max_ws_blocks = 0; - blk_queue_max_write_zeroes_sectors(sdkp->disk->queue, 0); + blk_queue_disable_write_zeroes(sdkp->disk->queue); } static void sd_config_write_same(struct scsi_disk *sdkp, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ad995e7a7698..ac8e0cb2353a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -912,15 +912,31 @@ static inline void queue_limits_cancel_update(struct request_queue *q) mutex_unlock(&q->limits_lock); } +/* + * These helpers are for drivers that have sloppy feature negotiation and might + * have to disable DISCARD, WRITE_ZEROES or SECURE_DISCARD from the I/O + * completion handler when the device returned an indicator that the respective + * feature is not actually supported. They are racy and the driver needs to + * cope with that. Try to avoid this scheme if you can. + */ +static inline void blk_queue_disable_discard(struct request_queue *q) +{ + q->limits.max_discard_sectors = 0; +} + +static inline void blk_queue_disable_secure_erase(struct request_queue *q) +{ + q->limits.max_secure_erase_sectors = 0; +} + +static inline void blk_queue_disable_write_zeroes(struct request_queue *q) +{ + q->limits.max_write_zeroes_sectors = 0; +} + /* * Access functions for manipulating queue properties */ -void blk_queue_max_secure_erase_sectors(struct request_queue *q, - unsigned int max_sectors); -extern void blk_queue_max_discard_sectors(struct request_queue *q, - unsigned int max_discard_sectors); -extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q, - unsigned int max_write_same_sectors); void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -- cgit v1.2.3 From e9f5f44ad3725335d9c559c3c22cd3726152a7b1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:15 +0200 Subject: block: remove the blk_integrity_profile structure Block layer integrity configuration is a bit complex right now, as it indirects through operation vectors for a simple two-dimensional configuration: a) the checksum type of none, ip checksum, crc, crc64 b) the presence or absence of a reference tag Remove the integrity profile, and instead add a separate csum_type flag which replaces the existing ip-checksum field and a new flag that indicates the presence of the reference tag. This removes up to two layers of indirect calls, remove the need to offload the no-op verification of non-PI metadata to a workqueue and generally simplifies the code. The downside is that block/t10-pi.c now has to be built into the kernel when CONFIG_BLK_DEV_INTEGRITY is supported. Given that both nvme and SCSI require t10-pi.ko, it is loaded for all usual configurations that enabled CONFIG_BLK_DEV_INTEGRITY already, though. Signed-off-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-6-hch@lst.de Signed-off-by: Jens Axboe --- block/Kconfig | 8 +- block/Makefile | 3 +- block/bio-integrity.c | 32 ++--- block/blk-integrity.c | 66 +++++----- block/blk-mq.c | 13 +- block/blk.h | 8 ++ block/t10-pi.c | 241 +++++++++++++----------------------- drivers/md/dm-crypt.c | 2 +- drivers/nvme/host/Kconfig | 1 - drivers/nvme/host/core.c | 17 ++- drivers/nvme/target/Kconfig | 1 - drivers/nvme/target/io-cmd-bdev.c | 16 +-- drivers/scsi/Kconfig | 1 - drivers/scsi/sd_dif.c | 19 ++- drivers/target/target_core_iblock.c | 49 ++++---- include/linux/blk-integrity.h | 35 ++---- include/linux/blkdev.h | 9 +- include/linux/t10-pi.h | 8 -- 18 files changed, 215 insertions(+), 314 deletions(-) (limited to 'include') diff --git a/block/Kconfig b/block/Kconfig index dc12af58dbae..5b623b876d3b 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -62,6 +62,8 @@ config BLK_DEV_BSGLIB config BLK_DEV_INTEGRITY bool "Block layer data integrity support" + select CRC_T10DIF + select CRC64_ROCKSOFT help Some storage devices allow extra information to be stored/retrieved to help protect the data. The block layer @@ -72,12 +74,6 @@ config BLK_DEV_INTEGRITY T10/SCSI Data Integrity Field or the T13/ATA External Path Protection. If in doubt, say N. -config BLK_DEV_INTEGRITY_T10 - tristate - depends on BLK_DEV_INTEGRITY - select CRC_T10DIF - select CRC64_ROCKSOFT - config BLK_DEV_WRITE_MOUNTED bool "Allow writing to mounted block devices" default y diff --git a/block/Makefile b/block/Makefile index 168150b9c510..ddfd21c1a9ff 100644 --- a/block/Makefile +++ b/block/Makefile @@ -26,8 +26,7 @@ obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o obj-$(CONFIG_IOSCHED_BFQ) += bfq.o -obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o -obj-$(CONFIG_BLK_DEV_INTEGRITY_T10) += t10-pi.o +obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o diff --git a/block/bio-integrity.c b/block/bio-integrity.c index af7f71d16114..31dbc2853f92 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -378,10 +378,9 @@ EXPORT_SYMBOL_GPL(bio_integrity_map_user); * bio_integrity_process - Process integrity metadata for a bio * @bio: bio to generate/verify integrity metadata for * @proc_iter: iterator to process - * @proc_fn: Pointer to the relevant processing function */ static blk_status_t bio_integrity_process(struct bio *bio, - struct bvec_iter *proc_iter, integrity_processing_fn *proc_fn) + struct bvec_iter *proc_iter) { struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); struct blk_integrity_iter iter; @@ -392,17 +391,18 @@ static blk_status_t bio_integrity_process(struct bio *bio, iter.disk_name = bio->bi_bdev->bd_disk->disk_name; iter.interval = 1 << bi->interval_exp; - iter.tuple_size = bi->tuple_size; iter.seed = proc_iter->bi_sector; iter.prot_buf = bvec_virt(bip->bip_vec); - iter.pi_offset = bi->pi_offset; __bio_for_each_segment(bv, bio, bviter, *proc_iter) { void *kaddr = bvec_kmap_local(&bv); iter.data_buf = kaddr; iter.data_size = bv.bv_len; - ret = proc_fn(&iter); + if (bio_data_dir(bio) == WRITE) + blk_integrity_generate(&iter, bi); + else + ret = blk_integrity_verify(&iter, bi); kunmap_local(kaddr); if (ret) @@ -448,12 +448,10 @@ bool bio_integrity_prep(struct bio *bio) return true; if (bio_data_dir(bio) == READ) { - if (!bi->profile->verify_fn || - !(bi->flags & BLK_INTEGRITY_VERIFY)) + if (!(bi->flags & BLK_INTEGRITY_VERIFY)) return true; } else { - if (!bi->profile->generate_fn || - !(bi->flags & BLK_INTEGRITY_GENERATE)) + if (!(bi->flags & BLK_INTEGRITY_GENERATE)) return true; /* @@ -488,7 +486,7 @@ bool bio_integrity_prep(struct bio *bio) bip->bip_flags |= BIP_BLOCK_INTEGRITY; bip_set_seed(bip, bio->bi_iter.bi_sector); - if (bi->flags & BLK_INTEGRITY_IP_CHECKSUM) + if (bi->csum_type == BLK_INTEGRITY_CSUM_IP) bip->bip_flags |= BIP_IP_CHECKSUM; /* Map it */ @@ -511,12 +509,10 @@ bool bio_integrity_prep(struct bio *bio) } /* Auto-generate integrity metadata if this is a write */ - if (bio_data_dir(bio) == WRITE) { - bio_integrity_process(bio, &bio->bi_iter, - bi->profile->generate_fn); - } else { + if (bio_data_dir(bio) == WRITE) + bio_integrity_process(bio, &bio->bi_iter); + else bip->bio_iter = bio->bi_iter; - } return true; err_end_io: @@ -539,15 +535,13 @@ static void bio_integrity_verify_fn(struct work_struct *work) struct bio_integrity_payload *bip = container_of(work, struct bio_integrity_payload, bip_work); struct bio *bio = bip->bip_bio; - struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); /* * At the moment verify is called bio's iterator was advanced * during split and completion, we need to rewind iterator to * it's original position. */ - bio->bi_status = bio_integrity_process(bio, &bip->bio_iter, - bi->profile->verify_fn); + bio->bi_status = bio_integrity_process(bio, &bip->bio_iter); bio_integrity_free(bio); bio_endio(bio); } @@ -569,7 +563,7 @@ bool __bio_integrity_endio(struct bio *bio) struct bio_integrity_payload *bip = bio_integrity(bio); if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && - (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) { + (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->csum_type) { INIT_WORK(&bip->bip_work, bio_integrity_verify_fn); queue_work(kintegrityd_wq, &bip->bip_work); return false; diff --git a/block/blk-integrity.c b/block/blk-integrity.c index ccbeb6dfa87a..17d37badfbb8 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -123,10 +123,10 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) struct blk_integrity *b1 = &gd1->queue->integrity; struct blk_integrity *b2 = &gd2->queue->integrity; - if (!b1->profile && !b2->profile) + if (!b1->tuple_size && !b2->tuple_size) return 0; - if (!b1->profile || !b2->profile) + if (!b1->tuple_size || !b2->tuple_size) return -1; if (b1->interval_exp != b2->interval_exp) { @@ -150,10 +150,13 @@ int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) return -1; } - if (b1->profile != b2->profile) { + if (b1->csum_type != b2->csum_type || + (b1->flags & BLK_INTEGRITY_REF_TAG) != + (b2->flags & BLK_INTEGRITY_REF_TAG)) { pr_err("%s: %s/%s type %s != %s\n", __func__, gd1->disk_name, gd2->disk_name, - b1->profile->name, b2->profile->name); + blk_integrity_profile_name(b1), + blk_integrity_profile_name(b2)); return -1; } @@ -217,14 +220,37 @@ static inline struct blk_integrity *dev_to_bi(struct device *dev) return &dev_to_disk(dev)->queue->integrity; } +const char *blk_integrity_profile_name(struct blk_integrity *bi) +{ + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_IP: + if (bi->flags & BLK_INTEGRITY_REF_TAG) + return "T10-DIF-TYPE1-IP"; + return "T10-DIF-TYPE3-IP"; + case BLK_INTEGRITY_CSUM_CRC: + if (bi->flags & BLK_INTEGRITY_REF_TAG) + return "T10-DIF-TYPE1-CRC"; + return "T10-DIF-TYPE3-CRC"; + case BLK_INTEGRITY_CSUM_CRC64: + if (bi->flags & BLK_INTEGRITY_REF_TAG) + return "EXT-DIF-TYPE1-CRC64"; + return "EXT-DIF-TYPE3-CRC64"; + case BLK_INTEGRITY_CSUM_NONE: + break; + } + + return "nop"; +} +EXPORT_SYMBOL_GPL(blk_integrity_profile_name); + static ssize_t format_show(struct device *dev, struct device_attribute *attr, char *page) { struct blk_integrity *bi = dev_to_bi(dev); - if (bi->profile && bi->profile->name) - return sysfs_emit(page, "%s\n", bi->profile->name); - return sysfs_emit(page, "none\n"); + if (!bi->tuple_size) + return sysfs_emit(page, "none\n"); + return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi)); } static ssize_t tag_size_show(struct device *dev, struct device_attribute *attr, @@ -326,28 +352,6 @@ const struct attribute_group blk_integrity_attr_group = { .attrs = integrity_attrs, }; -static blk_status_t blk_integrity_nop_fn(struct blk_integrity_iter *iter) -{ - return BLK_STS_OK; -} - -static void blk_integrity_nop_prepare(struct request *rq) -{ -} - -static void blk_integrity_nop_complete(struct request *rq, - unsigned int nr_bytes) -{ -} - -static const struct blk_integrity_profile nop_profile = { - .name = "nop", - .generate_fn = blk_integrity_nop_fn, - .verify_fn = blk_integrity_nop_fn, - .prepare_fn = blk_integrity_nop_prepare, - .complete_fn = blk_integrity_nop_complete, -}; - /** * blk_integrity_register - Register a gendisk as being integrity-capable * @disk: struct gendisk pointer to make integrity-aware @@ -363,11 +367,11 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template { struct blk_integrity *bi = &disk->queue->integrity; + bi->csum_type = template->csum_type; bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE | template->flags; bi->interval_exp = template->interval_exp ? : ilog2(queue_logical_block_size(disk->queue)); - bi->profile = template->profile ? template->profile : &nop_profile; bi->tuple_size = template->tuple_size; bi->tag_size = template->tag_size; bi->pi_offset = template->pi_offset; @@ -394,7 +398,7 @@ void blk_integrity_unregister(struct gendisk *disk) { struct blk_integrity *bi = &disk->queue->integrity; - if (!bi->profile) + if (!bi->tuple_size) return; /* ensure all bios are off the integrity workqueue */ diff --git a/block/blk-mq.c b/block/blk-mq.c index 3b4df8e5ac9e..0d4cd39c3d25 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -804,10 +804,8 @@ static void blk_complete_request(struct request *req) if (!bio) return; -#ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ) - req->q->integrity.profile->complete_fn(req, total_bytes); -#endif + blk_integrity_complete(req, total_bytes); /* * Upper layers may call blk_crypto_evict_key() anytime after the last @@ -875,11 +873,9 @@ bool blk_update_request(struct request *req, blk_status_t error, if (!req->bio) return false; -#ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(req) && req_op(req) == REQ_OP_READ && error == BLK_STS_OK) - req->q->integrity.profile->complete_fn(req, nr_bytes); -#endif + blk_integrity_complete(req, nr_bytes); /* * Upper layers may call blk_crypto_evict_key() anytime after the last @@ -1264,10 +1260,9 @@ void blk_mq_start_request(struct request *rq) WRITE_ONCE(rq->state, MQ_RQ_IN_FLIGHT); rq->mq_hctx->tags->rqs[rq->tag] = rq; -#ifdef CONFIG_BLK_DEV_INTEGRITY if (blk_integrity_rq(rq) && req_op(rq) == REQ_OP_WRITE) - q->integrity.profile->prepare_fn(rq); -#endif + blk_integrity_prepare(rq); + if (rq->bio && rq->bio->bi_opf & REQ_POLLED) WRITE_ONCE(rq->bio->bi_cookie, rq->mq_hctx->queue_num); } diff --git a/block/blk.h b/block/blk.h index 189bc25beb50..79e8d5d4fe0c 100644 --- a/block/blk.h +++ b/block/blk.h @@ -9,6 +9,7 @@ #include #include "blk-crypto-internal.h" +struct blk_integrity_iter; struct elevator_type; /* Max future timer expiry for timeouts */ @@ -673,4 +674,11 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops, struct file *bdev_file); int bdev_permission(dev_t dev, blk_mode_t mode, void *holder); +void blk_integrity_generate(struct blk_integrity_iter *iter, + struct blk_integrity *bi); +blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter, + struct blk_integrity *bi); +void blk_integrity_prepare(struct request *rq); +void blk_integrity_complete(struct request *rq, unsigned int nr_bytes); + #endif /* BLK_INTERNAL_H */ diff --git a/block/t10-pi.c b/block/t10-pi.c index f4cc91156da1..dadecf621497 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -11,17 +11,14 @@ #include #include #include +#include "blk.h" -typedef __be16 (csum_fn) (__be16, void *, unsigned int); - -static __be16 t10_pi_crc_fn(__be16 crc, void *data, unsigned int len) -{ - return cpu_to_be16(crc_t10dif_update(be16_to_cpu(crc), data, len)); -} - -static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len) +static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len, + unsigned char csum_type) { - return (__force __be16)ip_compute_csum(data, len); + if (csum_type == BLK_INTEGRITY_CSUM_IP) + return (__force __be16)ip_compute_csum(data, len); + return cpu_to_be16(crc_t10dif_update(be16_to_cpu(csum), data, len)); } /* @@ -29,48 +26,44 @@ static __be16 t10_pi_ip_fn(__be16 csum, void *data, unsigned int len) * 16 bit app tag, 32 bit reference tag. Type 3 does not define the ref * tag. */ -static blk_status_t t10_pi_generate(struct blk_integrity_iter *iter, - csum_fn *fn, enum t10_dif_type type) +static void t10_pi_generate(struct blk_integrity_iter *iter, + struct blk_integrity *bi) { - u8 offset = iter->pi_offset; + u8 offset = bi->pi_offset; unsigned int i; for (i = 0 ; i < iter->data_size ; i += iter->interval) { struct t10_pi_tuple *pi = iter->prot_buf + offset; - pi->guard_tag = fn(0, iter->data_buf, iter->interval); + pi->guard_tag = t10_pi_csum(0, iter->data_buf, iter->interval, + bi->csum_type); if (offset) - pi->guard_tag = fn(pi->guard_tag, iter->prot_buf, - offset); + pi->guard_tag = t10_pi_csum(pi->guard_tag, + iter->prot_buf, offset, bi->csum_type); pi->app_tag = 0; - if (type == T10_PI_TYPE1_PROTECTION) + if (bi->flags & BLK_INTEGRITY_REF_TAG) pi->ref_tag = cpu_to_be32(lower_32_bits(iter->seed)); else pi->ref_tag = 0; iter->data_buf += iter->interval; - iter->prot_buf += iter->tuple_size; + iter->prot_buf += bi->tuple_size; iter->seed++; } - - return BLK_STS_OK; } static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, - csum_fn *fn, enum t10_dif_type type) + struct blk_integrity *bi) { - u8 offset = iter->pi_offset; + u8 offset = bi->pi_offset; unsigned int i; - BUG_ON(type == T10_PI_TYPE0_PROTECTION); - for (i = 0 ; i < iter->data_size ; i += iter->interval) { struct t10_pi_tuple *pi = iter->prot_buf + offset; __be16 csum; - if (type == T10_PI_TYPE1_PROTECTION || - type == T10_PI_TYPE2_PROTECTION) { + if (bi->flags & BLK_INTEGRITY_REF_TAG) { if (pi->app_tag == T10_PI_APP_ESCAPE) goto next; @@ -82,15 +75,17 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, iter->seed, be32_to_cpu(pi->ref_tag)); return BLK_STS_PROTECTION; } - } else if (type == T10_PI_TYPE3_PROTECTION) { + } else { if (pi->app_tag == T10_PI_APP_ESCAPE && pi->ref_tag == T10_PI_REF_ESCAPE) goto next; } - csum = fn(0, iter->data_buf, iter->interval); + csum = t10_pi_csum(0, iter->data_buf, iter->interval, + bi->csum_type); if (offset) - csum = fn(csum, iter->prot_buf, offset); + csum = t10_pi_csum(csum, iter->prot_buf, offset, + bi->csum_type); if (pi->guard_tag != csum) { pr_err("%s: guard tag error at sector %llu " \ @@ -102,33 +97,13 @@ static blk_status_t t10_pi_verify(struct blk_integrity_iter *iter, next: iter->data_buf += iter->interval; - iter->prot_buf += iter->tuple_size; + iter->prot_buf += bi->tuple_size; iter->seed++; } return BLK_STS_OK; } -static blk_status_t t10_pi_type1_generate_crc(struct blk_integrity_iter *iter) -{ - return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION); -} - -static blk_status_t t10_pi_type1_generate_ip(struct blk_integrity_iter *iter) -{ - return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION); -} - -static blk_status_t t10_pi_type1_verify_crc(struct blk_integrity_iter *iter) -{ - return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE1_PROTECTION); -} - -static blk_status_t t10_pi_type1_verify_ip(struct blk_integrity_iter *iter) -{ - return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE1_PROTECTION); -} - /** * t10_pi_type1_prepare - prepare PI prior submitting request to device * @rq: request with PI that should be prepared @@ -225,81 +200,15 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes) } } -static blk_status_t t10_pi_type3_generate_crc(struct blk_integrity_iter *iter) -{ - return t10_pi_generate(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION); -} - -static blk_status_t t10_pi_type3_generate_ip(struct blk_integrity_iter *iter) -{ - return t10_pi_generate(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION); -} - -static blk_status_t t10_pi_type3_verify_crc(struct blk_integrity_iter *iter) -{ - return t10_pi_verify(iter, t10_pi_crc_fn, T10_PI_TYPE3_PROTECTION); -} - -static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) -{ - return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION); -} - -/* Type 3 does not have a reference tag so no remapping is required. */ -static void t10_pi_type3_prepare(struct request *rq) -{ -} - -/* Type 3 does not have a reference tag so no remapping is required. */ -static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes) -{ -} - -const struct blk_integrity_profile t10_pi_type1_crc = { - .name = "T10-DIF-TYPE1-CRC", - .generate_fn = t10_pi_type1_generate_crc, - .verify_fn = t10_pi_type1_verify_crc, - .prepare_fn = t10_pi_type1_prepare, - .complete_fn = t10_pi_type1_complete, -}; -EXPORT_SYMBOL(t10_pi_type1_crc); - -const struct blk_integrity_profile t10_pi_type1_ip = { - .name = "T10-DIF-TYPE1-IP", - .generate_fn = t10_pi_type1_generate_ip, - .verify_fn = t10_pi_type1_verify_ip, - .prepare_fn = t10_pi_type1_prepare, - .complete_fn = t10_pi_type1_complete, -}; -EXPORT_SYMBOL(t10_pi_type1_ip); - -const struct blk_integrity_profile t10_pi_type3_crc = { - .name = "T10-DIF-TYPE3-CRC", - .generate_fn = t10_pi_type3_generate_crc, - .verify_fn = t10_pi_type3_verify_crc, - .prepare_fn = t10_pi_type3_prepare, - .complete_fn = t10_pi_type3_complete, -}; -EXPORT_SYMBOL(t10_pi_type3_crc); - -const struct blk_integrity_profile t10_pi_type3_ip = { - .name = "T10-DIF-TYPE3-IP", - .generate_fn = t10_pi_type3_generate_ip, - .verify_fn = t10_pi_type3_verify_ip, - .prepare_fn = t10_pi_type3_prepare, - .complete_fn = t10_pi_type3_complete, -}; -EXPORT_SYMBOL(t10_pi_type3_ip); - static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len) { return cpu_to_be64(crc64_rocksoft_update(crc, data, len)); } -static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter, - enum t10_dif_type type) +static void ext_pi_crc64_generate(struct blk_integrity_iter *iter, + struct blk_integrity *bi) { - u8 offset = iter->pi_offset; + u8 offset = bi->pi_offset; unsigned int i; for (i = 0 ; i < iter->data_size ; i += iter->interval) { @@ -311,17 +220,15 @@ static blk_status_t ext_pi_crc64_generate(struct blk_integrity_iter *iter, iter->prot_buf, offset); pi->app_tag = 0; - if (type == T10_PI_TYPE1_PROTECTION) + if (bi->flags & BLK_INTEGRITY_REF_TAG) put_unaligned_be48(iter->seed, pi->ref_tag); else put_unaligned_be48(0ULL, pi->ref_tag); iter->data_buf += iter->interval; - iter->prot_buf += iter->tuple_size; + iter->prot_buf += bi->tuple_size; iter->seed++; } - - return BLK_STS_OK; } static bool ext_pi_ref_escape(u8 *ref_tag) @@ -332,9 +239,9 @@ static bool ext_pi_ref_escape(u8 *ref_tag) } static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, - enum t10_dif_type type) + struct blk_integrity *bi) { - u8 offset = iter->pi_offset; + u8 offset = bi->pi_offset; unsigned int i; for (i = 0; i < iter->data_size; i += iter->interval) { @@ -342,7 +249,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, u64 ref, seed; __be64 csum; - if (type == T10_PI_TYPE1_PROTECTION) { + if (bi->flags & BLK_INTEGRITY_REF_TAG) { if (pi->app_tag == T10_PI_APP_ESCAPE) goto next; @@ -353,7 +260,7 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, iter->disk_name, seed, ref); return BLK_STS_PROTECTION; } - } else if (type == T10_PI_TYPE3_PROTECTION) { + } else { if (pi->app_tag == T10_PI_APP_ESCAPE && ext_pi_ref_escape(pi->ref_tag)) goto next; @@ -374,23 +281,13 @@ static blk_status_t ext_pi_crc64_verify(struct blk_integrity_iter *iter, next: iter->data_buf += iter->interval; - iter->prot_buf += iter->tuple_size; + iter->prot_buf += bi->tuple_size; iter->seed++; } return BLK_STS_OK; } -static blk_status_t ext_pi_type1_verify_crc64(struct blk_integrity_iter *iter) -{ - return ext_pi_crc64_verify(iter, T10_PI_TYPE1_PROTECTION); -} - -static blk_status_t ext_pi_type1_generate_crc64(struct blk_integrity_iter *iter) -{ - return ext_pi_crc64_generate(iter, T10_PI_TYPE1_PROTECTION); -} - static void ext_pi_type1_prepare(struct request *rq) { struct blk_integrity *bi = &rq->q->integrity; @@ -467,33 +364,61 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes) } } -static blk_status_t ext_pi_type3_verify_crc64(struct blk_integrity_iter *iter) +void blk_integrity_generate(struct blk_integrity_iter *iter, + struct blk_integrity *bi) { - return ext_pi_crc64_verify(iter, T10_PI_TYPE3_PROTECTION); + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + ext_pi_crc64_generate(iter, bi); + break; + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + t10_pi_generate(iter, bi); + break; + default: + break; + } } -static blk_status_t ext_pi_type3_generate_crc64(struct blk_integrity_iter *iter) +blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter, + struct blk_integrity *bi) { - return ext_pi_crc64_generate(iter, T10_PI_TYPE3_PROTECTION); + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_CRC64: + return ext_pi_crc64_verify(iter, bi); + case BLK_INTEGRITY_CSUM_CRC: + case BLK_INTEGRITY_CSUM_IP: + return t10_pi_verify(iter, bi); + default: + return BLK_STS_OK; + } } -const struct blk_integrity_profile ext_pi_type1_crc64 = { - .name = "EXT-DIF-TYPE1-CRC64", - .generate_fn = ext_pi_type1_generate_crc64, - .verify_fn = ext_pi_type1_verify_crc64, - .prepare_fn = ext_pi_type1_prepare, - .complete_fn = ext_pi_type1_complete, -}; -EXPORT_SYMBOL_GPL(ext_pi_type1_crc64); - -const struct blk_integrity_profile ext_pi_type3_crc64 = { - .name = "EXT-DIF-TYPE3-CRC64", - .generate_fn = ext_pi_type3_generate_crc64, - .verify_fn = ext_pi_type3_verify_crc64, - .prepare_fn = t10_pi_type3_prepare, - .complete_fn = t10_pi_type3_complete, -}; -EXPORT_SYMBOL_GPL(ext_pi_type3_crc64); +void blk_integrity_prepare(struct request *rq) +{ + struct blk_integrity *bi = &rq->q->integrity; + + if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) + return; + + if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64) + ext_pi_type1_prepare(rq); + else + t10_pi_type1_prepare(rq); +} + +void blk_integrity_complete(struct request *rq, unsigned int nr_bytes) +{ + struct blk_integrity *bi = &rq->q->integrity; + + if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) + return; + + if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC64) + ext_pi_type1_complete(rq, nr_bytes); + else + t10_pi_type1_complete(rq, nr_bytes); +} MODULE_DESCRIPTION("T10 Protection Information module"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 1dfc462f29cd..6c013ceb0e5f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1177,7 +1177,7 @@ static int crypt_integrity_ctr(struct crypt_config *cc, struct dm_target *ti) struct mapped_device *md = dm_table_get_md(ti->table); /* We require an underlying device with non-PI metadata */ - if (!bi || strcmp(bi->profile->name, "nop")) { + if (!bi || bi->csum_type != BLK_INTEGRITY_CSUM_NONE) { ti->error = "Integrity profile not supported."; return -EINVAL; } diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index b309c8be720f..a3caef75aa0a 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only config NVME_CORE tristate - select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY config BLK_DEV_NVME tristate "NVM Express block device" diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f5d150c62955..14bac248cde4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1744,17 +1744,16 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) case NVME_NS_DPS_PI_TYPE3: switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: - integrity.profile = &t10_pi_type3_crc; + integrity.csum_type = BLK_INTEGRITY_CSUM_CRC; integrity.tag_size = sizeof(u16) + sizeof(u32); integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; break; case NVME_NVM_NS_64B_GUARD: - integrity.profile = &ext_pi_type3_crc64; + integrity.csum_type = BLK_INTEGRITY_CSUM_CRC64; integrity.tag_size = sizeof(u16) + 6; integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; break; default: - integrity.profile = NULL; break; } break; @@ -1762,22 +1761,22 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) case NVME_NS_DPS_PI_TYPE2: switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: - integrity.profile = &t10_pi_type1_crc; + integrity.csum_type = BLK_INTEGRITY_CSUM_CRC; integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE | + BLK_INTEGRITY_REF_TAG; break; case NVME_NVM_NS_64B_GUARD: - integrity.profile = &ext_pi_type1_crc64; + integrity.csum_type = BLK_INTEGRITY_CSUM_CRC64; integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE | + BLK_INTEGRITY_REF_TAG; break; default: - integrity.profile = NULL; break; } break; default: - integrity.profile = NULL; break; } diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index 872dd1a0acd8..c42aec41cc7b 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -6,7 +6,6 @@ config NVME_TARGET depends on CONFIGFS_FS select NVME_KEYRING if NVME_TARGET_TCP_TLS select KEYS if NVME_TARGET_TCP_TLS - select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY select SGL_ALLOC help This enabled target side support for the NVMe protocol, that is diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 6426aac2634a..b628bc5ee998 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -61,15 +61,17 @@ static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) { struct blk_integrity *bi = bdev_get_integrity(ns->bdev); - if (bi) { + if (!bi) + return; + + if (bi->csum_type == BLK_INTEGRITY_CSUM_CRC) { ns->metadata_size = bi->tuple_size; - if (bi->profile == &t10_pi_type1_crc) + if (bi->flags & BLK_INTEGRITY_REF_TAG) ns->pi_type = NVME_NS_DPS_PI_TYPE1; - else if (bi->profile == &t10_pi_type3_crc) - ns->pi_type = NVME_NS_DPS_PI_TYPE3; else - /* Unsupported metadata type */ - ns->metadata_size = 0; + ns->pi_type = NVME_NS_DPS_PI_TYPE3; + } else { + ns->metadata_size = 0; } } @@ -102,7 +104,7 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) ns->pi_type = 0; ns->metadata_size = 0; - if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10)) + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) nvmet_bdev_ns_enable_integrity(ns); if (bdev_is_zoned(ns->bdev)) { diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 065db86d6021..37c24ffea65c 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -82,7 +82,6 @@ comment "SCSI support type (disk, tape, CD-ROM)" config BLK_DEV_SD tristate "SCSI disk support" depends on SCSI - select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY help If you want to use SCSI hard disks, Fibre Channel disks, Serial ATA (SATA) or Parallel ATA (PATA) hard disks, diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 1df847b5f747..6f0921c7db78 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -47,18 +47,13 @@ void sd_dif_config_host(struct scsi_disk *sdkp) memset(&bi, 0, sizeof(bi)); /* Enable DMA of protection information */ - if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) { - if (type == T10_PI_TYPE3_PROTECTION) - bi.profile = &t10_pi_type3_ip; - else - bi.profile = &t10_pi_type1_ip; + if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) + bi.csum_type = BLK_INTEGRITY_CSUM_IP; + else + bi.csum_type = BLK_INTEGRITY_CSUM_CRC; - bi.flags |= BLK_INTEGRITY_IP_CHECKSUM; - } else - if (type == T10_PI_TYPE3_PROTECTION) - bi.profile = &t10_pi_type3_crc; - else - bi.profile = &t10_pi_type1_crc; + if (type != T10_PI_TYPE3_PROTECTION) + bi.flags |= BLK_INTEGRITY_REF_TAG; bi.tuple_size = sizeof(struct t10_pi_tuple); @@ -76,7 +71,7 @@ void sd_dif_config_host(struct scsi_disk *sdkp) sd_first_printk(KERN_NOTICE, sdkp, "Enabling DIX %s, application tag size %u bytes\n", - bi.profile->name, bi.tag_size); + blk_integrity_profile_name(&bi), bi.tag_size); out: blk_integrity_register(disk, &bi); } diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 7f6ca8177845..a3e09adc4e76 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -148,35 +148,38 @@ static int iblock_configure_device(struct se_device *dev) dev->dev_attrib.is_nonrot = 1; bi = bdev_get_integrity(bd); - if (bi) { - struct bio_set *bs = &ib_dev->ibd_bio_set; - - if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-IP") || - !strcmp(bi->profile->name, "T10-DIF-TYPE1-IP")) { - pr_err("IBLOCK export of blk_integrity: %s not" - " supported\n", bi->profile->name); - ret = -ENOSYS; - goto out_blkdev_put; - } + if (!bi) + return 0; - if (!strcmp(bi->profile->name, "T10-DIF-TYPE3-CRC")) { - dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT; - } else if (!strcmp(bi->profile->name, "T10-DIF-TYPE1-CRC")) { + switch (bi->csum_type) { + case BLK_INTEGRITY_CSUM_IP: + pr_err("IBLOCK export of blk_integrity: %s not supported\n", + blk_integrity_profile_name(bi)); + ret = -ENOSYS; + goto out_blkdev_put; + case BLK_INTEGRITY_CSUM_CRC: + if (bi->flags & BLK_INTEGRITY_REF_TAG) dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE1_PROT; - } + else + dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT; + break; + default: + break; + } - if (dev->dev_attrib.pi_prot_type) { - if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) { - pr_err("Unable to allocate bioset for PI\n"); - ret = -ENOMEM; - goto out_blkdev_put; - } - pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n", - &bs->bio_integrity_pool); + if (dev->dev_attrib.pi_prot_type) { + struct bio_set *bs = &ib_dev->ibd_bio_set; + + if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) < 0) { + pr_err("Unable to allocate bioset for PI\n"); + ret = -ENOMEM; + goto out_blkdev_put; } - dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type; + pr_debug("IBLOCK setup BIP bs->bio_integrity_pool: %p\n", + &bs->bio_integrity_pool); } + dev->dev_attrib.hw_pi_prot_type = dev->dev_attrib.pi_prot_type; return 0; out_blkdev_put: diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 7428cb43952d..56ce1ae35580 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -10,7 +10,7 @@ enum blk_integrity_flags { BLK_INTEGRITY_VERIFY = 1 << 0, BLK_INTEGRITY_GENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, - BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, + BLK_INTEGRITY_REF_TAG = 1 << 3, }; struct blk_integrity_iter { @@ -19,22 +19,10 @@ struct blk_integrity_iter { sector_t seed; unsigned int data_size; unsigned short interval; - unsigned char tuple_size; - unsigned char pi_offset; const char *disk_name; }; -typedef blk_status_t (integrity_processing_fn) (struct blk_integrity_iter *); -typedef void (integrity_prepare_fn) (struct request *); -typedef void (integrity_complete_fn) (struct request *, unsigned int); - -struct blk_integrity_profile { - integrity_processing_fn *generate_fn; - integrity_processing_fn *verify_fn; - integrity_prepare_fn *prepare_fn; - integrity_complete_fn *complete_fn; - const char *name; -}; +const char *blk_integrity_profile_name(struct blk_integrity *bi); #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_integrity_register(struct gendisk *, struct blk_integrity *); @@ -44,14 +32,17 @@ int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +static inline bool +blk_integrity_queue_supports_integrity(struct request_queue *q) { - struct blk_integrity *bi = &disk->queue->integrity; + return q->integrity.tuple_size; +} - if (!bi->profile) +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +{ + if (!blk_integrity_queue_supports_integrity(disk->queue)) return NULL; - - return bi; + return &disk->queue->integrity; } static inline struct blk_integrity * @@ -60,12 +51,6 @@ bdev_get_integrity(struct block_device *bdev) return blk_get_integrity(bdev->bd_disk); } -static inline bool -blk_integrity_queue_supports_integrity(struct request_queue *q) -{ - return q->integrity.profile; -} - static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ac8e0cb2353a..bdd33388e1ce 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -105,9 +105,16 @@ enum { struct disk_events; struct badblocks; +enum blk_integrity_checksum { + BLK_INTEGRITY_CSUM_NONE = 0, + BLK_INTEGRITY_CSUM_IP = 1, + BLK_INTEGRITY_CSUM_CRC = 2, + BLK_INTEGRITY_CSUM_CRC64 = 3, +} __packed ; + struct blk_integrity { - const struct blk_integrity_profile *profile; unsigned char flags; + enum blk_integrity_checksum csum_type; unsigned char tuple_size; unsigned char pi_offset; unsigned char interval_exp; diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 248f4ac95642..d2bafb76badf 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -48,11 +48,6 @@ static inline u32 t10_pi_ref_tag(struct request *rq) return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff; } -extern const struct blk_integrity_profile t10_pi_type1_crc; -extern const struct blk_integrity_profile t10_pi_type1_ip; -extern const struct blk_integrity_profile t10_pi_type3_crc; -extern const struct blk_integrity_profile t10_pi_type3_ip; - struct crc64_pi_tuple { __be64 guard_tag; __be16 app_tag; @@ -79,7 +74,4 @@ static inline u64 ext_pi_ref_tag(struct request *rq) return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); } -extern const struct blk_integrity_profile ext_pi_type1_crc64; -extern const struct blk_integrity_profile ext_pi_type3_crc64; - #endif -- cgit v1.2.3 From 3c3e85ddffae93eba1a257eb6939bf5dc1e93b9e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:20 +0200 Subject: block: bypass the STABLE_WRITES flag for protection information Currently registering a checksum-enabled (aka PI) integrity profile sets the QUEUE_FLAG_STABLE_WRITE flag, and unregistering it clears the flag. This can incorrectly clear the flag when the driver requires stable writes even without PI, e.g. in case of iSCSI or NVMe/TCP with data digest enabled. Fix this by looking at the csum_type directly in bdev_stable_writes and not setting the queue flag. Also remove the blk_queue_stable_writes helper as the only user in nvme wants to only look at the actual QUEUE_FLAG_STABLE_WRITE flag as it inherits the integrity configuration by other means. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20240613084839.1044015-11-hch@lst.de Signed-off-by: Jens Axboe --- block/blk-integrity.c | 6 ------ drivers/nvme/host/multipath.c | 3 ++- include/linux/blkdev.h | 12 ++++++++---- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 1d2d371cd632..bec0d1df387c 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -379,9 +379,6 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template bi->tag_size = template->tag_size; bi->pi_offset = template->pi_offset; - if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE) - blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, disk->queue); - #ifdef CONFIG_BLK_INLINE_ENCRYPTION if (disk->queue->crypto_profile) { pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n"); @@ -404,9 +401,6 @@ void blk_integrity_unregister(struct gendisk *disk) if (!bi->tuple_size) return; - - if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE) - blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, disk->queue); memset(bi, 0, sizeof(*bi)); } EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d8b6b4648eaf..12c59db02539 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -875,7 +875,8 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) nvme_mpath_set_live(ns); } - if (blk_queue_stable_writes(ns->queue) && ns->head->disk) + if (test_bit(QUEUE_FLAG_STABLE_WRITES, &ns->queue->queue_flags) && + ns->head->disk) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->head->disk->queue); #ifdef CONFIG_BLK_DEV_ZONED diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bdd33388e1ce..f9089750919c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -571,8 +571,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_noxmerges(q) \ test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) -#define blk_queue_stable_writes(q) \ - test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ @@ -1300,8 +1298,14 @@ static inline bool bdev_synchronous(struct block_device *bdev) static inline bool bdev_stable_writes(struct block_device *bdev) { - return test_bit(QUEUE_FLAG_STABLE_WRITES, - &bdev_get_queue(bdev)->queue_flags); + struct request_queue *q = bdev_get_queue(bdev); + +#ifdef CONFIG_BLK_DEV_INTEGRITY + /* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */ + if (q->integrity.csum_type != 0) + return true; +#endif + return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } static inline bool bdev_write_cache(struct block_device *bdev) -- cgit v1.2.3 From 9f4aa46f2a7401025d8561495cf8740f773310fc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:21 +0200 Subject: block: invert the BLK_INTEGRITY_{GENERATE,VERIFY} flags Invert the flags so that user set values will be able to persist revalidating the integrity information once we switch the integrity information to queue_limits. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-12-hch@lst.de Signed-off-by: Jens Axboe --- block/bio-integrity.c | 4 ++-- block/blk-integrity.c | 18 +++++++++--------- include/linux/blk-integrity.h | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 31dbc2853f92..173ffd4d6237 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -448,10 +448,10 @@ bool bio_integrity_prep(struct bio *bio) return true; if (bio_data_dir(bio) == READ) { - if (!(bi->flags & BLK_INTEGRITY_VERIFY)) + if (bi->flags & BLK_INTEGRITY_NOVERIFY) return true; } else { - if (!(bi->flags & BLK_INTEGRITY_GENERATE)) + if (bi->flags & BLK_INTEGRITY_NOGENERATE) return true; /* diff --git a/block/blk-integrity.c b/block/blk-integrity.c index bec0d1df387c..b37b8855eed1 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -254,10 +254,11 @@ static ssize_t flag_store(struct device *dev, struct device_attribute *attr, if (err) return err; + /* the flags are inverted vs the values in the sysfs files */ if (val) - bi->flags |= flag; - else bi->flags &= ~flag; + else + bi->flags |= flag; return count; } @@ -266,7 +267,7 @@ static ssize_t flag_show(struct device *dev, struct device_attribute *attr, { struct blk_integrity *bi = dev_to_bi(dev); - return sysfs_emit(page, "%d\n", !!(bi->flags & flag)); + return sysfs_emit(page, "%d\n", !(bi->flags & flag)); } static ssize_t format_show(struct device *dev, struct device_attribute *attr, @@ -301,26 +302,26 @@ static ssize_t read_verify_store(struct device *dev, struct device_attribute *attr, const char *page, size_t count) { - return flag_store(dev, attr, page, count, BLK_INTEGRITY_VERIFY); + return flag_store(dev, attr, page, count, BLK_INTEGRITY_NOVERIFY); } static ssize_t read_verify_show(struct device *dev, struct device_attribute *attr, char *page) { - return flag_show(dev, attr, page, BLK_INTEGRITY_VERIFY); + return flag_show(dev, attr, page, BLK_INTEGRITY_NOVERIFY); } static ssize_t write_generate_store(struct device *dev, struct device_attribute *attr, const char *page, size_t count) { - return flag_store(dev, attr, page, count, BLK_INTEGRITY_GENERATE); + return flag_store(dev, attr, page, count, BLK_INTEGRITY_NOGENERATE); } static ssize_t write_generate_show(struct device *dev, struct device_attribute *attr, char *page) { - return flag_show(dev, attr, page, BLK_INTEGRITY_GENERATE); + return flag_show(dev, attr, page, BLK_INTEGRITY_NOGENERATE); } static ssize_t device_is_integrity_capable_show(struct device *dev, @@ -371,8 +372,7 @@ void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template struct blk_integrity *bi = &disk->queue->integrity; bi->csum_type = template->csum_type; - bi->flags = BLK_INTEGRITY_VERIFY | BLK_INTEGRITY_GENERATE | - template->flags; + bi->flags = template->flags; bi->interval_exp = template->interval_exp ? : ilog2(queue_logical_block_size(disk->queue)); bi->tuple_size = template->tuple_size; diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index 56ce1ae35580..bafa01d4e7f9 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -7,8 +7,8 @@ struct request; enum blk_integrity_flags { - BLK_INTEGRITY_VERIFY = 1 << 0, - BLK_INTEGRITY_GENERATE = 1 << 1, + BLK_INTEGRITY_NOVERIFY = 1 << 0, + BLK_INTEGRITY_NOGENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, BLK_INTEGRITY_REF_TAG = 1 << 3, }; -- cgit v1.2.3 From c6e56cf6b2e79a463af21286ba951714ed20828c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jun 2024 10:48:22 +0200 Subject: block: move integrity information into queue_limits Move the integrity information into the queue limits so that it can be set atomically with other queue limits, and that the sysfs changes to the read_verify and write_generate flags are properly synchronized. This also allows to provide a more useful helper to stack the integrity fields, although it still is separate from the main stacking function as not all stackable devices want to inherit the integrity settings. Even with that it greatly simplifies the code in md and dm. Note that the integrity field is moved as-is into the queue limits. While there are good arguments for removing the separate blk_integrity structure, this would cause a lot of churn and might better be done at a later time if desired. However the integrity field in the queue_limits structure is now unconditional so that various ifdefs can be avoided or replaced with IS_ENABLED(). Given that tiny size of it that seems like a worthwhile trade off. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20240613084839.1044015-13-hch@lst.de Signed-off-by: Jens Axboe --- Documentation/block/data-integrity.rst | 49 +--------- block/blk-integrity.c | 124 +++---------------------- block/blk-settings.c | 118 ++++++++++++++++++++++-- block/t10-pi.c | 12 +-- drivers/md/dm-core.h | 1 - drivers/md/dm-integrity.c | 27 +++--- drivers/md/dm-table.c | 161 ++++++--------------------------- drivers/md/md.c | 72 ++++----------- drivers/md/md.h | 5 +- drivers/md/raid0.c | 7 +- drivers/md/raid1.c | 10 +- drivers/md/raid10.c | 10 +- drivers/md/raid5.c | 2 +- drivers/nvdimm/btt.c | 13 +-- drivers/nvme/host/core.c | 70 +++++++------- drivers/scsi/sd.c | 8 +- drivers/scsi/sd.h | 12 +-- drivers/scsi/sd_dif.c | 34 +++---- include/linux/blk-integrity.h | 27 +++--- include/linux/blkdev.h | 12 +-- include/linux/t10-pi.h | 12 +-- 21 files changed, 289 insertions(+), 497 deletions(-) (limited to 'include') diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst index 6a760c0eb192..99905e880a0e 100644 --- a/Documentation/block/data-integrity.rst +++ b/Documentation/block/data-integrity.rst @@ -153,18 +153,11 @@ bio_free() will automatically free the bip. 4.2 Block Device ---------------- -Because the format of the protection data is tied to the physical -disk, each block device has been extended with a block integrity -profile (struct blk_integrity). This optional profile is registered -with the block layer using blk_integrity_register(). - -The profile contains callback functions for generating and verifying -the protection data, as well as getting and setting application tags. -The profile also contains a few constants to aid in completing, -merging and splitting the integrity metadata. +Block devices can set up the integrity information in the integrity +sub-struture of the queue_limits structure. Layered block devices will need to pick a profile that's appropriate -for all subdevices. blk_integrity_compare() can help with that. DM +for all subdevices. queue_limits_stack_integrity() can help with that. DM and MD linear, RAID0 and RAID1 are currently supported. RAID4/5/6 will require extra work due to the application tag. @@ -250,42 +243,6 @@ will require extra work due to the application tag. integrity upon completion. -5.4 Registering A Block Device As Capable Of Exchanging Integrity Metadata --------------------------------------------------------------------------- - - To enable integrity exchange on a block device the gendisk must be - registered as capable: - - `int blk_integrity_register(gendisk, blk_integrity);` - - The blk_integrity struct is a template and should contain the - following:: - - static struct blk_integrity my_profile = { - .name = "STANDARDSBODY-TYPE-VARIANT-CSUM", - .generate_fn = my_generate_fn, - .verify_fn = my_verify_fn, - .tuple_size = sizeof(struct my_tuple_size), - .tag_size = , - }; - - 'name' is a text string which will be visible in sysfs. This is - part of the userland API so chose it carefully and never change - it. The format is standards body-type-variant. - E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC. - - 'generate_fn' generates appropriate integrity metadata (for WRITE). - - 'verify_fn' verifies that the data buffer matches the integrity - metadata. - - 'tuple_size' must be set to match the size of the integrity - metadata per sector. I.e. 8 for DIF and EPP. - - 'tag_size' must be set to identify how many bytes of tag space - are available per hardware sector. For DIF this is either 2 or - 0 depending on the value of the Control Mode Page ATO bit. - ---------------------------------------------------------------------- 2007-12-24 Martin K. Petersen diff --git a/block/blk-integrity.c b/block/blk-integrity.c index b37b8855eed1..05a48689a424 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -107,63 +107,6 @@ new_segment: } EXPORT_SYMBOL(blk_rq_map_integrity_sg); -/** - * blk_integrity_compare - Compare integrity profile of two disks - * @gd1: Disk to compare - * @gd2: Disk to compare - * - * Description: Meta-devices like DM and MD need to verify that all - * sub-devices use the same integrity format before advertising to - * upper layers that they can send/receive integrity metadata. This - * function can be used to check whether two gendisk devices have - * compatible integrity formats. - */ -int blk_integrity_compare(struct gendisk *gd1, struct gendisk *gd2) -{ - struct blk_integrity *b1 = &gd1->queue->integrity; - struct blk_integrity *b2 = &gd2->queue->integrity; - - if (!b1->tuple_size && !b2->tuple_size) - return 0; - - if (!b1->tuple_size || !b2->tuple_size) - return -1; - - if (b1->interval_exp != b2->interval_exp) { - pr_err("%s: %s/%s protection interval %u != %u\n", - __func__, gd1->disk_name, gd2->disk_name, - 1 << b1->interval_exp, 1 << b2->interval_exp); - return -1; - } - - if (b1->tuple_size != b2->tuple_size) { - pr_err("%s: %s/%s tuple sz %u != %u\n", __func__, - gd1->disk_name, gd2->disk_name, - b1->tuple_size, b2->tuple_size); - return -1; - } - - if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { - pr_err("%s: %s/%s tag sz %u != %u\n", __func__, - gd1->disk_name, gd2->disk_name, - b1->tag_size, b2->tag_size); - return -1; - } - - if (b1->csum_type != b2->csum_type || - (b1->flags & BLK_INTEGRITY_REF_TAG) != - (b2->flags & BLK_INTEGRITY_REF_TAG)) { - pr_err("%s: %s/%s type %s != %s\n", __func__, - gd1->disk_name, gd2->disk_name, - blk_integrity_profile_name(b1), - blk_integrity_profile_name(b2)); - return -1; - } - - return 0; -} -EXPORT_SYMBOL(blk_integrity_compare); - bool blk_integrity_merge_rq(struct request_queue *q, struct request *req, struct request *next) { @@ -217,7 +160,7 @@ bool blk_integrity_merge_bio(struct request_queue *q, struct request *req, static inline struct blk_integrity *dev_to_bi(struct device *dev) { - return &dev_to_disk(dev)->queue->integrity; + return &dev_to_disk(dev)->queue->limits.integrity; } const char *blk_integrity_profile_name(struct blk_integrity *bi) @@ -246,7 +189,8 @@ EXPORT_SYMBOL_GPL(blk_integrity_profile_name); static ssize_t flag_store(struct device *dev, struct device_attribute *attr, const char *page, size_t count, unsigned char flag) { - struct blk_integrity *bi = dev_to_bi(dev); + struct request_queue *q = dev_to_disk(dev)->queue; + struct queue_limits lim; unsigned long val; int err; @@ -254,11 +198,18 @@ static ssize_t flag_store(struct device *dev, struct device_attribute *attr, if (err) return err; - /* the flags are inverted vs the values in the sysfs files */ + /* note that the flags are inverted vs the values in the sysfs files */ + lim = queue_limits_start_update(q); if (val) - bi->flags &= ~flag; + lim.integrity.flags &= ~flag; else - bi->flags |= flag; + lim.integrity.flags |= flag; + + blk_mq_freeze_queue(q); + err = queue_limits_commit_update(q, &lim); + blk_mq_unfreeze_queue(q); + if (err) + return err; return count; } @@ -355,52 +306,3 @@ const struct attribute_group blk_integrity_attr_group = { .name = "integrity", .attrs = integrity_attrs, }; - -/** - * blk_integrity_register - Register a gendisk as being integrity-capable - * @disk: struct gendisk pointer to make integrity-aware - * @template: block integrity profile to register - * - * Description: When a device needs to advertise itself as being able to - * send/receive integrity metadata it must use this function to register - * the capability with the block layer. The template is a blk_integrity - * struct with values appropriate for the underlying hardware. See - * Documentation/block/data-integrity.rst. - */ -void blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) -{ - struct blk_integrity *bi = &disk->queue->integrity; - - bi->csum_type = template->csum_type; - bi->flags = template->flags; - bi->interval_exp = template->interval_exp ? : - ilog2(queue_logical_block_size(disk->queue)); - bi->tuple_size = template->tuple_size; - bi->tag_size = template->tag_size; - bi->pi_offset = template->pi_offset; - -#ifdef CONFIG_BLK_INLINE_ENCRYPTION - if (disk->queue->crypto_profile) { - pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together. Disabling hardware inline encryption.\n"); - disk->queue->crypto_profile = NULL; - } -#endif -} -EXPORT_SYMBOL(blk_integrity_register); - -/** - * blk_integrity_unregister - Unregister block integrity profile - * @disk: disk whose integrity profile to unregister - * - * Description: This function unregisters the integrity capability from - * a block device. - */ -void blk_integrity_unregister(struct gendisk *disk) -{ - struct blk_integrity *bi = &disk->queue->integrity; - - if (!bi->tuple_size) - return; - memset(bi, 0, sizeof(*bi)); -} -EXPORT_SYMBOL(blk_integrity_unregister); diff --git a/block/blk-settings.c b/block/blk-settings.c index 996f247fc98e..f11c8676eb4c 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -97,6 +97,36 @@ static int blk_validate_zoned_limits(struct queue_limits *lim) return 0; } +static int blk_validate_integrity_limits(struct queue_limits *lim) +{ + struct blk_integrity *bi = &lim->integrity; + + if (!bi->tuple_size) { + if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE || + bi->tag_size || ((bi->flags & BLK_INTEGRITY_REF_TAG))) { + pr_warn("invalid PI settings.\n"); + return -EINVAL; + } + return 0; + } + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { + pr_warn("integrity support disabled.\n"); + return -EINVAL; + } + + if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE && + (bi->flags & BLK_INTEGRITY_REF_TAG)) { + pr_warn("ref tag not support without checksum.\n"); + return -EINVAL; + } + + if (!bi->interval_exp) + bi->interval_exp = ilog2(lim->logical_block_size); + + return 0; +} + /* * Check that the limits in lim are valid, initialize defaults for unset * values, and cap values based on others where needed. @@ -105,6 +135,7 @@ static int blk_validate_limits(struct queue_limits *lim) { unsigned int max_hw_sectors; unsigned int logical_block_sectors; + int err; /* * Unless otherwise specified, default to 512 byte logical blocks and a @@ -230,6 +261,9 @@ static int blk_validate_limits(struct queue_limits *lim) lim->misaligned = 0; } + err = blk_validate_integrity_limits(lim); + if (err) + return err; return blk_validate_zoned_limits(lim); } @@ -263,13 +297,24 @@ int queue_limits_commit_update(struct request_queue *q, struct queue_limits *lim) __releases(q->limits_lock) { - int error = blk_validate_limits(lim); + int error; - if (!error) { - q->limits = *lim; - if (q->disk) - blk_apply_bdi_limits(q->disk->bdi, lim); + error = blk_validate_limits(lim); + if (error) + goto out_unlock; + +#ifdef CONFIG_BLK_INLINE_ENCRYPTION + if (q->crypto_profile && lim->integrity.tag_size) { + pr_warn("blk-integrity: Integrity and hardware inline encryption are not supported together.\n"); + error = -EINVAL; + goto out_unlock; } +#endif + + q->limits = *lim; + if (q->disk) + blk_apply_bdi_limits(q->disk->bdi, lim); +out_unlock: mutex_unlock(&q->limits_lock); return error; } @@ -575,6 +620,67 @@ void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev, } EXPORT_SYMBOL_GPL(queue_limits_stack_bdev); +/** + * queue_limits_stack_integrity - stack integrity profile + * @t: target queue limits + * @b: base queue limits + * + * Check if the integrity profile in the @b can be stacked into the + * target @t. Stacking is possible if either: + * + * a) does not have any integrity information stacked into it yet + * b) the integrity profile in @b is identical to the one in @t + * + * If @b can be stacked into @t, return %true. Else return %false and clear the + * integrity information in @t. + */ +bool queue_limits_stack_integrity(struct queue_limits *t, + struct queue_limits *b) +{ + struct blk_integrity *ti = &t->integrity; + struct blk_integrity *bi = &b->integrity; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return true; + + if (!ti->tuple_size) { + /* inherit the settings from the first underlying device */ + if (!(ti->flags & BLK_INTEGRITY_STACKED)) { + ti->flags = BLK_INTEGRITY_DEVICE_CAPABLE | + (bi->flags & BLK_INTEGRITY_REF_TAG); + ti->csum_type = bi->csum_type; + ti->tuple_size = bi->tuple_size; + ti->pi_offset = bi->pi_offset; + ti->interval_exp = bi->interval_exp; + ti->tag_size = bi->tag_size; + goto done; + } + if (!bi->tuple_size) + goto done; + } + + if (ti->tuple_size != bi->tuple_size) + goto incompatible; + if (ti->interval_exp != bi->interval_exp) + goto incompatible; + if (ti->tag_size != bi->tag_size) + goto incompatible; + if (ti->csum_type != bi->csum_type) + goto incompatible; + if ((ti->flags & BLK_INTEGRITY_REF_TAG) != + (bi->flags & BLK_INTEGRITY_REF_TAG)) + goto incompatible; + +done: + ti->flags |= BLK_INTEGRITY_STACKED; + return true; + +incompatible: + memset(ti, 0, sizeof(*ti)); + return false; +} +EXPORT_SYMBOL_GPL(queue_limits_stack_integrity); + /** * blk_queue_update_dma_pad - update pad mask * @q: the request queue for the device diff --git a/block/t10-pi.c b/block/t10-pi.c index dadecf621497..cd7fa60d63ff 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -116,7 +116,7 @@ next: */ static void t10_pi_type1_prepare(struct request *rq) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; const int tuple_sz = bi->tuple_size; u32 ref_tag = t10_pi_ref_tag(rq); u8 offset = bi->pi_offset; @@ -167,7 +167,7 @@ static void t10_pi_type1_prepare(struct request *rq) */ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; unsigned intervals = nr_bytes >> bi->interval_exp; const int tuple_sz = bi->tuple_size; u32 ref_tag = t10_pi_ref_tag(rq); @@ -290,7 +290,7 @@ next: static void ext_pi_type1_prepare(struct request *rq) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; const int tuple_sz = bi->tuple_size; u64 ref_tag = ext_pi_ref_tag(rq); u8 offset = bi->pi_offset; @@ -330,7 +330,7 @@ static void ext_pi_type1_prepare(struct request *rq) static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; unsigned intervals = nr_bytes >> bi->interval_exp; const int tuple_sz = bi->tuple_size; u64 ref_tag = ext_pi_ref_tag(rq); @@ -396,7 +396,7 @@ blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter, void blk_integrity_prepare(struct request *rq) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) return; @@ -409,7 +409,7 @@ void blk_integrity_prepare(struct request *rq) void blk_integrity_complete(struct request *rq, unsigned int nr_bytes) { - struct blk_integrity *bi = &rq->q->integrity; + struct blk_integrity *bi = &rq->q->limits.integrity; if (!(bi->flags & BLK_INTEGRITY_REF_TAG)) return; diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 08700bfc3e23..14a44c0f8286 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -206,7 +206,6 @@ struct dm_table { bool integrity_supported:1; bool singleton:1; - unsigned integrity_added:1; /* * Indicates the rw permissions for the new logical device. This diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index c1cc27541673..2a89f8eb4713 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3475,6 +3475,17 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim limits->dma_alignment = limits->logical_block_size - 1; limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT; } + + if (!ic->internal_hash) { + struct blk_integrity *bi = &limits->integrity; + + memset(bi, 0, sizeof(*bi)); + bi->tuple_size = ic->tag_size; + bi->tag_size = bi->tuple_size; + bi->interval_exp = + ic->sb->log2_sectors_per_block + SECTOR_SHIFT; + } + limits->max_integrity_segments = USHRT_MAX; } @@ -3631,19 +3642,6 @@ try_smaller_buffer: return 0; } -static void dm_integrity_set(struct dm_target *ti, struct dm_integrity_c *ic) -{ - struct gendisk *disk = dm_disk(dm_table_get_md(ti->table)); - struct blk_integrity bi; - - memset(&bi, 0, sizeof(bi)); - bi.tuple_size = ic->tag_size; - bi.tag_size = bi.tuple_size; - bi.interval_exp = ic->sb->log2_sectors_per_block + SECTOR_SHIFT; - - blk_integrity_register(disk, &bi); -} - static void dm_integrity_free_page_list(struct page_list *pl) { unsigned int i; @@ -4629,9 +4627,6 @@ try_smaller_buffer: } } - if (!ic->internal_hash) - dm_integrity_set(ti, ic); - ti->num_flush_bios = 1; ti->flush_supported = true; if (ic->discard) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index b2d5246cff21..fd789eeb62d9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -425,6 +425,13 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, q->limits.logical_block_size, q->limits.alignment_offset, (unsigned long long) start << SECTOR_SHIFT); + + /* + * Only stack the integrity profile if the target doesn't have native + * integrity support. + */ + if (!dm_target_has_integrity(ti->type)) + queue_limits_stack_integrity_bdev(limits, bdev); return 0; } @@ -702,9 +709,6 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->immutable_target_type = ti->type; } - if (dm_target_has_integrity(ti->type)) - t->integrity_added = 1; - ti->table = t; ti->begin = start; ti->len = len; @@ -1119,99 +1123,6 @@ static int dm_table_build_index(struct dm_table *t) return r; } -static bool integrity_profile_exists(struct gendisk *disk) -{ - return !!blk_get_integrity(disk); -} - -/* - * Get a disk whose integrity profile reflects the table's profile. - * Returns NULL if integrity support was inconsistent or unavailable. - */ -static struct gendisk *dm_table_get_integrity_disk(struct dm_table *t) -{ - struct list_head *devices = dm_table_get_devices(t); - struct dm_dev_internal *dd = NULL; - struct gendisk *prev_disk = NULL, *template_disk = NULL; - - for (unsigned int i = 0; i < t->num_targets; i++) { - struct dm_target *ti = dm_table_get_target(t, i); - - if (!dm_target_passes_integrity(ti->type)) - goto no_integrity; - } - - list_for_each_entry(dd, devices, list) { - template_disk = dd->dm_dev->bdev->bd_disk; - if (!integrity_profile_exists(template_disk)) - goto no_integrity; - else if (prev_disk && - blk_integrity_compare(prev_disk, template_disk) < 0) - goto no_integrity; - prev_disk = template_disk; - } - - return template_disk; - -no_integrity: - if (prev_disk) - DMWARN("%s: integrity not set: %s and %s profile mismatch", - dm_device_name(t->md), - prev_disk->disk_name, - template_disk->disk_name); - return NULL; -} - -/* - * Register the mapped device for blk_integrity support if the - * underlying devices have an integrity profile. But all devices may - * not have matching profiles (checking all devices isn't reliable - * during table load because this table may use other DM device(s) which - * must be resumed before they will have an initialized integity - * profile). Consequently, stacked DM devices force a 2 stage integrity - * profile validation: First pass during table load, final pass during - * resume. - */ -static int dm_table_register_integrity(struct dm_table *t) -{ - struct mapped_device *md = t->md; - struct gendisk *template_disk = NULL; - - /* If target handles integrity itself do not register it here. */ - if (t->integrity_added) - return 0; - - template_disk = dm_table_get_integrity_disk(t); - if (!template_disk) - return 0; - - if (!integrity_profile_exists(dm_disk(md))) { - t->integrity_supported = true; - /* - * Register integrity profile during table load; we can do - * this because the final profile must match during resume. - */ - blk_integrity_register(dm_disk(md), - blk_get_integrity(template_disk)); - return 0; - } - - /* - * If DM device already has an initialized integrity - * profile the new profile should not conflict. - */ - if (blk_integrity_compare(dm_disk(md), template_disk) < 0) { - DMERR("%s: conflict with existing integrity profile: %s profile mismatch", - dm_device_name(t->md), - template_disk->disk_name); - return 1; - } - - /* Preserve existing integrity profile */ - t->integrity_supported = true; - return 0; -} - #ifdef CONFIG_BLK_INLINE_ENCRYPTION struct dm_crypto_profile { @@ -1423,12 +1334,6 @@ int dm_table_complete(struct dm_table *t) return r; } - r = dm_table_register_integrity(t); - if (r) { - DMERR("could not register integrity profile."); - return r; - } - r = dm_table_construct_crypto_profile(t); if (r) { DMERR("could not construct crypto profile."); @@ -1688,6 +1593,14 @@ int dm_calculate_queue_limits(struct dm_table *t, blk_set_stacking_limits(limits); + t->integrity_supported = true; + for (unsigned int i = 0; i < t->num_targets; i++) { + struct dm_target *ti = dm_table_get_target(t, i); + + if (!dm_target_passes_integrity(ti->type)) + t->integrity_supported = false; + } + for (unsigned int i = 0; i < t->num_targets; i++) { struct dm_target *ti = dm_table_get_target(t, i); @@ -1738,6 +1651,18 @@ combine_limits: dm_device_name(t->md), (unsigned long long) ti->begin, (unsigned long long) ti->len); + + if (t->integrity_supported || + dm_target_has_integrity(ti->type)) { + if (!queue_limits_stack_integrity(limits, &ti_limits)) { + DMWARN("%s: adding target device (start sect %llu len %llu) " + "disabled integrity support due to incompatibility", + dm_device_name(t->md), + (unsigned long long) ti->begin, + (unsigned long long) ti->len); + t->integrity_supported = false; + } + } } /* @@ -1761,36 +1686,6 @@ combine_limits: return validate_hardware_logical_block_alignment(t, limits); } -/* - * Verify that all devices have an integrity profile that matches the - * DM device's registered integrity profile. If the profiles don't - * match then unregister the DM device's integrity profile. - */ -static void dm_table_verify_integrity(struct dm_table *t) -{ - struct gendisk *template_disk = NULL; - - if (t->integrity_added) - return; - - if (t->integrity_supported) { - /* - * Verify that the original integrity profile - * matches all the devices in this table. - */ - template_disk = dm_table_get_integrity_disk(t); - if (template_disk && - blk_integrity_compare(dm_disk(t->md), template_disk) >= 0) - return; - } - - if (integrity_profile_exists(dm_disk(t->md))) { - DMWARN("%s: unable to establish an integrity profile", - dm_device_name(t->md)); - blk_integrity_unregister(dm_disk(t->md)); - } -} - static int device_flush_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -2004,8 +1899,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - dm_table_verify_integrity(t); - /* * Some devices don't use blk_integrity but still want stable pages * because they do their own checksumming. diff --git a/drivers/md/md.c b/drivers/md/md.c index aff9118ff697..67ece2cd725f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2410,36 +2410,10 @@ static LIST_HEAD(pending_raid_disks); */ int md_integrity_register(struct mddev *mddev) { - struct md_rdev *rdev, *reference = NULL; - if (list_empty(&mddev->disks)) return 0; /* nothing to do */ - if (mddev_is_dm(mddev) || blk_get_integrity(mddev->gendisk)) - return 0; /* shouldn't register, or already is */ - rdev_for_each(rdev, mddev) { - /* skip spares and non-functional disks */ - if (test_bit(Faulty, &rdev->flags)) - continue; - if (rdev->raid_disk < 0) - continue; - if (!reference) { - /* Use the first rdev as the reference */ - reference = rdev; - continue; - } - /* does this rdev's profile match the reference profile? */ - if (blk_integrity_compare(reference->bdev->bd_disk, - rdev->bdev->bd_disk) < 0) - return -EINVAL; - } - if (!reference || !bdev_get_integrity(reference->bdev)) - return 0; - /* - * All component devices are integrity capable and have matching - * profiles, register the common profile for the md device. - */ - blk_integrity_register(mddev->gendisk, - bdev_get_integrity(reference->bdev)); + if (mddev_is_dm(mddev) || !blk_get_integrity(mddev->gendisk)) + return 0; /* shouldn't register */ pr_debug("md: data integrity enabled on %s\n", mdname(mddev)); if (bioset_integrity_create(&mddev->bio_set, BIO_POOL_SIZE) || @@ -2459,32 +2433,6 @@ int md_integrity_register(struct mddev *mddev) } EXPORT_SYMBOL(md_integrity_register); -/* - * Attempt to add an rdev, but only if it is consistent with the current - * integrity profile - */ -int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev) -{ - struct blk_integrity *bi_mddev; - - if (mddev_is_dm(mddev)) - return 0; - - bi_mddev = blk_get_integrity(mddev->gendisk); - - if (!bi_mddev) /* nothing to do */ - return 0; - - if (blk_integrity_compare(mddev->gendisk, rdev->bdev->bd_disk) != 0) { - pr_err("%s: incompatible integrity profile for %pg\n", - mdname(mddev), rdev->bdev); - return -ENXIO; - } - - return 0; -} -EXPORT_SYMBOL(md_integrity_add_rdev); - static bool rdev_read_only(struct md_rdev *rdev) { return bdev_read_only(rdev->bdev) || @@ -5755,14 +5703,20 @@ static const struct kobj_type md_ktype = { int mdp_major = 0; /* stack the limit for all rdevs into lim */ -void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim) +int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim, + unsigned int flags) { struct md_rdev *rdev; rdev_for_each(rdev, mddev) { queue_limits_stack_bdev(lim, rdev->bdev, rdev->data_offset, mddev->gendisk->disk_name); + if ((flags & MDDEV_STACK_INTEGRITY) && + !queue_limits_stack_integrity_bdev(lim, rdev->bdev)) + return -EINVAL; } + + return 0; } EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits); @@ -5777,6 +5731,14 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev) lim = queue_limits_start_update(mddev->gendisk->queue); queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset, mddev->gendisk->disk_name); + + if (!queue_limits_stack_integrity_bdev(&lim, rdev->bdev)) { + pr_err("%s: incompatible integrity profile for %pg\n", + mdname(mddev), rdev->bdev); + queue_limits_cancel_update(mddev->gendisk->queue); + return -ENXIO; + } + return queue_limits_commit_update(mddev->gendisk->queue, &lim); } EXPORT_SYMBOL_GPL(mddev_stack_new_rdev); diff --git a/drivers/md/md.h b/drivers/md/md.h index ca085ecad504..6733b0b0abf9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -809,7 +809,6 @@ extern void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev); extern void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors); extern int md_check_no_bitmap(struct mddev *mddev); extern int md_integrity_register(struct mddev *mddev); -extern int md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev); extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale); extern int mddev_init(struct mddev *mddev); @@ -908,7 +907,9 @@ void md_autostart_arrays(int part); int md_set_array_info(struct mddev *mddev, struct mdu_array_info_s *info); int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info); int do_md_run(struct mddev *mddev); -void mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim); +#define MDDEV_STACK_INTEGRITY (1u << 0) +int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim, + unsigned int flags); int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev); void mddev_update_io_opt(struct mddev *mddev, unsigned int nr_stripes); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 81c01347cd24..62634e2a33bd 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -377,13 +377,18 @@ static void raid0_free(struct mddev *mddev, void *priv) static int raid0_set_limits(struct mddev *mddev) { struct queue_limits lim; + int err; blk_set_stacking_limits(&lim); lim.max_hw_sectors = mddev->chunk_sectors; lim.max_write_zeroes_sectors = mddev->chunk_sectors; lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * mddev->raid_disks; - mddev_stack_rdev_limits(mddev, &lim); + err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); + if (err) { + queue_limits_cancel_update(mddev->gendisk->queue); + return err; + } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 1f321826ef02..779cad62f6f8 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1907,9 +1907,6 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (mddev->recovery_disabled == conf->recovery_disabled) return -EBUSY; - if (md_integrity_add_rdev(rdev, mddev)) - return -ENXIO; - if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; @@ -3197,10 +3194,15 @@ static struct r1conf *setup_conf(struct mddev *mddev) static int raid1_set_limits(struct mddev *mddev) { struct queue_limits lim; + int err; blk_set_stacking_limits(&lim); lim.max_write_zeroes_sectors = 0; - mddev_stack_rdev_limits(mddev, &lim); + err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); + if (err) { + queue_limits_cancel_update(mddev->gendisk->queue); + return err; + } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a4556d2e46bf..5f6885b53b69 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2083,9 +2083,6 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev) if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1)) return -EINVAL; - if (md_integrity_add_rdev(rdev, mddev)) - return -ENXIO; - if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; @@ -3980,12 +3977,17 @@ static int raid10_set_queue_limits(struct mddev *mddev) { struct r10conf *conf = mddev->private; struct queue_limits lim; + int err; blk_set_stacking_limits(&lim); lim.max_write_zeroes_sectors = 0; lim.io_min = mddev->chunk_sectors << 9; lim.io_opt = lim.io_min * raid10_nr_stripes(conf); - mddev_stack_rdev_limits(mddev, &lim); + err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); + if (err) { + queue_limits_cancel_update(mddev->gendisk->queue); + return err; + } return queue_limits_set(mddev->gendisk->queue, &lim); } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2bd1ce9b3922..675c68fa6c64 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -7708,7 +7708,7 @@ static int raid5_set_limits(struct mddev *mddev) lim.raid_partial_stripes_expensive = 1; lim.discard_granularity = stripe; lim.max_write_zeroes_sectors = 0; - mddev_stack_rdev_limits(mddev, &lim); + mddev_stack_rdev_limits(mddev, &lim, 0); rdev_for_each(rdev, mddev) queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset, mddev->gendisk->disk_name); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 1e5aedaf8c7b..c5f8451b494d 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1504,6 +1504,11 @@ static int btt_blk_init(struct btt *btt) }; int rc; + if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { + lim.integrity.tuple_size = btt_meta_size(btt); + lim.integrity.tag_size = btt_meta_size(btt); + } + btt->btt_disk = blk_alloc_disk(&lim, NUMA_NO_NODE); if (IS_ERR(btt->btt_disk)) return PTR_ERR(btt->btt_disk); @@ -1516,14 +1521,6 @@ static int btt_blk_init(struct btt *btt) blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue); blk_queue_flag_set(QUEUE_FLAG_SYNCHRONOUS, btt->btt_disk->queue); - if (btt_meta_size(btt) && IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) { - struct blk_integrity bi = { - .tuple_size = btt_meta_size(btt), - .tag_size = btt_meta_size(btt), - }; - blk_integrity_register(btt->btt_disk, &bi); - } - set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9); rc = device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL); if (rc) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 14bac248cde4..5a673fa5cb26 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1723,11 +1723,12 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) +static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, + struct queue_limits *lim) { - struct blk_integrity integrity = { }; + struct blk_integrity *bi = &lim->integrity; - blk_integrity_unregister(disk); + memset(bi, 0, sizeof(*bi)); if (!head->ms) return true; @@ -1744,14 +1745,14 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) case NVME_NS_DPS_PI_TYPE3: switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: - integrity.csum_type = BLK_INTEGRITY_CSUM_CRC; - integrity.tag_size = sizeof(u16) + sizeof(u32); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC; + bi->tag_size = sizeof(u16) + sizeof(u32); + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; break; case NVME_NVM_NS_64B_GUARD: - integrity.csum_type = BLK_INTEGRITY_CSUM_CRC64; - integrity.tag_size = sizeof(u16) + 6; - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC64; + bi->tag_size = sizeof(u16) + 6; + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; break; default: break; @@ -1761,16 +1762,16 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) case NVME_NS_DPS_PI_TYPE2: switch (head->guard_type) { case NVME_NVM_NS_16B_GUARD: - integrity.csum_type = BLK_INTEGRITY_CSUM_CRC; - integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE | - BLK_INTEGRITY_REF_TAG; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC; + bi->tag_size = sizeof(u16); + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE | + BLK_INTEGRITY_REF_TAG; break; case NVME_NVM_NS_64B_GUARD: - integrity.csum_type = BLK_INTEGRITY_CSUM_CRC64; - integrity.tag_size = sizeof(u16); - integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE | - BLK_INTEGRITY_REF_TAG; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC64; + bi->tag_size = sizeof(u16); + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE | + BLK_INTEGRITY_REF_TAG; break; default: break; @@ -1780,9 +1781,8 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head) break; } - integrity.tuple_size = head->ms; - integrity.pi_offset = head->pi_offset; - blk_integrity_register(disk, &integrity); + bi->tuple_size = head->ms; + bi->pi_offset = head->pi_offset; return true; } @@ -2105,11 +2105,6 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && ns->head->ids.csi == NVME_CSI_ZNS) nvme_update_zone_info(ns, &lim, &zi); - ret = queue_limits_commit_update(ns->disk->queue, &lim); - if (ret) { - blk_mq_unfreeze_queue(ns->disk->queue); - goto out; - } /* * Register a metadata profile for PI, or the plain non-integrity NVMe @@ -2117,9 +2112,15 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (!nvme_init_integrity(ns->disk, ns->head)) + if (!nvme_init_integrity(ns->disk, ns->head, &lim)) capacity = 0; + ret = queue_limits_commit_update(ns->disk->queue, &lim); + if (ret) { + blk_mq_unfreeze_queue(ns->disk->queue); + goto out; + } + set_capacity_and_notify(ns->disk, capacity); /* @@ -2191,14 +2192,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) struct queue_limits lim; blk_mq_freeze_queue(ns->head->disk->queue); - if (unsupported) - ns->head->disk->flags |= GENHD_FL_HIDDEN; - else - nvme_init_integrity(ns->head->disk, ns->head); - set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); - set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); - nvme_mpath_revalidate_paths(ns); - /* * queue_limits mixes values that are the hardware limitations * for bio splitting with what is the device configuration. @@ -2221,7 +2214,16 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) lim.io_opt = ns_lim->io_opt; queue_limits_stack_bdev(&lim, ns->disk->part0, 0, ns->head->disk->disk_name); + if (unsupported) + ns->head->disk->flags |= GENHD_FL_HIDDEN; + else + nvme_init_integrity(ns->head->disk, ns->head, &lim); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); + + set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); + set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info)); + nvme_mpath_revalidate_paths(ns); + blk_mq_unfreeze_queue(ns->head->disk->queue); } diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d957e29b17a9..e01393ed4207 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2482,11 +2482,13 @@ static int sd_read_protection_type(struct scsi_disk *sdkp, unsigned char *buffer return 0; } -static void sd_config_protection(struct scsi_disk *sdkp) +static void sd_config_protection(struct scsi_disk *sdkp, + struct queue_limits *lim) { struct scsi_device *sdp = sdkp->device; - sd_dif_config_host(sdkp); + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + sd_dif_config_host(sdkp, lim); if (!sdkp->protection_type) return; @@ -3677,7 +3679,7 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_app_tag_own(sdkp, buffer); sd_read_write_same(sdkp, buffer); sd_read_security(sdkp, buffer); - sd_config_protection(sdkp); + sd_config_protection(sdkp, &lim); } /* diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index b4170b17bad4..726f1613f6cb 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -220,17 +220,7 @@ static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sec return sector >> (ilog2(sdev->sector_size) - 9); } -#ifdef CONFIG_BLK_DEV_INTEGRITY - -extern void sd_dif_config_host(struct scsi_disk *); - -#else /* CONFIG_BLK_DEV_INTEGRITY */ - -static inline void sd_dif_config_host(struct scsi_disk *disk) -{ -} - -#endif /* CONFIG_BLK_DEV_INTEGRITY */ +void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim); static inline int sd_is_zoned(struct scsi_disk *sdkp) { diff --git a/drivers/scsi/sd_dif.c b/drivers/scsi/sd_dif.c index 6f0921c7db78..ae6ce6f5d622 100644 --- a/drivers/scsi/sd_dif.c +++ b/drivers/scsi/sd_dif.c @@ -24,14 +24,15 @@ /* * Configure exchange of protection information between OS and HBA. */ -void sd_dif_config_host(struct scsi_disk *sdkp) +void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim) { struct scsi_device *sdp = sdkp->device; - struct gendisk *disk = sdkp->disk; u8 type = sdkp->protection_type; - struct blk_integrity bi; + struct blk_integrity *bi = &lim->integrity; int dif, dix; + memset(bi, 0, sizeof(*bi)); + dif = scsi_host_dif_capable(sdp->host, type); dix = scsi_host_dix_capable(sdp->host, type); @@ -39,40 +40,33 @@ void sd_dif_config_host(struct scsi_disk *sdkp) dif = 0; dix = 1; } - if (!dix) { - blk_integrity_unregister(disk); + if (!dix) return; - } - - memset(&bi, 0, sizeof(bi)); /* Enable DMA of protection information */ if (scsi_host_get_guard(sdkp->device->host) & SHOST_DIX_GUARD_IP) - bi.csum_type = BLK_INTEGRITY_CSUM_IP; + bi->csum_type = BLK_INTEGRITY_CSUM_IP; else - bi.csum_type = BLK_INTEGRITY_CSUM_CRC; + bi->csum_type = BLK_INTEGRITY_CSUM_CRC; if (type != T10_PI_TYPE3_PROTECTION) - bi.flags |= BLK_INTEGRITY_REF_TAG; + bi->flags |= BLK_INTEGRITY_REF_TAG; - bi.tuple_size = sizeof(struct t10_pi_tuple); + bi->tuple_size = sizeof(struct t10_pi_tuple); if (dif && type) { - bi.flags |= BLK_INTEGRITY_DEVICE_CAPABLE; + bi->flags |= BLK_INTEGRITY_DEVICE_CAPABLE; if (!sdkp->ATO) - goto out; + return; if (type == T10_PI_TYPE3_PROTECTION) - bi.tag_size = sizeof(u16) + sizeof(u32); + bi->tag_size = sizeof(u16) + sizeof(u32); else - bi.tag_size = sizeof(u16); + bi->tag_size = sizeof(u16); } sd_first_printk(KERN_NOTICE, sdkp, "Enabling DIX %s, application tag size %u bytes\n", - blk_integrity_profile_name(&bi), bi.tag_size); -out: - blk_integrity_register(disk, &bi); + blk_integrity_profile_name(bi), bi->tag_size); } - diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h index bafa01d4e7f9..d201140d77a3 100644 --- a/include/linux/blk-integrity.h +++ b/include/linux/blk-integrity.h @@ -11,6 +11,7 @@ enum blk_integrity_flags { BLK_INTEGRITY_NOGENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, BLK_INTEGRITY_REF_TAG = 1 << 3, + BLK_INTEGRITY_STACKED = 1 << 4, }; struct blk_integrity_iter { @@ -23,11 +24,15 @@ struct blk_integrity_iter { }; const char *blk_integrity_profile_name(struct blk_integrity *bi); +bool queue_limits_stack_integrity(struct queue_limits *t, + struct queue_limits *b); +static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t, + struct block_device *bdev) +{ + return queue_limits_stack_integrity(t, &bdev->bd_disk->queue->limits); +} #ifdef CONFIG_BLK_DEV_INTEGRITY -void blk_integrity_register(struct gendisk *, struct blk_integrity *); -void blk_integrity_unregister(struct gendisk *); -int blk_integrity_compare(struct gendisk *, struct gendisk *); int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); @@ -35,14 +40,14 @@ int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); static inline bool blk_integrity_queue_supports_integrity(struct request_queue *q) { - return q->integrity.tuple_size; + return q->limits.integrity.tuple_size; } static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { if (!blk_integrity_queue_supports_integrity(disk->queue)) return NULL; - return &disk->queue->integrity; + return &disk->queue->limits.integrity; } static inline struct blk_integrity * @@ -119,17 +124,6 @@ blk_integrity_queue_supports_integrity(struct request_queue *q) { return false; } -static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) -{ - return 0; -} -static inline void blk_integrity_register(struct gendisk *d, - struct blk_integrity *b) -{ -} -static inline void blk_integrity_unregister(struct gendisk *d) -{ -} static inline unsigned short queue_max_integrity_segments(const struct request_queue *q) { @@ -157,4 +151,5 @@ static inline struct bio_vec *rq_integrity_vec(struct request *rq) return NULL; } #endif /* CONFIG_BLK_DEV_INTEGRITY */ + #endif /* _LINUX_BLK_INTEGRITY_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f9089750919c..0c247a716885 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -334,6 +334,8 @@ struct queue_limits { * due to possible offsets. */ unsigned int dma_alignment; + + struct blk_integrity integrity; }; typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx, @@ -419,10 +421,6 @@ struct request_queue { struct queue_limits limits; -#ifdef CONFIG_BLK_DEV_INTEGRITY - struct blk_integrity integrity; -#endif /* CONFIG_BLK_DEV_INTEGRITY */ - #ifdef CONFIG_PM struct device *dev; enum rpm_status rpm_status; @@ -1300,11 +1298,9 @@ static inline bool bdev_stable_writes(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -#ifdef CONFIG_BLK_DEV_INTEGRITY - /* BLK_INTEGRITY_CSUM_NONE is not available in blkdev.h */ - if (q->integrity.csum_type != 0) + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + q->limits.integrity.csum_type != BLK_INTEGRITY_CSUM_NONE) return true; -#endif return test_bit(QUEUE_FLAG_STABLE_WRITES, &q->queue_flags); } diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index d2bafb76badf..1773610010eb 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -39,12 +39,8 @@ struct t10_pi_tuple { static inline u32 t10_pi_ref_tag(struct request *rq) { - unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + unsigned int shift = rq->q->limits.integrity.interval_exp; -#ifdef CONFIG_BLK_DEV_INTEGRITY - if (rq->q->integrity.interval_exp) - shift = rq->q->integrity.interval_exp; -#endif return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff; } @@ -65,12 +61,8 @@ static inline u64 lower_48_bits(u64 n) static inline u64 ext_pi_ref_tag(struct request *rq) { - unsigned int shift = ilog2(queue_logical_block_size(rq->q)); + unsigned int shift = rq->q->limits.integrity.interval_exp; -#ifdef CONFIG_BLK_DEV_INTEGRITY - if (rq->q->integrity.interval_exp) - shift = rq->q->integrity.interval_exp; -#endif return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT)); } -- cgit v1.2.3 From 0a5d3258d7c97295a89d22e54733b54aacb62562 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 3 Jun 2024 22:23:15 -0700 Subject: compiler_types.h: Define __retain for __attribute__((__retain__)) Some code includes the __used macro to prevent functions and data from being optimized out. This macro implements __attribute__((__used__)), which operates at the compiler and IR-level, and so still allows a linker to remove objects intended to be kept. Compilers supporting __attribute__((__retain__)) can address this gap by setting the flag SHF_GNU_RETAIN on the section of a function/variable, indicating to the linker the object should be retained. This attribute is available since gcc 11, clang 13, and binutils 2.36. Provide a __retain macro implementing __attribute__((__retain__)), whose first user will be the '__bpf_kfunc' tag. [ Additional remark from discussion: Why is CONFIG_LTO_CLANG added here? The __used macro permits garbage collection at section level, so CLANG_LTO_CLANG without CONFIG_LD_DEAD_CODE_DATA_ELIMINATION should not change final section dynamics? The conditional guard was included to ensure consistent behaviour between __retain and other features forcing split sections. In particular, the same guard is used in vmlinux.lds.h to merge split sections where needed. For example, using __retain in LLVM builds without CONFIG_LTO was failing CI tests on kernel-patches/bpf because the kernel didn't boot properly. And in further testing, the kernel had no issues loading BPF kfunc modules with such split sections, so the module (partial) linking scripts were left alone. ] Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Cc: Yonghong Song Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/ Link: https://lore.kernel.org/bpf/b31bca5a5e6765a0f32cc8c19b1d9cdbfaa822b5.1717477560.git.Tony.Ambardar@gmail.com --- include/linux/compiler_types.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 93600de3800b..f14c275950b5 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -143,6 +143,29 @@ static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } # define __preserve_most #endif +/* + * Annotating a function/variable with __retain tells the compiler to place + * the object in its own section and set the flag SHF_GNU_RETAIN. This flag + * instructs the linker to retain the object during garbage-cleanup or LTO + * phases. + * + * Note that the __used macro is also used to prevent functions or data + * being optimized out, but operates at the compiler/IR-level and may still + * allow unintended removal of objects during linking. + * + * Optional: only supported since gcc >= 11, clang >= 13 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-retain-function-attribute + * clang: https://clang.llvm.org/docs/AttributeReference.html#retain + */ +#if __has_attribute(__retain__) && \ + (defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || \ + defined(CONFIG_LTO_CLANG)) +# define __retain __attribute__((__retain__)) +#else +# define __retain +#endif + /* Compiler specific macros. */ #ifdef __clang__ #include -- cgit v1.2.3 From 7bdcedd5c8fb88e7176b93812b139eca5fe0aa46 Mon Sep 17 00:00:00 2001 From: Tony Ambardar Date: Mon, 3 Jun 2024 22:23:16 -0700 Subject: bpf: Harden __bpf_kfunc tag against linker kfunc removal BPF kfuncs are often not directly referenced and may be inadvertently removed by optimization steps during kernel builds, thus the __bpf_kfunc tag mitigates against this removal by including the __used macro. However, this macro alone does not prevent removal during linking, and may still yield build warnings (e.g. on mips64el): [...] LD vmlinux BTFIDS vmlinux WARN: resolve_btfids: unresolved symbol bpf_verify_pkcs7_signature WARN: resolve_btfids: unresolved symbol bpf_lookup_user_key WARN: resolve_btfids: unresolved symbol bpf_lookup_system_key WARN: resolve_btfids: unresolved symbol bpf_key_put WARN: resolve_btfids: unresolved symbol bpf_iter_task_next WARN: resolve_btfids: unresolved symbol bpf_iter_css_task_new WARN: resolve_btfids: unresolved symbol bpf_get_file_xattr WARN: resolve_btfids: unresolved symbol bpf_ct_insert_entry WARN: resolve_btfids: unresolved symbol bpf_cgroup_release WARN: resolve_btfids: unresolved symbol bpf_cgroup_from_id WARN: resolve_btfids: unresolved symbol bpf_cgroup_acquire WARN: resolve_btfids: unresolved symbol bpf_arena_free_pages NM System.map SORTTAB vmlinux OBJCOPY vmlinux.32 [...] Update the __bpf_kfunc tag to better guard against linker optimization by including the new __retain compiler macro, which fixes the warnings above. Verify the __retain macro with readelf by checking object flags for 'R': $ readelf -Wa kernel/trace/bpf_trace.o Section Headers: [Nr] Name Type Address Off Size ES Flg Lk Inf Al [...] [178] .text.bpf_key_put PROGBITS 00000000 6420 0050 00 AXR 0 0 8 [...] Key to Flags: [...] R (retain), D (mbind), p (processor specific) Fixes: 57e7c169cd6a ("bpf: Add __bpf_kfunc tag for marking kernel functions as kfuncs") Reported-by: kernel test robot Signed-off-by: Tony Ambardar Signed-off-by: Daniel Borkmann Tested-by: Jiri Olsa Reviewed-by: Jiri Olsa Cc: Yonghong Song Closes: https://lore.kernel.org/r/202401211357.OCX9yllM-lkp@intel.com/ Link: https://lore.kernel.org/bpf/ZlmGoT9KiYLZd91S@krava/T/ Link: https://lore.kernel.org/bpf/e9c64e9b5c073dabd457ff45128aabcab7630098.1717477560.git.Tony.Ambardar@gmail.com --- include/linux/btf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/btf.h b/include/linux/btf.h index f9e56fd12a9f..7c3e40c3295e 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -82,7 +82,7 @@ * as to avoid issues such as the compiler inlining or eliding either a static * kfunc, or a global kfunc in an LTO build. */ -#define __bpf_kfunc __used noinline +#define __bpf_kfunc __used __retain noinline #define __bpf_kfunc_start_defs() \ __diag_push(); \ -- cgit v1.2.3 From 98d7ca374ba4b39e7535613d40e159f09ca14da2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 12 Jun 2024 18:38:13 -0700 Subject: bpf: Track delta between "linked" registers. Compilers can generate the code r1 = r2 r1 += 0x1 if r2 < 1000 goto ... use knowledge of r2 range in subsequent r1 operations So remember constant delta between r2 and r1 and update r1 after 'if' condition. Unfortunately LLVM still uses this pattern for loops with 'can_loop' construct: for (i = 0; i < 1000 && can_loop; i++) The "undo" pass was introduced in LLVM https://reviews.llvm.org/D121937 to prevent this optimization, but it cannot cover all cases. Instead of fighting middle end optimizer in BPF backend teach the verifier about this pattern. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20240613013815.953-3-alexei.starovoitov@gmail.com --- include/linux/bpf_verifier.h | 12 +++- kernel/bpf/log.c | 4 +- kernel/bpf/verifier.c | 95 +++++++++++++++++++++++--- tools/testing/selftests/bpf/verifier/precise.c | 22 +++--- 4 files changed, 109 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 50aa87f8d77f..2b54e25d2364 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -73,7 +73,10 @@ enum bpf_iter_state { struct bpf_reg_state { /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; - /* Fixed part of pointer offset, pointer types only */ + /* + * Fixed part of pointer offset, pointer types only. + * Or constant delta between "linked" scalars with the same ID. + */ s32 off; union { /* valid when type == PTR_TO_PACKET */ @@ -167,6 +170,13 @@ struct bpf_reg_state { * Similarly to dynptrs, we use ID to track "belonging" of a reference * to a specific instance of bpf_iter. */ + /* + * Upper bit of ID is used to remember relationship between "linked" + * registers. Example: + * r1 = r2; both will have r1->id == r2->id == N + * r1 += 10; r1->id == N | BPF_ADD_CONST and r1->off == 10 + */ +#define BPF_ADD_CONST (1U << 31) u32 id; /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned * from a pointer-cast helper, bpf_sk_fullsock() and diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 4bd8f17a9f24..3f4ae92e549f 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -708,7 +708,9 @@ static void print_reg_state(struct bpf_verifier_env *env, verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id)); verbose(env, "("); if (reg->id) - verbose_a("id=%d", reg->id); + verbose_a("id=%d", reg->id & ~BPF_ADD_CONST); + if (reg->id & BPF_ADD_CONST) + verbose(env, "%+d", reg->off); if (reg->ref_obj_id) verbose_a("ref_obj_id=%d", reg->ref_obj_id); if (type_is_non_owning_ref(reg->type)) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e857b08e1f2d..dcbbf5f64c5d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3991,7 +3991,7 @@ static bool idset_contains(struct bpf_idset *s, u32 id) u32 i; for (i = 0; i < s->count; ++i) - if (s->ids[i] == id) + if (s->ids[i] == (id & ~BPF_ADD_CONST)) return true; return false; @@ -4001,7 +4001,7 @@ static int idset_push(struct bpf_idset *s, u32 id) { if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids))) return -EFAULT; - s->ids[s->count++] = id; + s->ids[s->count++] = id & ~BPF_ADD_CONST; return 0; } @@ -4438,8 +4438,20 @@ static bool __is_pointer_value(bool allow_ptr_leaks, static void assign_scalar_id_before_mov(struct bpf_verifier_env *env, struct bpf_reg_state *src_reg) { - if (src_reg->type == SCALAR_VALUE && !src_reg->id && - !tnum_is_const(src_reg->var_off)) + if (src_reg->type != SCALAR_VALUE) + return; + + if (src_reg->id & BPF_ADD_CONST) { + /* + * The verifier is processing rX = rY insn and + * rY->id has special linked register already. + * Cleared it, since multiple rX += const are not supported. + */ + src_reg->id = 0; + src_reg->off = 0; + } + + if (!src_reg->id && !tnum_is_const(src_reg->var_off)) /* Ensure that src_reg has a valid ID that will be copied to * dst_reg and then will be used by find_equal_scalars() to * propagate min/max range. @@ -14042,6 +14054,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; + bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64); u8 opcode = BPF_OP(insn->code); int err; @@ -14064,11 +14077,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, if (dst_reg->type != SCALAR_VALUE) ptr_reg = dst_reg; - else - /* Make sure ID is cleared otherwise dst_reg min/max could be - * incorrectly propagated into other registers by find_equal_scalars() - */ - dst_reg->id = 0; + if (BPF_SRC(insn->code) == BPF_X) { src_reg = ®s[insn->src_reg]; if (src_reg->type != SCALAR_VALUE) { @@ -14132,7 +14141,43 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } - return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); + err = adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg); + if (err) + return err; + /* + * Compilers can generate the code + * r1 = r2 + * r1 += 0x1 + * if r2 < 1000 goto ... + * use r1 in memory access + * So remember constant delta between r2 and r1 and update r1 after + * 'if' condition. + */ + if (env->bpf_capable && BPF_OP(insn->code) == BPF_ADD && + dst_reg->id && is_reg_const(src_reg, alu32)) { + u64 val = reg_const_value(src_reg, alu32); + + if ((dst_reg->id & BPF_ADD_CONST) || + /* prevent overflow in find_equal_scalars() later */ + val > (u32)S32_MAX) { + /* + * If the register already went through rX += val + * we cannot accumulate another val into rx->off. + */ + dst_reg->off = 0; + dst_reg->id = 0; + } else { + dst_reg->id |= BPF_ADD_CONST; + dst_reg->off = val; + } + } else { + /* + * Make sure ID is cleared otherwise dst_reg min/max could be + * incorrectly propagated into other registers by find_equal_scalars() + */ + dst_reg->id = 0; + } + return 0; } /* check validity of 32-bit and 64-bit arithmetic operations */ @@ -15104,12 +15149,36 @@ static bool try_match_pkt_pointers(const struct bpf_insn *insn, static void find_equal_scalars(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg) { + struct bpf_reg_state fake_reg; struct bpf_func_state *state; struct bpf_reg_state *reg; bpf_for_each_reg_in_vstate(vstate, state, reg, ({ - if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) + if (reg->type != SCALAR_VALUE || reg == known_reg) + continue; + if ((reg->id & ~BPF_ADD_CONST) != (known_reg->id & ~BPF_ADD_CONST)) + continue; + if ((!(reg->id & BPF_ADD_CONST) && !(known_reg->id & BPF_ADD_CONST)) || + reg->off == known_reg->off) { copy_register_state(reg, known_reg); + } else { + s32 saved_off = reg->off; + + fake_reg.type = SCALAR_VALUE; + __mark_reg_known(&fake_reg, (s32)reg->off - (s32)known_reg->off); + + /* reg = known_reg; reg += delta */ + copy_register_state(reg, known_reg); + /* + * Must preserve off, id and add_const flag, + * otherwise another find_equal_scalars() will be incorrect. + */ + reg->off = saved_off; + + scalar32_min_max_add(reg, &fake_reg); + scalar_min_max_add(reg, &fake_reg); + reg->var_off = tnum_add(reg->var_off, fake_reg.var_off); + } })); } @@ -16738,6 +16807,10 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, } if (!rold->precise && exact == NOT_EXACT) return true; + if ((rold->id & BPF_ADD_CONST) != (rcur->id & BPF_ADD_CONST)) + return false; + if ((rold->id & BPF_ADD_CONST) && (rold->off != rcur->off)) + return false; /* Why check_ids() for scalar registers? * * Consider the following BPF code: diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 0a9293a57211..90643ccc221d 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -39,12 +39,12 @@ .result = VERBOSE_ACCEPT, .errstr = "mark_precise: frame0: last_idx 26 first_idx 20\ - mark_precise: frame0: regs=r2 stack= before 25\ - mark_precise: frame0: regs=r2 stack= before 24\ - mark_precise: frame0: regs=r2 stack= before 23\ - mark_precise: frame0: regs=r2 stack= before 22\ - mark_precise: frame0: regs=r2 stack= before 20\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 25\ + mark_precise: frame0: regs=r2,r9 stack= before 24\ + mark_precise: frame0: regs=r2,r9 stack= before 23\ + mark_precise: frame0: regs=r2,r9 stack= before 22\ + mark_precise: frame0: regs=r2,r9 stack= before 20\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 19 first_idx 10\ mark_precise: frame0: regs=r2,r9 stack= before 19\ mark_precise: frame0: regs=r9 stack= before 18\ @@ -100,11 +100,11 @@ .errstr = "26: (85) call bpf_probe_read_kernel#113\ mark_precise: frame0: last_idx 26 first_idx 22\ - mark_precise: frame0: regs=r2 stack= before 25\ - mark_precise: frame0: regs=r2 stack= before 24\ - mark_precise: frame0: regs=r2 stack= before 23\ - mark_precise: frame0: regs=r2 stack= before 22\ - mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: regs=r2,r9 stack= before 25\ + mark_precise: frame0: regs=r2,r9 stack= before 24\ + mark_precise: frame0: regs=r2,r9 stack= before 23\ + mark_precise: frame0: regs=r2,r9 stack= before 22\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ mark_precise: frame0: last_idx 20 first_idx 20\ mark_precise: frame0: regs=r2,r9 stack= before 20\ mark_precise: frame0: parent state regs=r2,r9 stack=:\ -- cgit v1.2.3 From 894376385a2d80a96816449e4991587a9a5ef0dd Mon Sep 17 00:00:00 2001 From: Sebastian Ene Date: Thu, 13 Jun 2024 13:20:33 +0000 Subject: KVM: arm64: Add support for FFA_PARTITION_INFO_GET Handle the FFA_PARTITION_INFO_GET host call inside the pKVM hypervisor and copy the response message back to the host buffers. Signed-off-by: Sebastian Ene Reviewed-by: Sudeep Holla Tested-by: Sudeep Holla Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20240613132035.1070360-3-sebastianene@google.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/ffa.c | 52 +++++++++++++++++++++++++++++++++++++++++++ include/linux/arm_ffa.h | 3 +++ 2 files changed, 55 insertions(+) (limited to 'include') diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index c8ab51c331f0..4eaef673e98d 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -719,6 +719,55 @@ unlock: hyp_spin_unlock(&version_lock); } +static void do_ffa_part_get(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, uuid0, ctxt, 1); + DECLARE_REG(u32, uuid1, ctxt, 2); + DECLARE_REG(u32, uuid2, ctxt, 3); + DECLARE_REG(u32, uuid3, ctxt, 4); + DECLARE_REG(u32, flags, ctxt, 5); + u32 count, partition_sz, copy_sz; + + hyp_spin_lock(&host_buffers.lock); + if (!host_buffers.rx) { + ffa_to_smccc_res(res, FFA_RET_BUSY); + goto out_unlock; + } + + arm_smccc_1_1_smc(FFA_PARTITION_INFO_GET, uuid0, uuid1, + uuid2, uuid3, flags, 0, 0, + res); + + if (res->a0 != FFA_SUCCESS) + goto out_unlock; + + count = res->a2; + if (!count) + goto out_unlock; + + if (hyp_ffa_version > FFA_VERSION_1_0) { + /* Get the number of partitions deployed in the system */ + if (flags & 0x1) + goto out_unlock; + + partition_sz = res->a3; + } else { + /* FFA_VERSION_1_0 lacks the size in the response */ + partition_sz = FFA_1_0_PARTITON_INFO_SZ; + } + + copy_sz = partition_sz * count; + if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) { + ffa_to_smccc_res(res, FFA_RET_ABORTED); + goto out_unlock; + } + + memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz); +out_unlock: + hyp_spin_unlock(&host_buffers.lock); +} + bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) { struct arm_smccc_res res; @@ -773,6 +822,9 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) case FFA_VERSION: do_ffa_version(&res, host_ctxt); goto out_handled; + case FFA_PARTITION_INFO_GET: + do_ffa_part_get(&res, host_ctxt); + goto out_handled; } if (ffa_call_supported(func_id)) diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h index c82d56768101..c6d18f50f671 100644 --- a/include/linux/arm_ffa.h +++ b/include/linux/arm_ffa.h @@ -212,6 +212,9 @@ bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } extern const struct bus_type ffa_bus_type; +/* The FF-A 1.0 partition structure lacks the uuid[4] */ +#define FFA_1_0_PARTITON_INFO_SZ (8) + /* FFA transport related */ struct ffa_partition_info { u16 id; -- cgit v1.2.3 From 51104c19d8570eb23208e08eac0e9ae09ced1c15 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 12 Jun 2024 12:59:18 -0700 Subject: kunit: test: Add vm_mmap() allocation resource manager For tests that need to allocate using vm_mmap() (e.g. usercopy and execve), provide the interface to have the allocation tracked by KUnit itself. This requires bringing up a placeholder userspace mm. This combines my earlier attempt at this with Mark Rutland's version[1]. Normally alloc_mm() and arch_pick_mmap_layout() aren't exported for modules, so export these only for KUnit testing. Link: https://lore.kernel.org/lkml/20230321122514.1743889-2-mark.rutland@arm.com/ [1] Co-developed-by: Mark Rutland Signed-off-by: Mark Rutland Reviewed-by: David Gow Signed-off-by: Kees Cook Signed-off-by: Shuah Khan --- include/kunit/test.h | 17 ++++++++ kernel/fork.c | 3 ++ lib/kunit/Makefile | 1 + lib/kunit/user_alloc.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++ mm/util.c | 3 ++ 5 files changed, 137 insertions(+) create mode 100644 lib/kunit/user_alloc.c (limited to 'include') diff --git a/include/kunit/test.h b/include/kunit/test.h index e32b4cb7afa2..ec61cad6b71d 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -480,6 +480,23 @@ static inline void *kunit_kcalloc(struct kunit *test, size_t n, size_t size, gfp return kunit_kmalloc_array(test, n, size, gfp | __GFP_ZERO); } +/** + * kunit_vm_mmap() - Allocate KUnit-tracked vm_mmap() area + * @test: The test context object. + * @file: struct file pointer to map from, if any + * @addr: desired address, if any + * @len: how many bytes to allocate + * @prot: mmap PROT_* bits + * @flag: mmap flags + * @offset: offset into @file to start mapping from. + * + * See vm_mmap() for more information. + */ +unsigned long kunit_vm_mmap(struct kunit *test, struct file *file, + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flag, + unsigned long offset); + void kunit_cleanup(struct kunit *test); void __printf(2, 3) kunit_log_append(struct string_stream *log, const char *fmt, ...); diff --git a/kernel/fork.c b/kernel/fork.c index 99076dbe27d8..cea203197136 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -115,6 +115,8 @@ #define CREATE_TRACE_POINTS #include +#include + /* * Minimum number of threads to boot the kernel */ @@ -1334,6 +1336,7 @@ struct mm_struct *mm_alloc(void) memset(mm, 0, sizeof(*mm)); return mm_init(mm, current, current_user_ns()); } +EXPORT_SYMBOL_IF_KUNIT(mm_alloc); static inline void __mmput(struct mm_struct *mm) { diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile index 900e6447c8e8..30f6bbf04a4a 100644 --- a/lib/kunit/Makefile +++ b/lib/kunit/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_KUNIT) += kunit.o kunit-objs += test.o \ resource.o \ + user_alloc.o \ static_stub.o \ string-stream.o \ assert.o \ diff --git a/lib/kunit/user_alloc.c b/lib/kunit/user_alloc.c new file mode 100644 index 000000000000..76d3d1345ed7 --- /dev/null +++ b/lib/kunit/user_alloc.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnit userspace memory allocation resource management. + */ +#include +#include +#include +#include + +struct kunit_vm_mmap_resource { + unsigned long addr; + size_t size; +}; + +/* vm_mmap() arguments */ +struct kunit_vm_mmap_params { + struct file *file; + unsigned long addr; + unsigned long len; + unsigned long prot; + unsigned long flag; + unsigned long offset; +}; + +/* Create and attach a new mm if it doesn't already exist. */ +static int kunit_attach_mm(void) +{ + struct mm_struct *mm; + + if (current->mm) + return 0; + + mm = mm_alloc(); + if (!mm) + return -ENOMEM; + + /* Define the task size. */ + mm->task_size = TASK_SIZE; + + /* Make sure we can allocate new VMAs. */ + arch_pick_mmap_layout(mm, ¤t->signal->rlim[RLIMIT_STACK]); + + /* Attach the mm. It will be cleaned up when the process dies. */ + kthread_use_mm(mm); + + return 0; +} + +static int kunit_vm_mmap_init(struct kunit_resource *res, void *context) +{ + struct kunit_vm_mmap_params *p = context; + struct kunit_vm_mmap_resource vres; + int ret; + + ret = kunit_attach_mm(); + if (ret) + return ret; + + vres.size = p->len; + vres.addr = vm_mmap(p->file, p->addr, p->len, p->prot, p->flag, p->offset); + if (!vres.addr) + return -ENOMEM; + res->data = kmemdup(&vres, sizeof(vres), GFP_KERNEL); + if (!res->data) { + vm_munmap(vres.addr, vres.size); + return -ENOMEM; + } + + return 0; +} + +static void kunit_vm_mmap_free(struct kunit_resource *res) +{ + struct kunit_vm_mmap_resource *vres = res->data; + + /* + * Since this is executed from the test monitoring process, + * the test's mm has already been torn down. We don't need + * to run vm_munmap(vres->addr, vres->size), only clean up + * the vres. + */ + + kfree(vres); + res->data = NULL; +} + +unsigned long kunit_vm_mmap(struct kunit *test, struct file *file, + unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flag, + unsigned long offset) +{ + struct kunit_vm_mmap_params params = { + .file = file, + .addr = addr, + .len = len, + .prot = prot, + .flag = flag, + .offset = offset, + }; + struct kunit_vm_mmap_resource *vres; + + vres = kunit_alloc_resource(test, + kunit_vm_mmap_init, + kunit_vm_mmap_free, + GFP_KERNEL, + ¶ms); + if (vres) + return vres->addr; + return 0; +} +EXPORT_SYMBOL_GPL(kunit_vm_mmap); + +MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING); diff --git a/mm/util.c b/mm/util.c index c9e519e6811f..df37c47d9374 100644 --- a/mm/util.c +++ b/mm/util.c @@ -26,6 +26,8 @@ #include +#include + #include "internal.h" #include "swap.h" @@ -482,6 +484,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) clear_bit(MMF_TOPDOWN, &mm->flags); } #endif +EXPORT_SYMBOL_IF_KUNIT(arch_pick_mmap_layout); /** * __account_locked_vm - account locked pages to an mm's locked_vm -- cgit v1.2.3 From e575d3a6dd22123888defb622b1742aa2d45b942 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Fri, 14 Jun 2024 00:00:31 +0300 Subject: net/mlx5: Correct TASR typo into TSAR TSAR is the correct spelling (Transmit Scheduling ARbiter). Signed-off-by: Cosmin Ratiu Reviewed-by: Gal Pressman Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240613210036.1125203-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 2 +- include/linux/mlx5/mlx5_ifc.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index d2ebe56c3977..20146a2dc7f4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -531,7 +531,7 @@ static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) switch (type) { case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: return MLX5_CAP_QOS(dev, esw_element_type) & - ELEMENT_TYPE_CAP_MASK_TASR; + ELEMENT_TYPE_CAP_MASK_TSAR; case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: return MLX5_CAP_QOS(dev, esw_element_type) & ELEMENT_TYPE_CAP_MASK_VPORT; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 17acd0f3ca8e..466dcda40bb5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -3914,7 +3914,7 @@ enum { }; enum { - ELEMENT_TYPE_CAP_MASK_TASR = 1 << 0, + ELEMENT_TYPE_CAP_MASK_TSAR = 1 << 0, ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1, ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, -- cgit v1.2.3 From 296eaab825060d0f25e89b2f85ab9fc26128cea8 Mon Sep 17 00:00:00 2001 From: Rahul Rameshbabu Date: Fri, 14 Jun 2024 00:00:36 +0300 Subject: net/mlx5e: Support SWP-mode offload L4 csum calculation Calculate the pseudo-header checksum for both IPSec transport mode and IPSec tunnel mode for mlx5 devices that do not implement a pure hardware checksum offload for L4 checksum calculation. Introduce a capability bit that identifies such mlx5 devices. Signed-off-by: Rahul Rameshbabu Reviewed-by: Gal Pressman Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://lore.kernel.org/r/20240613210036.1125203-7-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 37 ++++++++++++++++++++++ .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 6 +++- include/linux/mlx5/mlx5_ifc.h | 3 +- 3 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index d1f0f868d494..5ec468268d1a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -6,6 +6,8 @@ #include "en.h" #include +#include +#include #define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) @@ -479,6 +481,41 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg, } } +static inline void +mlx5e_swp_encap_csum_partial(struct mlx5_core_dev *mdev, struct sk_buff *skb, bool tunnel) +{ + const struct iphdr *ip = tunnel ? inner_ip_hdr(skb) : ip_hdr(skb); + const struct ipv6hdr *ip6; + struct tcphdr *th; + struct udphdr *uh; + int len; + + if (!MLX5_CAP_ETH(mdev, swp_csum_l4_partial) || !skb_is_gso(skb)) + return; + + if (skb_is_gso_tcp(skb)) { + th = inner_tcp_hdr(skb); + len = skb_shinfo(skb)->gso_size + inner_tcp_hdrlen(skb); + + if (ip->version == 4) { + th->check = ~tcp_v4_check(len, ip->saddr, ip->daddr, 0); + } else { + ip6 = tunnel ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + th->check = ~tcp_v6_check(len, &ip6->saddr, &ip6->daddr, 0); + } + } else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) { + uh = (struct udphdr *)skb_inner_transport_header(skb); + len = skb_shinfo(skb)->gso_size + sizeof(struct udphdr); + + if (ip->version == 4) { + uh->check = ~udp_v4_check(len, ip->saddr, ip->daddr, 0); + } else { + ip6 = tunnel ? inner_ipv6_hdr(skb) : ipv6_hdr(skb); + uh->check = ~udp_v6_check(len, &ip6->saddr, &ip6->daddr, 0); + } + } +} + #define MLX5E_STOP_ROOM(wqebbs) ((wqebbs) * 2 - 1) static inline u16 mlx5e_stop_room_for_wqe(struct mlx5_core_dev *mdev, u16 wqe_size) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 359050f0b54d..3cc640669247 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -116,6 +116,7 @@ static inline bool mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { + struct mlx5_core_dev *mdev = sq->mdev; u8 inner_ipproto; if (!mlx5e_ipsec_eseg_meta(eseg)) @@ -125,9 +126,12 @@ mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, inner_ipproto = xfrm_offload(skb)->inner_ipproto; if (inner_ipproto) { eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; - if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) + if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) { + mlx5e_swp_encap_csum_partial(mdev, skb, true); eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + } } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + mlx5e_swp_encap_csum_partial(mdev, skb, false); eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial_inner++; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 466dcda40bb5..66b921c81c0f 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1093,7 +1093,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 tunnel_stateless_ip_over_ip_tx[0x1]; u8 reserved_at_2e[0x2]; u8 max_vxlan_udp_ports[0x8]; - u8 reserved_at_38[0x6]; + u8 swp_csum_l4_partial[0x1]; + u8 reserved_at_39[0x5]; u8 max_geneve_opt_len[0x1]; u8 tunnel_stateless_geneve_rx[0x1]; -- cgit v1.2.3 From 6c3282a6b296385bee2c383442c39f507b0d51dd Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 13 Jun 2024 11:36:06 +0100 Subject: net: stmmac: add select_pcs() platform method Allow platform drivers to provide their logic to select an appropriate PCS. Tested-by: Romain Gantois Reviewed-by: Romain Gantois Signed-off-by: Russell King (Oracle) Link: https://lore.kernel.org/r/E1sHhoM-00Fesu-8E@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 7 +++++++ include/linux/stmmac.h | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index bbedf2a8c60f..302aa4080de3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -949,6 +949,13 @@ static struct phylink_pcs *stmmac_mac_select_pcs(struct phylink_config *config, phy_interface_t interface) { struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); + struct phylink_pcs *pcs; + + if (priv->plat->select_pcs) { + pcs = priv->plat->select_pcs(priv, interface); + if (!IS_ERR(pcs)) + return pcs; + } if (priv->hw->xpcs) return &priv->hw->xpcs->pcs; diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 8f0f156d50d3..9c54f82901a1 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -13,7 +13,7 @@ #define __STMMAC_PLATFORM_DATA #include -#include +#include #define MTL_MAX_RX_QUEUES 8 #define MTL_MAX_TX_QUEUES 8 @@ -271,6 +271,8 @@ struct plat_stmmacenet_data { void (*dump_debug_regs)(void *priv); int (*pcs_init)(struct stmmac_priv *priv); void (*pcs_exit)(struct stmmac_priv *priv); + struct phylink_pcs *(*select_pcs)(struct stmmac_priv *priv, + phy_interface_t interface); void *bsp_priv; struct clk *stmmac_clk; struct clk *pclk; -- cgit v1.2.3 From e7da16abf030b39c3598574d5457f324a6f0e4a7 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:33 +0900 Subject: firewire: core: record card index in tracepoinrts events derived from async_outbound_complete_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-2-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 6 +++--- include/trace/events/firewire.h | 17 ++++++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 571fdff65c2b..de75e758fd07 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -174,8 +174,8 @@ static void transmit_complete_callback(struct fw_packet *packet, struct fw_transaction *t = container_of(packet, struct fw_transaction, packet); - trace_async_request_outbound_complete((uintptr_t)t, packet->generation, packet->speed, - status, packet->timestamp); + trace_async_request_outbound_complete((uintptr_t)t, card->index, packet->generation, + packet->speed, status, packet->timestamp); switch (status) { case ACK_COMPLETE: @@ -674,7 +674,7 @@ static void free_response_callback(struct fw_packet *packet, { struct fw_request *request = container_of(packet, struct fw_request, response); - trace_async_response_outbound_complete((uintptr_t)request, packet->generation, + trace_async_response_outbound_complete((uintptr_t)request, card->index, packet->generation, packet->speed, status, packet->timestamp); // Decrease the reference count since not at in-flight. diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index d695a560673f..ca6ea9bd1eba 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -71,10 +71,11 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, // The value of status is one of ack codes and rcodes specific to Linux FireWire subsystem. DECLARE_EVENT_CLASS(async_outbound_complete_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __field(u8, status) @@ -82,14 +83,16 @@ DECLARE_EVENT_CLASS(async_outbound_complete_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; __entry->status = status; __entry->timestamp = timestamp; ), TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, @@ -144,8 +147,8 @@ DEFINE_EVENT(async_outbound_initiate_template, async_request_outbound_initiate, ); DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp) ); DEFINE_EVENT(async_inbound_template, async_response_inbound, @@ -194,8 +197,8 @@ DEFINE_EVENT_PRINT(async_outbound_initiate_template, async_response_outbound_ini ); DEFINE_EVENT(async_outbound_complete_template, async_response_outbound_complete, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), - TP_ARGS(transaction, generation, scode, status, timestamp) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp) ); #undef ASYNC_HEADER_GET_DESTINATION -- cgit v1.2.3 From 64e02b64fb1d832a9a5254055a829db64750421a Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:34 +0900 Subject: firewire: core: record card index in tracepoinrts events derived from async_outbound_initiate_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-3-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 10 ++++++---- include/trace/events/firewire.h | 20 ++++++++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index de75e758fd07..3f9361d15607 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -398,7 +398,8 @@ void __fw_send_request(struct fw_card *card, struct fw_transaction *t, int tcode spin_unlock_irqrestore(&card->lock, flags); - trace_async_request_outbound_initiate((uintptr_t)t, generation, speed, t->packet.header, payload, + trace_async_request_outbound_initiate((uintptr_t)t, card->index, generation, speed, + t->packet.header, payload, tcode_is_read_request(tcode) ? 0 : length / 4); card->driver->send_request(card, &t->packet); @@ -879,9 +880,10 @@ void fw_send_response(struct fw_card *card, // Increase the reference count so that the object is kept during in-flight. fw_request_get(request); - trace_async_response_outbound_initiate((uintptr_t)request, request->response.generation, - request->response.speed, request->response.header, - data, data ? data_length / 4 : 0); + trace_async_response_outbound_initiate((uintptr_t)request, card->index, + request->response.generation, request->response.speed, + request->response.header, data, + data ? data_length / 4 : 0); card->driver->send_response(card, &request->response); } diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index ca6ea9bd1eba..a3d9916cbad1 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -36,10 +36,11 @@ #define QUADLET_SIZE 4 DECLARE_EVENT_CLASS(async_outbound_initiate_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __array(u32, header, ASYNC_HEADER_QUADLET_COUNT) @@ -47,6 +48,7 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; memcpy(__entry->header, header, QUADLET_SIZE * ASYNC_HEADER_QUADLET_COUNT); @@ -54,8 +56,9 @@ DECLARE_EVENT_CLASS(async_outbound_initiate_template, ), // This format is for the request subaction. TP_printk( - "transaction=0x%llx generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, ASYNC_HEADER_GET_DESTINATION(__entry->header), @@ -142,8 +145,8 @@ DECLARE_EVENT_CLASS(async_inbound_template, ); DEFINE_EVENT(async_outbound_initiate_template, async_request_outbound_initiate, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count) ); DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, @@ -178,11 +181,12 @@ DEFINE_EVENT_PRINT(async_inbound_template, async_request_inbound, ); DEFINE_EVENT_PRINT(async_outbound_initiate_template, async_response_outbound_initiate, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, header, data, data_count), TP_printk( - "transaction=0x%llx generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, ASYNC_HEADER_GET_DESTINATION(__entry->header), -- cgit v1.2.3 From 65ec7ebefe7de01281cce1f552ebd4dd00386665 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:35 +0900 Subject: firewire: core: record card index in tracepoinrts events derived from async_inbound_template The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-4-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 8 ++++---- include/trace/events/firewire.h | 20 ++++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 3f9361d15607..3503c238f8ae 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -1009,8 +1009,8 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) return; } - trace_async_request_inbound((uintptr_t)request, p->generation, p->speed, p->ack, - p->timestamp, p->header, request->data, + trace_async_request_inbound((uintptr_t)request, card->index, p->generation, p->speed, + p->ack, p->timestamp, p->header, request->data, tcode_is_read_request(tcode) ? 0 : request->length / 4); offset = async_header_get_offset(p->header); @@ -1080,8 +1080,8 @@ void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) } spin_unlock_irqrestore(&card->lock, flags); - trace_async_response_inbound((uintptr_t)t, p->generation, p->speed, p->ack, p->timestamp, - p->header, data, data_length / 4); + trace_async_response_inbound((uintptr_t)t, card->index, p->generation, p->speed, p->ack, + p->timestamp, p->header, data, data_length / 4); if (!t) { timed_out: diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index a3d9916cbad1..b72f613cfa02 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -105,10 +105,11 @@ DECLARE_EVENT_CLASS(async_outbound_complete_template, // The value of status is one of ack codes and rcodes specific to Linux FireWire subsystem. DECLARE_EVENT_CLASS(async_inbound_template, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count), TP_STRUCT__entry( __field(u64, transaction) + __field(u8, card_index) __field(u8, generation) __field(u8, scode) __field(u8, status) @@ -118,6 +119,7 @@ DECLARE_EVENT_CLASS(async_inbound_template, ), TP_fast_assign( __entry->transaction = transaction; + __entry->card_index = card_index; __entry->generation = generation; __entry->scode = scode; __entry->status = status; @@ -127,8 +129,9 @@ DECLARE_EVENT_CLASS(async_inbound_template, ), // This format is for the response subaction. TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x rcode=%u header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, @@ -155,16 +158,17 @@ DEFINE_EVENT(async_outbound_complete_template, async_request_outbound_complete, ); DEFINE_EVENT(async_inbound_template, async_response_inbound, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count) + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count) ); DEFINE_EVENT_PRINT(async_inbound_template, async_request_inbound, - TP_PROTO(u64 transaction, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), - TP_ARGS(transaction, generation, scode, status, timestamp, header, data, data_count), + TP_PROTO(u64 transaction, unsigned int card_index, unsigned int generation, unsigned int scode, unsigned int status, unsigned int timestamp, const u32 *header, const u32 *data, unsigned int data_count), + TP_ARGS(transaction, card_index, generation, scode, status, timestamp, header, data, data_count), TP_printk( - "transaction=0x%llx generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", + "transaction=0x%llx card_index=%u generation=%u scode=%u status=%u timestamp=0x%04x dst_id=0x%04x tlabel=%u tcode=%u src_id=0x%04x offset=0x%012llx header=%s data=%s", __entry->transaction, + __entry->card_index, __entry->generation, __entry->scode, __entry->status, -- cgit v1.2.3 From 3cb44a72a39835b368ab78d739819330089aa2bf Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:36 +0900 Subject: firewire: core: record card index in async_phy_outbound_initiate tracepoints event The asynchronous transaction is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-5-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-cdev.c | 4 ++-- drivers/firewire/core-transaction.c | 2 +- include/trace/events/firewire.h | 9 ++++++--- 3 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 55993c9e0b90..ff8739f96af5 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1659,8 +1659,8 @@ static int ioctl_send_phy_packet(struct client *client, union ioctl_arg *arg) memcpy(pp->data, a->data, sizeof(a->data)); } - trace_async_phy_outbound_initiate((uintptr_t)&e->p, e->p.generation, e->p.header[1], - e->p.header[2]); + trace_async_phy_outbound_initiate((uintptr_t)&e->p, card->index, e->p.generation, + e->p.header[1], e->p.header[2]); card->driver->send_request(card, &e->p); diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 3503c238f8ae..e522dc3d9897 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -504,7 +504,7 @@ void fw_send_phy_config(struct fw_card *card, phy_config_packet.generation = generation; reinit_completion(&phy_config_done); - trace_async_phy_outbound_initiate((uintptr_t)&phy_config_packet, + trace_async_phy_outbound_initiate((uintptr_t)&phy_config_packet, card->index, phy_config_packet.generation, phy_config_packet.header[1], phy_config_packet.header[2]); diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index b72f613cfa02..a59dc26b2a53 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -217,23 +217,26 @@ DEFINE_EVENT(async_outbound_complete_template, async_response_outbound_complete, #undef ASYNC_HEADER_GET_RCODE TRACE_EVENT(async_phy_outbound_initiate, - TP_PROTO(u64 packet, unsigned int generation, u32 first_quadlet, u32 second_quadlet), - TP_ARGS(packet, generation, first_quadlet, second_quadlet), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, u32 first_quadlet, u32 second_quadlet), + TP_ARGS(packet, card_index, generation, first_quadlet, second_quadlet), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u32, first_quadlet) __field(u32, second_quadlet) ), TP_fast_assign( __entry->packet = packet; + __entry->card_index = card_index; __entry->generation = generation; __entry->first_quadlet = first_quadlet; __entry->second_quadlet = second_quadlet ), TP_printk( - "packet=0x%llx generation=%u first_quadlet=0x%08x second_quadlet=0x%08x", + "packet=0x%llx card_index=%u generation=%u first_quadlet=0x%08x second_quadlet=0x%08x", __entry->packet, + __entry->card_index, __entry->generation, __entry->first_quadlet, __entry->second_quadlet -- cgit v1.2.3 From 810f2aa83563020d834425018303f2aaecef1e6e Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:37 +0900 Subject: firewire: core: record card index in async_phy_outbound_complete tracepoints event The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-6-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-cdev.c | 2 +- drivers/firewire/core-transaction.c | 2 +- include/trace/events/firewire.h | 9 ++++++--- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index ff8739f96af5..9a7dc90330a3 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -1559,7 +1559,7 @@ static void outbound_phy_packet_callback(struct fw_packet *packet, struct client *e_client = e->client; u32 rcode; - trace_async_phy_outbound_complete((uintptr_t)packet, status, packet->generation, + trace_async_phy_outbound_complete((uintptr_t)packet, card->index, status, packet->generation, packet->timestamp); switch (status) { diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index e522dc3d9897..bd5a467cfd60 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -464,7 +464,7 @@ static DECLARE_COMPLETION(phy_config_done); static void transmit_phy_packet_callback(struct fw_packet *packet, struct fw_card *card, int status) { - trace_async_phy_outbound_complete((uintptr_t)packet, packet->generation, status, + trace_async_phy_outbound_complete((uintptr_t)packet, card->index, packet->generation, status, packet->timestamp); complete(&phy_config_done); } diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index a59dc26b2a53..61c7a2461fbc 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -244,23 +244,26 @@ TRACE_EVENT(async_phy_outbound_initiate, ); TRACE_EVENT(async_phy_outbound_complete, - TP_PROTO(u64 packet, unsigned int generation, unsigned int status, unsigned int timestamp), - TP_ARGS(packet, generation, status, timestamp), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, unsigned int status, unsigned int timestamp), + TP_ARGS(packet, card_index, generation, status, timestamp), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u8, status) __field(u16, timestamp) ), TP_fast_assign( __entry->packet = packet; + __entry->card_index = card_index; __entry->generation = generation; __entry->status = status; __entry->timestamp = timestamp; ), TP_printk( - "packet=0x%llx generation=%u status=%u timestamp=0x%04x", + "packet=0x%llx card_index=%u generation=%u status=%u timestamp=0x%04x", __entry->packet, + __entry->card_index, __entry->generation, __entry->status, __entry->timestamp -- cgit v1.2.3 From abbb4bd96d7f871b10bd7058f7284ffaf4e8257f Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:38 +0900 Subject: firewire: core: record card index in async_phy_inbound tracepoints event The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-7-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-transaction.c | 2 +- include/trace/events/firewire.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index bd5a467cfd60..76ab6a209768 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -997,7 +997,7 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) tcode = async_header_get_tcode(p->header); if (tcode_is_link_internal(tcode)) { - trace_async_phy_inbound((uintptr_t)p, p->generation, p->ack, p->timestamp, + trace_async_phy_inbound((uintptr_t)p, card->index, p->generation, p->ack, p->timestamp, p->header[1], p->header[2]); fw_cdev_handle_phy_packet(card, p); return; diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 61c7a2461fbc..e5524fc71880 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -271,10 +271,11 @@ TRACE_EVENT(async_phy_outbound_complete, ); TRACE_EVENT(async_phy_inbound, - TP_PROTO(u64 packet, unsigned int generation, unsigned int status, unsigned int timestamp, u32 first_quadlet, u32 second_quadlet), - TP_ARGS(packet, generation, status, timestamp, first_quadlet, second_quadlet), + TP_PROTO(u64 packet, unsigned int card_index, unsigned int generation, unsigned int status, unsigned int timestamp, u32 first_quadlet, u32 second_quadlet), + TP_ARGS(packet, card_index, generation, status, timestamp, first_quadlet, second_quadlet), TP_STRUCT__entry( __field(u64, packet) + __field(u8, card_index) __field(u8, generation) __field(u8, status) __field(u16, timestamp) @@ -290,8 +291,9 @@ TRACE_EVENT(async_phy_inbound, __entry->second_quadlet = second_quadlet ), TP_printk( - "packet=0x%llx generation=%u status=%u timestamp=0x%04x first_quadlet=0x%08x second_quadlet=0x%08x", + "packet=0x%llx card_index=%u generation=%u status=%u timestamp=0x%04x first_quadlet=0x%08x second_quadlet=0x%08x", __entry->packet, + __entry->card_index, __entry->generation, __entry->status, __entry->timestamp, -- cgit v1.2.3 From 7507dbc46b780e9fa2ec3d4db7cd9ce07e722927 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:39 +0900 Subject: firewire: core: record card index in tracepoinrts events derived from bus_reset_arrange_template The asynchronous transmission of phy packet is initiated on one of 1394 OHCI controller, however the existing tracepoints events has the lack of data about it. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-8-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-card.c | 6 +++--- include/trace/events/firewire.h | 21 ++++++++++++--------- 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c index 127d87e3a153..f8b99dd6cd82 100644 --- a/drivers/firewire/core-card.c +++ b/drivers/firewire/core-card.c @@ -222,14 +222,14 @@ static int reset_bus(struct fw_card *card, bool short_reset) int reg = short_reset ? 5 : 1; int bit = short_reset ? PHY_BUS_SHORT_RESET : PHY_BUS_RESET; - trace_bus_reset_initiate(card->generation, short_reset); + trace_bus_reset_initiate(card->index, card->generation, short_reset); return card->driver->update_phy_reg(card, reg, 0, bit); } void fw_schedule_bus_reset(struct fw_card *card, bool delayed, bool short_reset) { - trace_bus_reset_schedule(card->generation, short_reset); + trace_bus_reset_schedule(card->index, card->generation, short_reset); /* We don't try hard to sort out requests of long vs. short resets. */ card->br_short = short_reset; @@ -249,7 +249,7 @@ static void br_work(struct work_struct *work) /* Delay for 2s after last reset per IEEE 1394 clause 8.2.1. */ if (card->reset_jiffies != 0 && time_before64(get_jiffies_64(), card->reset_jiffies + 2 * HZ)) { - trace_bus_reset_postpone(card->generation, card->br_short); + trace_bus_reset_postpone(card->index, card->generation, card->br_short); if (!queue_delayed_work(fw_workqueue, &card->br_work, 2 * HZ)) fw_card_put(card); diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index e5524fc71880..e6485051f546 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -303,36 +303,39 @@ TRACE_EVENT(async_phy_inbound, ); DECLARE_EVENT_CLASS(bus_reset_arrange_template, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset), + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset), TP_STRUCT__entry( + __field(u8, card_index) __field(u8, generation) __field(bool, short_reset) ), TP_fast_assign( + __entry->card_index = card_index; __entry->generation = generation; __entry->short_reset = short_reset; ), TP_printk( - "generation=%u short_reset=%s", + "card_index=%u generation=%u short_reset=%s", + __entry->card_index, __entry->generation, __entry->short_reset ? "true" : "false" ) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_initiate, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_schedule, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); DEFINE_EVENT(bus_reset_arrange_template, bus_reset_postpone, - TP_PROTO(unsigned int generation, bool short_reset), - TP_ARGS(generation, short_reset) + TP_PROTO(unsigned int card_index, unsigned int generation, bool short_reset), + TP_ARGS(card_index, generation, short_reset) ); TRACE_EVENT(bus_reset_handle, -- cgit v1.2.3 From 893098b2af3ea12bab2f505aa825662b379df67d Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 13 Jun 2024 22:14:40 +0900 Subject: firewire: core: record card index in bus_reset_handle tracepoints event The bus reset event occurs in the bus managed by one of 1394 OHCI controller in Linux system, however the existing tracepoints events has the lack of data about it to distinguish the issued hardware from the others. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240613131440.431766-9-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-topology.c | 2 +- include/trace/events/firewire.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index 837cc44d8d9f..8107eebd4296 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -508,7 +508,7 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, struct fw_node *local_node; unsigned long flags; - trace_bus_reset_handle(generation, node_id, bm_abdicate, self_ids, self_id_count); + trace_bus_reset_handle(card->index, generation, node_id, bm_abdicate, self_ids, self_id_count); spin_lock_irqsave(&card->lock, flags); diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index e6485051f546..5ccc0d91b220 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -339,22 +339,25 @@ DEFINE_EVENT(bus_reset_arrange_template, bus_reset_postpone, ); TRACE_EVENT(bus_reset_handle, - TP_PROTO(unsigned int generation, unsigned int node_id, bool bm_abdicate, u32 *self_ids, unsigned int self_id_count), - TP_ARGS(generation, node_id, bm_abdicate, self_ids, self_id_count), + TP_PROTO(unsigned int card_index, unsigned int generation, unsigned int node_id, bool bm_abdicate, u32 *self_ids, unsigned int self_id_count), + TP_ARGS(card_index, generation, node_id, bm_abdicate, self_ids, self_id_count), TP_STRUCT__entry( + __field(u8, card_index) __field(u8, generation) __field(u8, node_id) __field(bool, bm_abdicate) __dynamic_array(u32, self_ids, self_id_count) ), TP_fast_assign( + __entry->card_index = card_index; __entry->generation = generation; __entry->node_id = node_id; __entry->bm_abdicate = bm_abdicate; memcpy(__get_dynamic_array(self_ids), self_ids, __get_dynamic_array_len(self_ids)); ), TP_printk( - "generation=%u node_id=0x%04x bm_abdicate=%s self_ids=%s", + "card_index=%u generation=%u node_id=0x%04x bm_abdicate=%s self_ids=%s", + __entry->card_index, __entry->generation, __entry->node_id, __entry->bm_abdicate ? "true" : "false", -- cgit v1.2.3 From 384a746bb55960aa5ffb3a67de08f11fc2f51042 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 5 Jun 2024 11:17:10 +0200 Subject: Revert "mm: init_mlocked_on_free_v3" There was insufficient review and no agreement that this is the right approach. There are serious flaws with the implementation that make processes using mlock() not even work with simple fork() [1] and we get reliable crashes when rebooting. Further, simply because we might be unmapping a single PTE of a large mlocked folio, we shouldn't zero out the whole folio. ... especially because the code can also *corrupt* urelated memory because kernel_init_pages(page, folio_nr_pages(folio)); Could end up writing outside of the actual folio if we work with a tail page. Let's revert it. Once there is agreement that this is the right approach, the issues were fixed and there was reasonable review and proper testing, we can consider it again. [1] https://lkml.kernel.org/r/4da9da2f-73e4-45fd-b62f-a8a513314057@redhat.com Link: https://lkml.kernel.org/r/20240605091710.38961-1-david@redhat.com Fixes: ba42b524a040 ("mm: init_mlocked_on_free_v3") Signed-off-by: David Hildenbrand Reported-by: David Wang <00107082@163.com> Closes: https://lore.kernel.org/lkml/20240528151340.4282-1-00107082@163.com/ Reported-by: Lance Yang Closes: https://lkml.kernel.org/r/20240601140917.43562-1-ioworker0@gmail.com Acked-by: Lance Yang Cc: York Jasper Niebuhr Cc: Matthew Wilcox (Oracle) Cc: Kees Cook Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 6 ---- include/linux/mm.h | 9 +----- mm/internal.h | 1 - mm/memory.c | 6 ---- mm/mm_init.c | 43 ++++--------------------- mm/page_alloc.c | 2 +- security/Kconfig.hardening | 15 --------- 7 files changed, 9 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b600df82669d..11e57ba2985c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2192,12 +2192,6 @@ Format: 0 | 1 Default set by CONFIG_INIT_ON_FREE_DEFAULT_ON. - init_mlocked_on_free= [MM] Fill freed userspace memory with zeroes if - it was mlock'ed and not explicitly munlock'ed - afterwards. - Format: 0 | 1 - Default set by CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON - init_pkru= [X86] Specify the default memory protection keys rights register contents for all processes. 0x55555554 by default (disallow access to all but pkey 0). Can diff --git a/include/linux/mm.h b/include/linux/mm.h index 9849dfda44d4..9a5652c5fadd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3776,14 +3776,7 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); static inline bool want_init_on_free(void) { return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON, - &init_on_free); -} - -DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); -static inline bool want_init_mlocked_on_free(void) -{ - return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, - &init_mlocked_on_free); + &init_on_free); } extern bool _debug_pagealloc_enabled_early; diff --git a/mm/internal.h b/mm/internal.h index b2c75b12014e..c72c306761a4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -588,7 +588,6 @@ extern void __putback_isolated_page(struct page *page, unsigned int order, extern void memblock_free_pages(struct page *page, unsigned long pfn, unsigned int order); extern void __free_pages_core(struct page *page, unsigned int order); -extern void kernel_init_pages(struct page *page, int numpages); /* * This will have no effect, other than possibly generating a warning, if the diff --git a/mm/memory.c b/mm/memory.c index 0f47a533014e..2bc8032a30a2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1507,12 +1507,6 @@ static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb, if (unlikely(folio_mapcount(folio) < 0)) print_bad_pte(vma, addr, ptent, page); } - - if (want_init_mlocked_on_free() && folio_test_mlocked(folio) && - !delay_rmap && folio_test_anon(folio)) { - kernel_init_pages(page, folio_nr_pages(folio)); - } - if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) { *force_flush = true; *force_break = true; diff --git a/mm/mm_init.c b/mm/mm_init.c index f72b852bd5b8..3ec04933f7fd 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -2523,9 +2523,6 @@ EXPORT_SYMBOL(init_on_alloc); DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free); EXPORT_SYMBOL(init_on_free); -DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free); -EXPORT_SYMBOL(init_mlocked_on_free); - static bool _init_on_alloc_enabled_early __read_mostly = IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON); static int __init early_init_on_alloc(char *buf) @@ -2543,14 +2540,6 @@ static int __init early_init_on_free(char *buf) } early_param("init_on_free", early_init_on_free); -static bool _init_mlocked_on_free_enabled_early __read_mostly - = IS_ENABLED(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON); -static int __init early_init_mlocked_on_free(char *buf) -{ - return kstrtobool(buf, &_init_mlocked_on_free_enabled_early); -} -early_param("init_mlocked_on_free", early_init_mlocked_on_free); - DEFINE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled); /* @@ -2578,21 +2567,12 @@ static void __init mem_debugging_and_hardening_init(void) } #endif - if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early || - _init_mlocked_on_free_enabled_early) && + if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) && page_poisoning_requested) { pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, " - "will take precedence over init_on_alloc, init_on_free " - "and init_mlocked_on_free\n"); + "will take precedence over init_on_alloc and init_on_free\n"); _init_on_alloc_enabled_early = false; _init_on_free_enabled_early = false; - _init_mlocked_on_free_enabled_early = false; - } - - if (_init_mlocked_on_free_enabled_early && _init_on_free_enabled_early) { - pr_info("mem auto-init: init_on_free is on, " - "will take precedence over init_mlocked_on_free\n"); - _init_mlocked_on_free_enabled_early = false; } if (_init_on_alloc_enabled_early) { @@ -2609,17 +2589,9 @@ static void __init mem_debugging_and_hardening_init(void) static_branch_disable(&init_on_free); } - if (_init_mlocked_on_free_enabled_early) { - want_check_pages = true; - static_branch_enable(&init_mlocked_on_free); - } else { - static_branch_disable(&init_mlocked_on_free); - } - - if (IS_ENABLED(CONFIG_KMSAN) && (_init_on_alloc_enabled_early || - _init_on_free_enabled_early || _init_mlocked_on_free_enabled_early)) - pr_info("mem auto-init: please make sure init_on_alloc, init_on_free and " - "init_mlocked_on_free are disabled when running KMSAN\n"); + if (IS_ENABLED(CONFIG_KMSAN) && + (_init_on_alloc_enabled_early || _init_on_free_enabled_early)) + pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n"); #ifdef CONFIG_DEBUG_PAGEALLOC if (debug_pagealloc_enabled()) { @@ -2658,10 +2630,9 @@ static void __init report_meminit(void) else stack = "off"; - pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s, mlocked free:%s\n", + pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n", stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off", - want_init_on_free() ? "on" : "off", - want_init_mlocked_on_free() ? "on" : "off"); + want_init_on_free() ? "on" : "off"); if (want_init_on_free()) pr_info("mem auto-init: clearing system memory may take some time...\n"); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 222299b5c0e6..7300aa9f14b0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1016,7 +1016,7 @@ static inline bool should_skip_kasan_poison(struct page *page) return page_kasan_tag(page) == KASAN_TAG_KERNEL; } -void kernel_init_pages(struct page *page, int numpages) +static void kernel_init_pages(struct page *page, int numpages) { int i; diff --git a/security/Kconfig.hardening b/security/Kconfig.hardening index effbf5982be1..2cff851ebfd7 100644 --- a/security/Kconfig.hardening +++ b/security/Kconfig.hardening @@ -255,21 +255,6 @@ config INIT_ON_FREE_DEFAULT_ON touching "cold" memory areas. Most cases see 3-5% impact. Some synthetic workloads have measured as high as 8%. -config INIT_MLOCKED_ON_FREE_DEFAULT_ON - bool "Enable mlocked memory zeroing on free" - depends on !KMSAN - help - This config has the effect of setting "init_mlocked_on_free=1" - on the kernel command line. If it is enabled, all mlocked process - memory is zeroed when freed. This restriction to mlocked memory - improves performance over "init_on_free" but can still be used to - protect confidential data like key material from content exposures - to other processes, as well as live forensics and cold boot attacks. - Any non-mlocked memory is not cleared before it is reassigned. This - configuration can be overwritten by setting "init_mlocked_on_free=0" - on the command line. The "init_on_free" boot option takes - precedence over "init_mlocked_on_free". - config CC_HAS_ZERO_CALL_USED_REGS def_bool $(cc-option,-fzero-call-used-regs=used-gpr) # https://github.com/ClangBuiltLinux/linux/issues/1766 -- cgit v1.2.3 From a273559e9eb68cb58c57803d76a1622b8324a878 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Sat, 1 Jun 2024 16:38:40 -0700 Subject: lib/alloc_tag: fix RCU imbalance in pgalloc_tag_get() put_page_tag_ref() should be called only when get_page_tag_ref() returns a valid reference because only in that case get_page_tag_ref() enters RCU read section while put_page_tag_ref() will call rcu_read_unlock() even if the provided reference is NULL. Fix pgalloc_tag_get() which does not follow this rule causing RCU imbalance. Add a warning in put_page_tag_ref() to catch any future mistakes. Link: https://lkml.kernel.org/r/20240601233840.617458-1-surenb@google.com Fixes: cc92eba1c88b ("mm: fix non-compound multi-order memory accounting in __free_pages") Signed-off-by: Suren Baghdasaryan Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202405271029.6d2f9c4c-lkp@intel.com Acked-by: Vlastimil Babka Cc: Kent Overstreet Cc: Kees Cook Cc: Pasha Tatashin Signed-off-by: Andrew Morton --- include/linux/pgalloc_tag.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 86ba5d33e43b..9cacadbd61f8 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -37,6 +37,9 @@ static inline union codetag_ref *get_page_tag_ref(struct page *page) static inline void put_page_tag_ref(union codetag_ref *ref) { + if (WARN_ON(!ref)) + return; + page_ext_put(page_ext_from_codetag_ref(ref)); } @@ -102,9 +105,11 @@ static inline struct alloc_tag *pgalloc_tag_get(struct page *page) union codetag_ref *ref = get_page_tag_ref(page); alloc_tag_sub_check(ref); - if (ref && ref->ct) - tag = ct_to_alloc_tag(ref->ct); - put_page_tag_ref(ref); + if (ref) { + if (ref->ct) + tag = ct_to_alloc_tag(ref->ct); + put_page_tag_ref(ref); + } } return tag; -- cgit v1.2.3 From 6a50c9b512f7734bc356f4bd47885a6f7c98491a Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Fri, 7 Jun 2024 17:40:48 +0800 Subject: mm: huge_memory: fix misused mapping_large_folio_support() for anon folios When I did a large folios split test, a WARNING "[ 5059.122759][ T166] Cannot split file folio to non-0 order" was triggered. But the test cases are only for anonmous folios. while mapping_large_folio_support() is only reasonable for page cache folios. In split_huge_page_to_list_to_order(), the folio passed to mapping_large_folio_support() maybe anonmous folio. The folio_test_anon() check is missing. So the split of the anonmous THP is failed. This is also the same for shmem_mapping(). We'd better add a check for both. But the shmem_mapping() in __split_huge_page() is not involved, as for anonmous folios, the end parameter is set to -1, so (head[i].index >= end) is always false. shmem_mapping() is not called. Also add a VM_WARN_ON_ONCE() in mapping_large_folio_support() for anon mapping, So we can detect the wrong use more easily. THP folios maybe exist in the pagecache even the file system doesn't support large folio, it is because when CONFIG_TRANSPARENT_HUGEPAGE is enabled, khugepaged will try to collapse read-only file-backed pages to THP. But the mapping does not actually support multi order large folios properly. Using /sys/kernel/debug/split_huge_pages to verify this, with this patch, large anon THP is successfully split and the warning is ceased. Link: https://lkml.kernel.org/r/202406071740485174hcFl7jRxncsHDtI-Pz-o@zte.com.cn Fixes: c010d47f107f ("mm: thp: split huge page to any lower order pages") Reviewed-by: Barry Song Reviewed-by: Zi Yan Acked-by: David Hildenbrand Signed-off-by: Ran Xiaokai Cc: Michal Hocko Cc: xu xin Cc: Yang Yang Cc: Signed-off-by: Andrew Morton --- include/linux/pagemap.h | 4 ++++ mm/huge_memory.c | 28 +++++++++++++++++----------- 2 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index ee633712bba0..59f1df0cde5a 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -381,6 +381,10 @@ static inline void mapping_set_large_folios(struct address_space *mapping) */ static inline bool mapping_large_folio_support(struct address_space *mapping) { + /* AS_LARGE_FOLIO_SUPPORT is only reasonable for pagecache folios */ + VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, + "Anonymous mapping always supports large folio"); + return IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && test_bit(AS_LARGE_FOLIO_SUPPORT, &mapping->flags); } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 89932fd0f62e..db7946a0a28c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -3009,30 +3009,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list, if (new_order >= folio_order(folio)) return -EINVAL; - /* Cannot split anonymous THP to order-1 */ - if (new_order == 1 && folio_test_anon(folio)) { - VM_WARN_ONCE(1, "Cannot split to order-1 folio"); - return -EINVAL; - } - - if (new_order) { - /* Only swapping a whole PMD-mapped folio is supported */ - if (folio_test_swapcache(folio)) + if (folio_test_anon(folio)) { + /* order-1 is not supported for anonymous THP. */ + if (new_order == 1) { + VM_WARN_ONCE(1, "Cannot split to order-1 folio"); return -EINVAL; + } + } else if (new_order) { /* Split shmem folio to non-zero order not supported */ if (shmem_mapping(folio->mapping)) { VM_WARN_ONCE(1, "Cannot split shmem folio to non-0 order"); return -EINVAL; } - /* No split if the file system does not support large folio */ - if (!mapping_large_folio_support(folio->mapping)) { + /* + * No split if the file system does not support large folio. + * Note that we might still have THPs in such mappings due to + * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping + * does not actually support large folios properly. + */ + if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && + !mapping_large_folio_support(folio->mapping)) { VM_WARN_ONCE(1, "Cannot split file folio to non-0 order"); return -EINVAL; } } + /* Only swapping a whole PMD-mapped folio is supported */ + if (folio_test_swapcache(folio) && new_order) + return -EINVAL; is_hzp = is_huge_zero_folio(folio); if (is_hzp) { -- cgit v1.2.3 From 01c8f9806bde438ca1c8cbbc439f0a14a6694f6c Mon Sep 17 00:00:00 2001 From: Aleksandr Nogikh Date: Tue, 11 Jun 2024 15:32:29 +0200 Subject: kcov: don't lose track of remote references during softirqs In kcov_remote_start()/kcov_remote_stop(), we swap the previous KCOV metadata of the current task into a per-CPU variable. However, the kcov_mode_enabled(mode) check is not sufficient in the case of remote KCOV coverage: current->kcov_mode always remains KCOV_MODE_DISABLED for remote KCOV objects. If the original task that has invoked the KCOV_REMOTE_ENABLE ioctl happens to get interrupted and kcov_remote_start() is called, it ultimately leads to kcov_remote_stop() NOT restoring the original KCOV reference. So when the task exits, all registered remote KCOV handles remain active forever. The most uncomfortable effect (at least for syzkaller) is that the bug prevents the reuse of the same /sys/kernel/debug/kcov descriptor. If we obtain it in the parent process and then e.g. drop some capabilities and continuously fork to execute individual programs, at some point current->kcov of the forked process is lost, kcov_task_exit() takes no action, and all KCOV_REMOTE_ENABLE ioctls calls from subsequent forks fail. And, yes, the efficiency is also affected if we keep on losing remote kcov objects. a) kcov_remote_map keeps on growing forever. b) (If I'm not mistaken), we're also not freeing the memory referenced by kcov->area. Fix it by introducing a special kcov_mode that is assigned to the task that owns a KCOV remote object. It makes kcov_mode_enabled() return true and yet does not trigger coverage collection in __sanitizer_cov_trace_pc() and write_comp_data(). [nogikh@google.com: replace WRITE_ONCE() with an ordinary assignment] Link: https://lkml.kernel.org/r/20240614171221.2837584-1-nogikh@google.com Link: https://lkml.kernel.org/r/20240611133229.527822-1-nogikh@google.com Fixes: 5ff3b30ab57d ("kcov: collect coverage from interrupts") Signed-off-by: Aleksandr Nogikh Reviewed-by: Dmitry Vyukov Reviewed-by: Andrey Konovalov Tested-by: Andrey Konovalov Cc: Alexander Potapenko Cc: Arnd Bergmann Cc: Marco Elver Cc: Signed-off-by: Andrew Morton --- include/linux/kcov.h | 2 ++ kernel/kcov.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index b851ba415e03..3b479a3d235a 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -21,6 +21,8 @@ enum kcov_mode { KCOV_MODE_TRACE_PC = 2, /* Collecting comparison operands mode. */ KCOV_MODE_TRACE_CMP = 3, + /* The process owns a KCOV remote reference. */ + KCOV_MODE_REMOTE = 4, }; #define KCOV_IN_CTXSW (1 << 30) diff --git a/kernel/kcov.c b/kernel/kcov.c index c3124f6d5536..f0a69d402066 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -632,6 +632,7 @@ static int kcov_ioctl_locked(struct kcov *kcov, unsigned int cmd, return -EINVAL; kcov->mode = mode; t->kcov = kcov; + t->kcov_mode = KCOV_MODE_REMOTE; kcov->t = t; kcov->remote = true; kcov->remote_size = remote_arg->area_size; -- cgit v1.2.3 From 4604b3888f614a353c7afa17bdc8e7393bb1f9a1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 Jun 2024 21:51:33 +0300 Subject: hwrng: core - Remove list.h from the hw_random.h The 'struct list' type is defined in types.h, no need to include list.h for that. Signed-off-by: Andy Shevchenko Signed-off-by: Herbert Xu --- include/linux/hw_random.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h index 136e9842120e..b424555753b1 100644 --- a/include/linux/hw_random.h +++ b/include/linux/hw_random.h @@ -13,9 +13,8 @@ #define LINUX_HWRANDOM_H_ #include -#include -#include #include +#include /** * struct hwrng - Hardware Random Number Generator driver -- cgit v1.2.3 From 0eb3bed57a06a20c612012127f4405d48fa4a50f Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Fri, 7 Jun 2024 18:34:17 -0400 Subject: crypto: ecc - Add comment to ecc_digits_from_bytes about input byte array Add comment to ecc_digits_from_bytes kdoc that the first byte is expected to hold the most significant bits of the large integer that is converted into an array of digits. Signed-off-by: Stefan Berger Signed-off-by: Herbert Xu --- include/crypto/internal/ecc.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/crypto/internal/ecc.h b/include/crypto/internal/ecc.h index f7e75e1e71f3..0717a53ae732 100644 --- a/include/crypto/internal/ecc.h +++ b/include/crypto/internal/ecc.h @@ -63,6 +63,9 @@ static inline void ecc_swap_digits(const void *in, u64 *out, unsigned int ndigit * @nbytes Size of input byte array * @out Output digits array * @ndigits: Number of digits to create from byte array + * + * The first byte in the input byte array is expected to hold the most + * significant bits of the large integer. */ void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes, u64 *out, unsigned int ndigits); -- cgit v1.2.3 From 8043832e2a123fd9372007a29192f2f3ba328cd6 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (IBM)" Date: Fri, 14 Jun 2024 11:05:43 +0300 Subject: memblock: use numa_valid_node() helper to check for invalid node ID Introduce numa_valid_node(nid) that verifies that nid is a valid node ID and use that instead of comparing nid parameter with either NUMA_NO_NODE or MAX_NUMNODES. This makes the checks for valid node IDs consistent and more robust and allows to get rid of multiple WARNings. Suggested-by: Linus Torvalds Signed-off-by: Mike Rapoport (IBM) --- include/linux/numa.h | 5 +++++ mm/memblock.c | 28 +++++++--------------------- 2 files changed, 12 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/numa.h b/include/linux/numa.h index 1d43371fafd2..eb19503604fe 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -15,6 +15,11 @@ #define NUMA_NO_NODE (-1) #define NUMA_NO_MEMBLK (-1) +static inline bool numa_valid_node(int nid) +{ + return nid >= 0 && nid < MAX_NUMNODES; +} + /* optionally keep NUMA memory info available post init */ #ifdef CONFIG_NUMA_KEEP_MEMINFO #define __initdata_or_meminfo diff --git a/mm/memblock.c b/mm/memblock.c index 08e9806b1cf9..e81fb68f7f88 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -754,7 +754,7 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt /* calculate lose page */ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) { - if (nid == NUMA_NO_NODE) + if (!numa_valid_node(nid)) nr_pages += end_pfn - start_pfn; } @@ -1061,7 +1061,7 @@ static bool should_skip_region(struct memblock_type *type, return false; /* only memory regions are associated with nodes, check it */ - if (nid != NUMA_NO_NODE && nid != m_nid) + if (numa_valid_node(nid) && nid != m_nid) return true; /* skip hotpluggable memory regions if needed */ @@ -1118,10 +1118,6 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - for (; idx_a < type_a->cnt; idx_a++) { struct memblock_region *m = &type_a->regions[idx_a]; @@ -1215,9 +1211,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid, int idx_a = *idx & 0xffffffff; int idx_b = *idx >> 32; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (*idx == (u64)ULLONG_MAX) { idx_a = type_a->cnt - 1; if (type_b != NULL) @@ -1303,7 +1296,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size)) continue; - if (nid == MAX_NUMNODES || nid == r_nid) + if (!numa_valid_node(nid) || nid == r_nid) break; } if (*idx >= type->cnt) { @@ -1339,10 +1332,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size, int start_rgn, end_rgn; int i, ret; - if (WARN_ONCE(nid == MAX_NUMNODES, - "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn); if (ret) return ret; @@ -1452,9 +1441,6 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size, enum memblock_flags flags = choose_memblock_flags(); phys_addr_t found; - if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n")) - nid = NUMA_NO_NODE; - if (!align) { /* Can't use WARNs this early in boot on powerpc */ dump_stack(); @@ -1467,7 +1453,7 @@ again: if (found && !memblock_reserve(found, size)) goto done; - if (nid != NUMA_NO_NODE && !exact_nid) { + if (numa_valid_node(nid) && !exact_nid) { found = memblock_find_in_range_node(size, align, start, end, NUMA_NO_NODE, flags); @@ -1987,7 +1973,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) end = base + size - 1; flags = rgn->flags; #ifdef CONFIG_NUMA - if (memblock_get_region_node(rgn) != MAX_NUMNODES) + if (numa_valid_node(memblock_get_region_node(rgn))) snprintf(nid_buf, sizeof(nid_buf), " on node %d", memblock_get_region_node(rgn)); #endif @@ -2181,7 +2167,7 @@ static void __init memmap_init_reserved_pages(void) start = region->base; end = start + region->size; - if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES) + if (!numa_valid_node(nid)) nid = early_pfn_to_nid(PFN_DOWN(start)); reserve_bootmem_region(start, end, nid); @@ -2272,7 +2258,7 @@ static int memblock_debug_show(struct seq_file *m, void *private) seq_printf(m, "%4d: ", i); seq_printf(m, "%pa..%pa ", ®->base, &end); - if (nid != MAX_NUMNODES) + if (numa_valid_node(nid)) seq_printf(m, "%4d ", nid); else seq_printf(m, "%4c ", 'x'); -- cgit v1.2.3 From d98995b4bf981519dde4af0a081c393d62474039 Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Mon, 3 Jun 2024 13:26:37 +0300 Subject: net/mlx5: Reimplement write combining test The test of write combining was added before in mlx5_ib driver. It opens UD QP and posts NOP WQEs, and uses BlueFlame doorbell. When BlueFlame is used, WQEs get written directly to a PCI BAR of the device (in addition to memory) so that the device handles them without having to access memory. In this test, the WQEs written in memory are different from the ones written to the BlueFlame which request CQE update. By checking the completion reports posted on CQ, we can know if BlueFlame succeeds or not. The write combining must be supported if BlueFlame succeeds as its register is written using write combining. This patch reimplements the test in the same way, but using a pair of SQ and CQ only. It is moved to mlx5_core as a general feature used by both mlx5_core and mlx5_ib. Besides, save write combine test result of the PCI function, so that its thousands of child functions such as SF can query without paying the time and resource penalty by itself. The test function is called only after failing to get the cached result. With this enhancement, all thousands of SFs of the PF attached to same driver no longer need to perform WC check explicitly, which is already done in the system. This saves several commands per SF, thereby speeds up SF creation and also saves completion EQ creation. Signed-off-by: Jianbo Liu Reviewed-by: Tariq Toukan Link: https://lore.kernel.org/r/4ff5a8cc4c5b5b0d98397baa45a5019bcdbf096e.1717409369.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 19 +- drivers/infiniband/hw/mlx5/mem.c | 198 ----------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - drivers/infiniband/hw/mlx5/qp.c | 16 - drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 + drivers/net/ethernet/mellanox/mlx5/core/wc.c | 434 +++++++++++++++++++++++ include/linux/mlx5/driver.h | 11 + 8 files changed, 451 insertions(+), 234 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/wc.c (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2366c46eebc8..00e292422801 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1810,7 +1810,7 @@ static int set_ucontext_resp(struct ib_ucontext *uctx, } resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp); - if (dev->wc_support) + if (mlx5_wc_support_get(dev->mdev)) resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size); resp->cache_line_size = cache_line_size(); @@ -2337,7 +2337,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm switch (command) { case MLX5_IB_MMAP_WC_PAGE: case MLX5_IB_MMAP_ALLOC_WC: - if (!dev->wc_support) + if (!mlx5_wc_support_get(dev->mdev)) return -EPERM; fallthrough; case MLX5_IB_MMAP_NC_PAGE: @@ -3612,7 +3612,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)( alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC) return -EOPNOTSUPP; - if (!to_mdev(c->ibucontext.device)->wc_support && + if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) && alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF) return -EOPNOTSUPP; @@ -3766,18 +3766,6 @@ err_mp: return err; } -static int mlx5_ib_enable_driver(struct ib_device *dev) -{ - struct mlx5_ib_dev *mdev = to_mdev(dev); - int ret; - - ret = mlx5_ib_test_wc(mdev); - mlx5_ib_dbg(mdev, "Write-Combining %s", - mdev->wc_support ? "supported" : "not supported"); - - return ret; -} - static const struct ib_device_ops mlx5_ib_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_MLX5, @@ -3808,7 +3796,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = { .drain_rq = mlx5_ib_drain_rq, .drain_sq = mlx5_ib_drain_sq, .device_group = &mlx5_attr_group, - .enable_driver = mlx5_ib_enable_driver, .get_dev_fw_str = get_dev_fw_str, .get_dma_mr = mlx5_ib_get_dma_mr, .get_link_layer = mlx5_ib_port_link_layer, diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 5a22be14d958..af321f6ef7f5 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -30,10 +30,8 @@ * SOFTWARE. */ -#include #include #include "mlx5_ib.h" -#include /* * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be @@ -95,199 +93,3 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff( return 0; return page_size; } - -#define WR_ID_BF 0xBF -#define WR_ID_END 0xBAD -#define TEST_WC_NUM_WQES 255 -#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) -static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id, - bool signaled) -{ - struct mlx5_ib_qp *qp = to_mqp(ibqp); - struct mlx5_wqe_ctrl_seg *ctrl; - struct mlx5_bf *bf = &qp->bf; - __be32 mmio_wqe[16] = {}; - unsigned long flags; - unsigned int idx; - - if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)) - return -EIO; - - spin_lock_irqsave(&qp->sq.lock, flags); - - idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); - ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx); - - memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg)); - ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0; - ctrl->opmod_idx_opcode = - cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP); - ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) | - (qp->trans_qp.base.mqp.qpn << 8)); - - qp->sq.wrid[idx] = wr_id; - qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP; - qp->sq.wqe_head[idx] = qp->sq.head + 1; - qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), - MLX5_SEND_WQE_BB); - qp->sq.w_list[idx].next = qp->sq.cur_post; - qp->sq.head++; - - memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); - ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= - MLX5_WQE_CTRL_CQ_UPDATE; - - /* Make sure that descriptors are written before - * updating doorbell record and ringing the doorbell - */ - wmb(); - - qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post); - - /* Make sure doorbell record is visible to the HCA before - * we hit doorbell - */ - wmb(); - __iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe, - sizeof(mmio_wqe) / 8); - - bf->offset ^= bf->buf_size; - - spin_unlock_irqrestore(&qp->sq.lock, flags); - - return 0; -} - -static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq) -{ - int ret; - struct ib_wc wc = {}; - unsigned long end = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; - - do { - ret = ib_poll_cq(cq, 1, &wc); - if (ret < 0 || wc.status) - return ret < 0 ? ret : -EINVAL; - if (ret) - break; - } while (!time_after(jiffies, end)); - - if (!ret) - return -ETIMEDOUT; - - if (wc.wr_id != WR_ID_BF) - ret = 0; - - return ret; -} - -static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp) -{ - int err, i; - - for (i = 0; i < TEST_WC_NUM_WQES; i++) { - err = post_send_nop(dev, qp, WR_ID_BF, false); - if (err) - return err; - } - - return post_send_nop(dev, qp, WR_ID_END, true); -} - -int mlx5_ib_test_wc(struct mlx5_ib_dev *dev) -{ - struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 }; - int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type); - struct ib_qp_init_attr qp_init_attr = { - .cap = { .max_send_wr = TEST_WC_NUM_WQES }, - .qp_type = IB_QPT_UD, - .sq_sig_type = IB_SIGNAL_REQ_WR, - .create_flags = MLX5_IB_QP_CREATE_WC_TEST, - }; - struct ib_qp_attr qp_attr = { .port_num = 1 }; - struct ib_device *ibdev = &dev->ib_dev; - struct ib_qp *qp; - struct ib_cq *cq; - struct ib_pd *pd; - int ret; - - if (!MLX5_CAP_GEN(dev->mdev, bf)) - return 0; - - if (!dev->mdev->roce.roce_en && - port_type_cap == MLX5_CAP_PORT_TYPE_ETH) { - if (mlx5_core_is_pf(dev->mdev)) - dev->wc_support = arch_can_pci_mmap_wc(); - return 0; - } - - ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false); - if (ret) - goto print_err; - - if (!dev->wc_bfreg.wc) - goto out1; - - pd = ib_alloc_pd(ibdev, 0); - if (IS_ERR(pd)) { - ret = PTR_ERR(pd); - goto out1; - } - - cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr); - if (IS_ERR(cq)) { - ret = PTR_ERR(cq); - goto out2; - } - - qp_init_attr.recv_cq = cq; - qp_init_attr.send_cq = cq; - qp = ib_create_qp(pd, &qp_init_attr); - if (IS_ERR(qp)) { - ret = PTR_ERR(qp); - goto out3; - } - - qp_attr.qp_state = IB_QPS_INIT; - ret = ib_modify_qp(qp, &qp_attr, - IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX | - IB_QP_QKEY); - if (ret) - goto out4; - - qp_attr.qp_state = IB_QPS_RTR; - ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE); - if (ret) - goto out4; - - qp_attr.qp_state = IB_QPS_RTS; - ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN); - if (ret) - goto out4; - - ret = test_wc_do_send(dev, qp); - if (ret < 0) - goto out4; - - ret = test_wc_poll_cq_result(dev, cq); - if (ret > 0) { - dev->wc_support = true; - ret = 0; - } - -out4: - ib_destroy_qp(qp); -out3: - ib_destroy_cq(cq); -out2: - ib_dealloc_pd(pd); -out1: - mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg); -print_err: - if (ret) - mlx5_ib_err( - dev, - "Error %d while trying to test write-combining support\n", - ret); - return ret; -} diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index f255a12e26a0..b68779e9d86c 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -341,7 +341,6 @@ struct mlx5_ib_flow_db { * rely on the range reserved for that use in the ib_qp_create_flags enum. */ #define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START -#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1) struct wr_list { u16 opcode; @@ -1123,7 +1122,6 @@ struct mlx5_ib_dev { u8 ib_active:1; u8 is_rep:1; u8 lag_active:1; - u8 wc_support:1; u8 fill_delay; struct umr_common umrc; /* sync used page count stats @@ -1149,7 +1147,6 @@ struct mlx5_ib_dev { /* Array with num_ports elements */ struct mlx5_ib_port *port; struct mlx5_sq_bfreg bfreg; - struct mlx5_sq_bfreg wc_bfreg; struct mlx5_sq_bfreg fp_bfreg; struct mlx5_ib_delay_drop delay_drop; const struct mlx5_ib_profile *profile; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index e2164f813607..e8c0fead4062 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1107,8 +1107,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev, if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR) qp->bf.bfreg = &dev->fp_bfreg; - else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) - qp->bf.bfreg = &dev->wc_bfreg; else qp->bf.bfreg = &dev->bfreg; @@ -2959,14 +2957,6 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag, return; } - if (flag == MLX5_IB_QP_CREATE_WC_TEST) { - /* - * Special case, if condition didn't meet, it won't be error, - * just different in-kernel flow. - */ - *flags &= ~MLX5_IB_QP_CREATE_WC_TEST; - return; - } mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag); } @@ -3027,8 +3017,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, IB_QP_CREATE_PCI_WRITE_END_PADDING, MLX5_CAP_GEN(mdev, end_pad), qp); - process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST, - qp_type != MLX5_IB_QPT_REG_UMR, qp); process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1, true, qp); @@ -4609,10 +4597,6 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR) return true; - /* Internal QP used for wc testing, with NOPs in wq */ - if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) - return true; - return false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 76dc5a9b9648..1289475e7be7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ - fw_reset.o qos.o lib/tout.o lib/aso.o + fw_reset.o qos.o lib/tout.o lib/aso.o wc.o # # Netdev basic diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 459a836a5d9c..527da58c7953 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1819,6 +1819,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx) mutex_init(&dev->intf_state_mutex); lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key); mutex_init(&dev->mlx5e_res.uplink_netdev_lock); + mutex_init(&dev->wc_state_lock); mutex_init(&priv->bfregs.reg_head.lock); mutex_init(&priv->bfregs.wc_head.lock); @@ -1916,6 +1917,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev) mutex_destroy(&priv->alloc_mutex); mutex_destroy(&priv->bfregs.wc_head.lock); mutex_destroy(&priv->bfregs.reg_head.lock); + mutex_destroy(&dev->wc_state_lock); mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock); mutex_destroy(&dev->intf_state_mutex); lockdep_unregister_key(&dev->lock_key); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c new file mode 100644 index 000000000000..1bed75eca97d --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include +#include +#include "lib/clock.h" +#include "mlx5_core.h" +#include "wq.h" + +#define TEST_WC_NUM_WQES 255 +#define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES)) +#define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ +#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100) + +struct mlx5_wc_cq { + /* data path - accessed per cqe */ + struct mlx5_cqwq wq; + + /* data path - accessed per napi poll */ + struct mlx5_core_cq mcq; + + /* control */ + struct mlx5_core_dev *mdev; + struct mlx5_wq_ctrl wq_ctrl; +}; + +struct mlx5_wc_sq { + /* data path */ + u16 cc; + u16 pc; + + /* read only */ + struct mlx5_wq_cyc wq; + u32 sqn; + + /* control path */ + struct mlx5_wq_ctrl wq_ctrl; + + struct mlx5_wc_cq cq; + struct mlx5_sq_bfreg bfreg; +}; + +static int mlx5_wc_create_cqwq(struct mlx5_core_dev *mdev, void *cqc, + struct mlx5_wc_cq *cq) +{ + struct mlx5_core_cq *mcq = &cq->mcq; + struct mlx5_wq_param param = {}; + int err; + u32 i; + + err = mlx5_cqwq_create(mdev, ¶m, cqc, &cq->wq, &cq->wq_ctrl); + if (err) + return err; + + mcq->cqe_sz = 64; + mcq->set_ci_db = cq->wq_ctrl.db.db; + mcq->arm_db = cq->wq_ctrl.db.db + 1; + + for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { + struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); + + cqe->op_own = 0xf1; + } + + cq->mdev = mdev; + + return 0; +} + +static int create_wc_cq(struct mlx5_wc_cq *cq, void *cqc_data) +{ + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + struct mlx5_core_dev *mdev = cq->mdev; + struct mlx5_core_cq *mcq = &cq->mcq; + int err, inlen, eqn; + void *in, *cqc; + + err = mlx5_comp_eqn_get(mdev, 0, &eqn); + if (err) + return err; + + inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + sizeof(u64) * cq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); + + memcpy(cqc, cqc_data, MLX5_ST_SZ_BYTES(cqc)); + + mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); + + MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); + + err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out)); + + kvfree(in); + + return err; +} + +static int mlx5_wc_create_cq(struct mlx5_core_dev *mdev, struct mlx5_wc_cq *cq) +{ + void *cqc; + int err; + + cqc = kvzalloc(MLX5_ST_SZ_BYTES(cqc), GFP_KERNEL); + if (!cqc) + return -ENOMEM; + + MLX5_SET(cqc, cqc, log_cq_size, TEST_WC_LOG_CQ_SZ); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) + MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); + + err = mlx5_wc_create_cqwq(mdev, cqc, cq); + if (err) { + mlx5_core_err(mdev, "Failed to create wc cq wq, err=%d\n", err); + goto err_create_cqwq; + } + + err = create_wc_cq(cq, cqc); + if (err) { + mlx5_core_err(mdev, "Failed to create wc cq, err=%d\n", err); + goto err_create_cq; + } + + kvfree(cqc); + return 0; + +err_create_cq: + mlx5_wq_destroy(&cq->wq_ctrl); +err_create_cqwq: + kvfree(cqc); + return err; +} + +static void mlx5_wc_destroy_cq(struct mlx5_wc_cq *cq) +{ + mlx5_core_destroy_cq(cq->mdev, &cq->mcq); + mlx5_wq_destroy(&cq->wq_ctrl); +} + +static int create_wc_sq(struct mlx5_core_dev *mdev, void *sqc_data, + struct mlx5_wc_sq *sq) +{ + void *in, *sqc, *wq; + int inlen, err; + u8 ts_format; + + inlen = MLX5_ST_SZ_BYTES(create_sq_in) + + sizeof(u64) * sq->wq_ctrl.buf.npages; + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); + wq = MLX5_ADDR_OF(sqc, sqc, wq); + + memcpy(sqc, sqc_data, MLX5_ST_SZ_BYTES(sqc)); + MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); + + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); + MLX5_SET(sqc, sqc, flush_in_error_en, 1); + + ts_format = mlx5_is_real_time_sq(mdev) ? + MLX5_TIMESTAMP_FORMAT_REAL_TIME : + MLX5_TIMESTAMP_FORMAT_FREE_RUNNING; + MLX5_SET(sqc, sqc, ts_format, ts_format); + + MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); + MLX5_SET(wq, wq, uar_page, sq->bfreg.index); + MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - + MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); + + mlx5_fill_page_frag_array(&sq->wq_ctrl.buf, + (__be64 *)MLX5_ADDR_OF(wq, wq, pas)); + + err = mlx5_core_create_sq(mdev, in, inlen, &sq->sqn); + if (err) { + mlx5_core_err(mdev, "Failed to create wc sq, err=%d\n", err); + goto err_create_sq; + } + + memset(in, 0, MLX5_ST_SZ_BYTES(modify_sq_in)); + MLX5_SET(modify_sq_in, in, sq_state, MLX5_SQC_STATE_RST); + sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); + MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RDY); + + err = mlx5_core_modify_sq(mdev, sq->sqn, in); + if (err) { + mlx5_core_err(mdev, "Failed to set wc sq(sqn=0x%x) ready, err=%d\n", + sq->sqn, err); + goto err_modify_sq; + } + + kvfree(in); + return 0; + +err_modify_sq: + mlx5_core_destroy_sq(mdev, sq->sqn); +err_create_sq: + kvfree(in); + return err; +} + +static int mlx5_wc_create_sq(struct mlx5_core_dev *mdev, struct mlx5_wc_sq *sq) +{ + struct mlx5_wq_param param = {}; + void *sqc_data, *wq; + int err; + + sqc_data = kvzalloc(MLX5_ST_SZ_BYTES(sqc), GFP_KERNEL); + if (!sqc_data) + return -ENOMEM; + + wq = MLX5_ADDR_OF(sqc, sqc_data, wq); + MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(wq, wq, log_wq_sz, TEST_WC_SQ_LOG_WQ_SZ); + + err = mlx5_wq_cyc_create(mdev, ¶m, wq, &sq->wq, &sq->wq_ctrl); + if (err) { + mlx5_core_err(mdev, "Failed to create wc sq wq, err=%d\n", err); + goto err_create_wq_cyc; + } + + err = create_wc_sq(mdev, sqc_data, sq); + if (err) + goto err_create_sq; + + mlx5_core_dbg(mdev, "wc sq->sqn = 0x%x created\n", sq->sqn); + + kvfree(sqc_data); + return 0; + +err_create_sq: + mlx5_wq_destroy(&sq->wq_ctrl); +err_create_wq_cyc: + kvfree(sqc_data); + return err; +} + +static void mlx5_wc_destroy_sq(struct mlx5_wc_sq *sq) +{ + mlx5_core_destroy_sq(sq->cq.mdev, sq->sqn); + mlx5_wq_destroy(&sq->wq_ctrl); +} + +static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, bool signaled) +{ + int buf_size = (1 << MLX5_CAP_GEN(sq->cq.mdev, log_bf_reg_size)) / 2; + struct mlx5_wqe_ctrl_seg *ctrl; + __be32 mmio_wqe[16] = {}; + u16 pi; + + pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc); + ctrl = mlx5_wq_cyc_get_wqe(&sq->wq, pi); + memset(ctrl, 0, sizeof(*ctrl)); + ctrl->opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_NOP); + ctrl->qpn_ds = + cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg), MLX5_SEND_WQE_DS)); + if (signaled) + ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE; + + memcpy(mmio_wqe, ctrl, sizeof(*ctrl)); + ((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |= + MLX5_WQE_CTRL_CQ_UPDATE; + + /* ensure wqe is visible to device before updating doorbell record */ + dma_wmb(); + + sq->pc++; + sq->wq.db[MLX5_SND_DBR] = cpu_to_be32(sq->pc); + + /* ensure doorbell record is visible to device before ringing the + * doorbell + */ + wmb(); + + __iowrite64_copy(sq->bfreg.map + sq->bfreg.offset, mmio_wqe, + sizeof(mmio_wqe) / 8); + + sq->bfreg.offset ^= buf_size; +} + +static int mlx5_wc_poll_cq(struct mlx5_wc_sq *sq) +{ + struct mlx5_wc_cq *cq = &sq->cq; + struct mlx5_cqe64 *cqe; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return -ETIMEDOUT; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + mlx5_cqwq_pop(&cq->wq); + + if (get_cqe_opcode(cqe) == MLX5_CQE_REQ) { + int wqe_counter = be16_to_cpu(cqe->wqe_counter); + struct mlx5_core_dev *mdev = cq->mdev; + + if (wqe_counter == TEST_WC_NUM_WQES - 1) + mdev->wc_state = MLX5_WC_STATE_UNSUPPORTED; + else + mdev->wc_state = MLX5_WC_STATE_SUPPORTED; + + mlx5_core_dbg(mdev, "wc wqe_counter = 0x%x\n", wqe_counter); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc++; + + return 0; +} + +static void mlx5_core_test_wc(struct mlx5_core_dev *mdev) +{ + unsigned long expires; + struct mlx5_wc_sq *sq; + int i, err; + + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) + return; + + sq = kzalloc(sizeof(*sq), GFP_KERNEL); + if (!sq) + return; + + err = mlx5_alloc_bfreg(mdev, &sq->bfreg, true, false); + if (err) { + mlx5_core_err(mdev, "Failed to alloc bfreg for wc, err=%d\n", err); + goto err_alloc_bfreg; + } + + err = mlx5_wc_create_cq(mdev, &sq->cq); + if (err) + goto err_create_cq; + + err = mlx5_wc_create_sq(mdev, sq); + if (err) + goto err_create_sq; + + for (i = 0; i < TEST_WC_NUM_WQES - 1; i++) + mlx5_wc_post_nop(sq, false); + + mlx5_wc_post_nop(sq, true); + + expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; + do { + err = mlx5_wc_poll_cq(sq); + if (err) + usleep_range(2, 10); + } while (mdev->wc_state == MLX5_WC_STATE_UNINITIALIZED && + time_is_after_jiffies(expires)); + + mlx5_wc_destroy_sq(sq); + +err_create_sq: + mlx5_wc_destroy_cq(&sq->cq); +err_create_cq: + mlx5_free_bfreg(mdev, &sq->bfreg); +err_alloc_bfreg: + kfree(sq); +} + +bool mlx5_wc_support_get(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *parent = NULL; + + if (!MLX5_CAP_GEN(mdev, bf)) { + mlx5_core_dbg(mdev, "BlueFlame not supported\n"); + goto out; + } + + if (!MLX5_CAP_GEN(mdev, log_max_sq)) { + mlx5_core_dbg(mdev, "SQ not supported\n"); + goto out; + } + + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) + /* No need to lock anything as we perform WC test only + * once for whole device and was already done. + */ + goto out; + + mutex_lock(&mdev->wc_state_lock); + + if (mdev->wc_state != MLX5_WC_STATE_UNINITIALIZED) + goto unlock; + +#ifdef CONFIG_MLX5_SF + if (mlx5_core_is_sf(mdev)) + parent = mdev->priv.parent_mdev; +#endif + + if (parent) { + mutex_lock(&parent->wc_state_lock); + + mlx5_core_test_wc(parent); + + mlx5_core_dbg(mdev, "parent set wc_state=%d\n", + parent->wc_state); + mdev->wc_state = parent->wc_state; + + mutex_unlock(&parent->wc_state_lock); + } + + mlx5_core_test_wc(mdev); + +unlock: + mutex_unlock(&mdev->wc_state_lock); +out: + mlx5_core_dbg(mdev, "wc_state=%d\n", mdev->wc_state); + + return mdev->wc_state == MLX5_WC_STATE_SUPPORTED; +} +EXPORT_SYMBOL(mlx5_wc_support_get); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 779cfdf2e9d6..0d31f77396fc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -766,6 +766,12 @@ struct mlx5_hca_cap { u32 max[MLX5_UN_SZ_DW(hca_cap_union)]; }; +enum mlx5_wc_state { + MLX5_WC_STATE_UNINITIALIZED, + MLX5_WC_STATE_UNSUPPORTED, + MLX5_WC_STATE_SUPPORTED, +}; + struct mlx5_core_dev { struct device *device; enum mlx5_coredev_type coredev_type; @@ -824,6 +830,9 @@ struct mlx5_core_dev { #endif u64 num_ipsec_offloads; struct mlx5_sd *sd; + enum mlx5_wc_state wc_state; + /* sync write combining state */ + struct mutex wc_state_lock; }; struct mlx5_db { @@ -1375,4 +1384,6 @@ static inline bool mlx5_is_macsec_roce_supported(struct mlx5_core_dev *mdev) enum { MLX5_OCTWORD = 16, }; + +bool mlx5_wc_support_get(struct mlx5_core_dev *mdev); #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From b339e0a39dc37726712b9f0485d78fe4306d1667 Mon Sep 17 00:00:00 2001 From: Patrisious Haddad Date: Thu, 13 Jun 2024 21:00:04 +0300 Subject: RDMA/mlx5: Add Qcounters req_transport_retries_exceeded/req_rnr_retries_exceeded The req_transport_retries_exceeded counter shows the number of times requester detected transport retries exceed error. The req_rnr_retries_exceeded counter show the number of times the requester detected RNR NAKs retries exceed error. Signed-off-by: Patrisious Haddad Link: https://lore.kernel.org/r/250466af94f4989d638fab168e246035530e912f.1718301543.git.leon@kernel.org Reviewed-by: Simon Horman Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/counters.c | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 8300ce622835..4f6c1968a2ee 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ b/drivers/infiniband/hw/mlx5/counters.c @@ -83,6 +83,8 @@ static const struct mlx5_ib_counter extended_err_cnts[] = { INIT_Q_COUNTER(resp_remote_access_errors), INIT_Q_COUNTER(resp_cqe_flush_error), INIT_Q_COUNTER(req_cqe_flush_error), + INIT_Q_COUNTER(req_transport_retries_exceeded), + INIT_Q_COUNTER(req_rnr_retries_exceeded), }; static const struct mlx5_ib_counter roce_accl_cnts[] = { @@ -102,6 +104,8 @@ static const struct mlx5_ib_counter vport_extended_err_cnts[] = { INIT_VPORT_Q_COUNTER(resp_remote_access_errors), INIT_VPORT_Q_COUNTER(resp_cqe_flush_error), INIT_VPORT_Q_COUNTER(req_cqe_flush_error), + INIT_VPORT_Q_COUNTER(req_transport_retries_exceeded), + INIT_VPORT_Q_COUNTER(req_rnr_retries_exceeded), }; static const struct mlx5_ib_counter vport_roce_accl_cnts[] = { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5df52e15f7d6..09d9d87d62c6 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5629,7 +5629,11 @@ struct mlx5_ifc_query_q_counter_out_bits { u8 local_ack_timeout_err[0x20]; - u8 reserved_at_320[0xa0]; + u8 reserved_at_320[0x60]; + + u8 req_rnr_retries_exceeded[0x20]; + + u8 reserved_at_3a0[0x20]; u8 resp_local_length_error[0x20]; -- cgit v1.2.3 From 81cc927d9c5eefd4a1b08e16b0ab2263f36d03f7 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Thu, 23 May 2024 17:45:17 -0400 Subject: io_uring: Drop per-ctx dummy_ubuf Commit 19a63c402170 ("io_uring/rsrc: keep one global dummy_ubuf") replaced it with a global static object but this stayed behind. Fixes: 19a63c402170 ("io_uring/rsrc: keep one global dummy_ubuf") Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20240523214517.31803-1-krisman@suse.de Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b48570eaa449..93c9044ec3fe 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -373,7 +373,6 @@ struct io_ring_ctx { struct io_restriction restrictions; /* slow path rsrc auxilary data, used by update/register */ - struct io_mapped_ubuf *dummy_ubuf; struct io_rsrc_data *file_data; struct io_rsrc_data *buf_data; -- cgit v1.2.3 From 200f3abd14db55f9aadcb74f4e7a678f1c469ba1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 3 Jun 2024 11:51:19 -0600 Subject: io_uring/eventfd: move eventfd handling to separate file This is pretty nicely abstracted already, but let's move it to a separate file rather than have it in the main io_uring file. With that, we can also move the io_ev_fd struct and enum out of global scope. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 8 --- io_uring/Makefile | 6 +- io_uring/eventfd.c | 160 +++++++++++++++++++++++++++++++++++++++++ io_uring/eventfd.h | 8 +++ io_uring/io_uring.c | 82 +-------------------- io_uring/io_uring.h | 6 -- io_uring/register.c | 56 +-------------- 7 files changed, 173 insertions(+), 153 deletions(-) create mode 100644 io_uring/eventfd.c create mode 100644 io_uring/eventfd.h (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 93c9044ec3fe..850e30be9322 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -211,14 +211,6 @@ struct io_submit_state { struct blk_plug plug; }; -struct io_ev_fd { - struct eventfd_ctx *cq_ev_fd; - unsigned int eventfd_async: 1; - struct rcu_head rcu; - atomic_t refs; - atomic_t ops; -}; - struct io_alloc_cache { void **entries; unsigned int nr_cached; diff --git a/io_uring/Makefile b/io_uring/Makefile index fc1b23c524e8..61923e11c767 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -4,9 +4,9 @@ obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \ tctx.o filetable.o rw.o net.o poll.o \ - uring_cmd.o openclose.o sqpoll.o \ - xattr.o nop.o fs.o splice.o sync.o \ - msg_ring.o advise.o openclose.o \ + eventfd.o uring_cmd.o openclose.o \ + sqpoll.o xattr.o nop.o fs.o splice.o \ + sync.o msg_ring.o advise.o openclose.o \ epoll.o statx.o timeout.o fdinfo.o \ cancel.o waitid.o register.o \ truncate.o memmap.o diff --git a/io_uring/eventfd.c b/io_uring/eventfd.c new file mode 100644 index 000000000000..b9384503a2b7 --- /dev/null +++ b/io_uring/eventfd.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#include "io-wq.h" +#include "eventfd.h" + +struct io_ev_fd { + struct eventfd_ctx *cq_ev_fd; + unsigned int eventfd_async: 1; + struct rcu_head rcu; + atomic_t refs; + atomic_t ops; +}; + +enum { + IO_EVENTFD_OP_SIGNAL_BIT, +}; + +static void io_eventfd_free(struct rcu_head *rcu) +{ + struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); + + eventfd_ctx_put(ev_fd->cq_ev_fd); + kfree(ev_fd); +} + +static void io_eventfd_do_signal(struct rcu_head *rcu) +{ + struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); + + eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); + + if (atomic_dec_and_test(&ev_fd->refs)) + io_eventfd_free(rcu); +} + +void io_eventfd_signal(struct io_ring_ctx *ctx) +{ + struct io_ev_fd *ev_fd = NULL; + + if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) + return; + + guard(rcu)(); + + /* + * rcu_dereference ctx->io_ev_fd once and use it for both for checking + * and eventfd_signal + */ + ev_fd = rcu_dereference(ctx->io_ev_fd); + + /* + * Check again if ev_fd exists incase an io_eventfd_unregister call + * completed between the NULL check of ctx->io_ev_fd at the start of + * the function and rcu_read_lock. + */ + if (unlikely(!ev_fd)) + return; + if (!atomic_inc_not_zero(&ev_fd->refs)) + return; + if (ev_fd->eventfd_async && !io_wq_current_is_worker()) + goto out; + + if (likely(eventfd_signal_allowed())) { + eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); + } else { + if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) { + call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal); + return; + } + } +out: + if (atomic_dec_and_test(&ev_fd->refs)) + call_rcu(&ev_fd->rcu, io_eventfd_free); +} + +void io_eventfd_flush_signal(struct io_ring_ctx *ctx) +{ + bool skip; + + spin_lock(&ctx->completion_lock); + + /* + * Eventfd should only get triggered when at least one event has been + * posted. Some applications rely on the eventfd notification count + * only changing IFF a new CQE has been added to the CQ ring. There's + * no depedency on 1:1 relationship between how many times this + * function is called (and hence the eventfd count) and number of CQEs + * posted to the CQ ring. + */ + skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail; + ctx->evfd_last_cq_tail = ctx->cached_cq_tail; + spin_unlock(&ctx->completion_lock); + if (skip) + return; + + io_eventfd_signal(ctx); +} + +int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, + unsigned int eventfd_async) +{ + struct io_ev_fd *ev_fd; + __s32 __user *fds = arg; + int fd; + + ev_fd = rcu_dereference_protected(ctx->io_ev_fd, + lockdep_is_held(&ctx->uring_lock)); + if (ev_fd) + return -EBUSY; + + if (copy_from_user(&fd, fds, sizeof(*fds))) + return -EFAULT; + + ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL); + if (!ev_fd) + return -ENOMEM; + + ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd); + if (IS_ERR(ev_fd->cq_ev_fd)) { + int ret = PTR_ERR(ev_fd->cq_ev_fd); + kfree(ev_fd); + return ret; + } + + spin_lock(&ctx->completion_lock); + ctx->evfd_last_cq_tail = ctx->cached_cq_tail; + spin_unlock(&ctx->completion_lock); + + ev_fd->eventfd_async = eventfd_async; + ctx->has_evfd = true; + atomic_set(&ev_fd->refs, 1); + atomic_set(&ev_fd->ops, 0); + rcu_assign_pointer(ctx->io_ev_fd, ev_fd); + return 0; +} + +int io_eventfd_unregister(struct io_ring_ctx *ctx) +{ + struct io_ev_fd *ev_fd; + + ev_fd = rcu_dereference_protected(ctx->io_ev_fd, + lockdep_is_held(&ctx->uring_lock)); + if (ev_fd) { + ctx->has_evfd = false; + rcu_assign_pointer(ctx->io_ev_fd, NULL); + if (atomic_dec_and_test(&ev_fd->refs)) + call_rcu(&ev_fd->rcu, io_eventfd_free); + return 0; + } + + return -ENXIO; +} diff --git a/io_uring/eventfd.h b/io_uring/eventfd.h new file mode 100644 index 000000000000..d394f49c6321 --- /dev/null +++ b/io_uring/eventfd.h @@ -0,0 +1,8 @@ + +struct io_ring_ctx; +int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, + unsigned int eventfd_async); +int io_eventfd_unregister(struct io_ring_ctx *ctx); + +void io_eventfd_flush_signal(struct io_ring_ctx *ctx); +void io_eventfd_signal(struct io_ring_ctx *ctx); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 0a24feec27f7..d10678b9d519 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -101,6 +101,7 @@ #include "poll.h" #include "rw.h" #include "alloc_cache.h" +#include "eventfd.h" #define IORING_MAX_ENTRIES 32768 #define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) @@ -541,87 +542,6 @@ static __cold void io_queue_deferred(struct io_ring_ctx *ctx) } } -void io_eventfd_free(struct rcu_head *rcu) -{ - struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); - - eventfd_ctx_put(ev_fd->cq_ev_fd); - kfree(ev_fd); -} - -void io_eventfd_do_signal(struct rcu_head *rcu) -{ - struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); - - eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); - - if (atomic_dec_and_test(&ev_fd->refs)) - io_eventfd_free(rcu); -} - -static void io_eventfd_signal(struct io_ring_ctx *ctx) -{ - struct io_ev_fd *ev_fd = NULL; - - if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) - return; - - guard(rcu)(); - - /* - * rcu_dereference ctx->io_ev_fd once and use it for both for checking - * and eventfd_signal - */ - ev_fd = rcu_dereference(ctx->io_ev_fd); - - /* - * Check again if ev_fd exists incase an io_eventfd_unregister call - * completed between the NULL check of ctx->io_ev_fd at the start of - * the function and rcu_read_lock. - */ - if (unlikely(!ev_fd)) - return; - if (!atomic_inc_not_zero(&ev_fd->refs)) - return; - if (ev_fd->eventfd_async && !io_wq_current_is_worker()) - goto out; - - if (likely(eventfd_signal_allowed())) { - eventfd_signal_mask(ev_fd->cq_ev_fd, EPOLL_URING_WAKE); - } else { - if (!atomic_fetch_or(BIT(IO_EVENTFD_OP_SIGNAL_BIT), &ev_fd->ops)) { - call_rcu_hurry(&ev_fd->rcu, io_eventfd_do_signal); - return; - } - } -out: - if (atomic_dec_and_test(&ev_fd->refs)) - call_rcu(&ev_fd->rcu, io_eventfd_free); -} - -static void io_eventfd_flush_signal(struct io_ring_ctx *ctx) -{ - bool skip; - - spin_lock(&ctx->completion_lock); - - /* - * Eventfd should only get triggered when at least one event has been - * posted. Some applications rely on the eventfd notification count - * only changing IFF a new CQE has been added to the CQ ring. There's - * no depedency on 1:1 relationship between how many times this - * function is called (and hence the eventfd count) and number of CQEs - * posted to the CQ ring. - */ - skip = ctx->cached_cq_tail == ctx->evfd_last_cq_tail; - ctx->evfd_last_cq_tail = ctx->cached_cq_tail; - spin_unlock(&ctx->completion_lock); - if (skip) - return; - - io_eventfd_signal(ctx); -} - void __io_commit_cqring_flush(struct io_ring_ctx *ctx) { if (ctx->poll_activated) diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 2b08b402b716..cd43924eed04 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -104,12 +104,6 @@ bool __io_alloc_req_refill(struct io_ring_ctx *ctx); bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, bool cancel_all); -enum { - IO_EVENTFD_OP_SIGNAL_BIT, -}; - -void io_eventfd_do_signal(struct rcu_head *rcu); -void io_eventfd_free(struct rcu_head *rcu); void io_activate_pollwq(struct io_ring_ctx *ctx); static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx) diff --git a/io_uring/register.c b/io_uring/register.c index 212711e9bc8a..f121e02f5e10 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -27,65 +27,11 @@ #include "cancel.h" #include "kbuf.h" #include "napi.h" +#include "eventfd.h" #define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ IORING_REGISTER_LAST + IORING_OP_LAST) -static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, - unsigned int eventfd_async) -{ - struct io_ev_fd *ev_fd; - __s32 __user *fds = arg; - int fd; - - ev_fd = rcu_dereference_protected(ctx->io_ev_fd, - lockdep_is_held(&ctx->uring_lock)); - if (ev_fd) - return -EBUSY; - - if (copy_from_user(&fd, fds, sizeof(*fds))) - return -EFAULT; - - ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL); - if (!ev_fd) - return -ENOMEM; - - ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd); - if (IS_ERR(ev_fd->cq_ev_fd)) { - int ret = PTR_ERR(ev_fd->cq_ev_fd); - kfree(ev_fd); - return ret; - } - - spin_lock(&ctx->completion_lock); - ctx->evfd_last_cq_tail = ctx->cached_cq_tail; - spin_unlock(&ctx->completion_lock); - - ev_fd->eventfd_async = eventfd_async; - ctx->has_evfd = true; - atomic_set(&ev_fd->refs, 1); - atomic_set(&ev_fd->ops, 0); - rcu_assign_pointer(ctx->io_ev_fd, ev_fd); - return 0; -} - -int io_eventfd_unregister(struct io_ring_ctx *ctx) -{ - struct io_ev_fd *ev_fd; - - ev_fd = rcu_dereference_protected(ctx->io_ev_fd, - lockdep_is_held(&ctx->uring_lock)); - if (ev_fd) { - ctx->has_evfd = false; - rcu_assign_pointer(ctx->io_ev_fd, NULL); - if (atomic_dec_and_test(&ev_fd->refs)) - call_rcu(&ev_fd->rcu, io_eventfd_free); - return 0; - } - - return -ENXIO; -} - static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args) { -- cgit v1.2.3 From 3474d1b93f897ab33ce160e759afd47d5f412de4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 13 Jun 2024 19:28:27 +0000 Subject: io_uring/io-wq: make io_wq_work flags atomic The work flags can be set/accessed from different tasks, both the originator of the request, and the io-wq workers. While modifications aren't concurrent, it still makes KMSAN unhappy. There's no real downside to just making the flag reading/manipulation use proper atomics here. Signed-off-by: Jens Axboe --- include/linux/io_uring_types.h | 2 +- io_uring/io-wq.c | 19 ++++++++++--------- io_uring/io-wq.h | 2 +- io_uring/io_uring.c | 12 ++++++------ 4 files changed, 18 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 850e30be9322..1052a68fd68d 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -50,7 +50,7 @@ struct io_wq_work_list { struct io_wq_work { struct io_wq_work_node list; - unsigned flags; + atomic_t flags; /* place it here instead of io_kiocb as it fills padding and saves 4B */ int cancel_seq; }; diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 7d3316fe9bfc..913c92249522 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -159,7 +159,7 @@ static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound) static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq, struct io_wq_work *work) { - return io_get_acct(wq, !(work->flags & IO_WQ_WORK_UNBOUND)); + return io_get_acct(wq, !(atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND)); } static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker) @@ -451,7 +451,7 @@ static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker) static inline unsigned int io_get_work_hash(struct io_wq_work *work) { - return work->flags >> IO_WQ_HASH_SHIFT; + return atomic_read(&work->flags) >> IO_WQ_HASH_SHIFT; } static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash) @@ -592,8 +592,9 @@ static void io_worker_handle_work(struct io_wq_acct *acct, next_hashed = wq_next_work(work); - if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND)) - work->flags |= IO_WQ_WORK_CANCEL; + if (do_kill && + (atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND)) + atomic_or(IO_WQ_WORK_CANCEL, &work->flags); wq->do_work(work); io_assign_current_work(worker, NULL); @@ -891,7 +892,7 @@ static bool io_wq_worker_wake(struct io_worker *worker, void *data) static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq) { do { - work->flags |= IO_WQ_WORK_CANCEL; + atomic_or(IO_WQ_WORK_CANCEL, &work->flags); wq->do_work(work); work = wq->free_work(work); } while (work); @@ -926,7 +927,7 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data) void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) { struct io_wq_acct *acct = io_work_get_acct(wq, work); - unsigned long work_flags = work->flags; + unsigned int work_flags = atomic_read(&work->flags); struct io_cb_cancel_data match = { .fn = io_wq_work_match_item, .data = work, @@ -939,7 +940,7 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) * been marked as one that should not get executed, cancel it here. */ if (test_bit(IO_WQ_BIT_EXIT, &wq->state) || - (work->flags & IO_WQ_WORK_CANCEL)) { + (work_flags & IO_WQ_WORK_CANCEL)) { io_run_cancel(work, wq); return; } @@ -982,7 +983,7 @@ void io_wq_hash_work(struct io_wq_work *work, void *val) unsigned int bit; bit = hash_ptr(val, IO_WQ_HASH_ORDER); - work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); + atomic_or(IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT), &work->flags); } static bool __io_wq_worker_cancel(struct io_worker *worker, @@ -990,7 +991,7 @@ static bool __io_wq_worker_cancel(struct io_worker *worker, struct io_wq_work *work) { if (work && match->fn(work, match->data)) { - work->flags |= IO_WQ_WORK_CANCEL; + atomic_or(IO_WQ_WORK_CANCEL, &work->flags); __set_notify_signal(worker->task); return true; } diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h index 2b2a6406dd8e..b3b004a7b625 100644 --- a/io_uring/io-wq.h +++ b/io_uring/io-wq.h @@ -56,7 +56,7 @@ bool io_wq_worker_stopped(void); static inline bool io_wq_is_hashed(struct io_wq_work *work) { - return work->flags & IO_WQ_WORK_HASHED; + return atomic_read(&work->flags) & IO_WQ_WORK_HASHED; } typedef bool (work_cancel_fn)(struct io_wq_work *, void *); diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 57382e523b33..438c44ca3abd 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -462,9 +462,9 @@ static void io_prep_async_work(struct io_kiocb *req) } req->work.list.next = NULL; - req->work.flags = 0; + atomic_set(&req->work.flags, 0); if (req->flags & REQ_F_FORCE_ASYNC) - req->work.flags |= IO_WQ_WORK_CONCURRENT; + atomic_or(IO_WQ_WORK_CONCURRENT, &req->work.flags); if (req->file && !(req->flags & REQ_F_FIXED_FILE)) req->flags |= io_file_get_flags(req->file); @@ -480,7 +480,7 @@ static void io_prep_async_work(struct io_kiocb *req) io_wq_hash_work(&req->work, file_inode(req->file)); } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) { if (def->unbound_nonreg_file) - req->work.flags |= IO_WQ_WORK_UNBOUND; + atomic_or(IO_WQ_WORK_UNBOUND, &req->work.flags); } } @@ -520,7 +520,7 @@ static void io_queue_iowq(struct io_kiocb *req) * worker for it). */ if (WARN_ON_ONCE(!same_thread_group(req->task, current))) - req->work.flags |= IO_WQ_WORK_CANCEL; + atomic_or(IO_WQ_WORK_CANCEL, &req->work.flags); trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work)); io_wq_enqueue(tctx->io_wq, &req->work); @@ -1736,14 +1736,14 @@ void io_wq_submit_work(struct io_wq_work *work) io_arm_ltimeout(req); /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ - if (work->flags & IO_WQ_WORK_CANCEL) { + if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) { fail: io_req_task_queue_fail(req, err); return; } if (!io_assign_file(req, def, issue_flags)) { err = -EBADF; - work->flags |= IO_WQ_WORK_CANCEL; + atomic_or(IO_WQ_WORK_CANCEL, &work->flags); goto fail; } -- cgit v1.2.3 From c3042a5403ef2be622023fcc3b11fc1aa08ba7fa Mon Sep 17 00:00:00 2001 From: John Garry Date: Fri, 14 Jun 2024 09:03:44 +0000 Subject: block: Drop locking annotation for limits_lock Currently compiling block/blk-settings.c with C=1 gives the following warning: block/blk-settings.c:262:9: warning: context imbalance in 'queue_limits_commit_update' - wrong count at exit request_queue.limits_lock is a mutex. Sparse locking annotation for mutexes are currently not supported - see [0] - so drop that locking annotation. [0] https://lore.kernel.org/lkml/cover.1579893447.git.jbi.octave@gmail.com/T/#mbb8bda6c0a7ca7ce19f46df976a8e3b489745488 Fixes: d690cb8ae14bd ("block: add an API to atomically update queue limits") Signed-off-by: John Garry Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20240614090345.655716-3-john.g.garry@oracle.com Signed-off-by: Jens Axboe --- block/blk-settings.c | 1 - include/linux/blkdev.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index f574181105f8..377a86fe0fcc 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -303,7 +303,6 @@ int blk_set_default_limits(struct queue_limits *lim) */ int queue_limits_commit_update(struct request_queue *q, struct queue_limits *lim) - __releases(q->limits_lock) { int error; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c247a716885..83af6ac5aa48 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -893,7 +893,6 @@ static inline unsigned int blk_chunk_sectors_left(sector_t offset, */ static inline struct queue_limits queue_limits_start_update(struct request_queue *q) - __acquires(q->limits_lock) { mutex_lock(&q->limits_lock); return q->limits; -- cgit v1.2.3 From 1ccfd1a4c809d99891142c1bb9851daa42e40f0d Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 6 Jun 2024 08:51:54 +0900 Subject: firewire: core: arrangement header inclusion for tracepoints events It is a bit inconvenient to put the relative path to local header from tree-wide header. This commit delegates the selection to include headers into users. Link: https://lore.kernel.org/r/20240605235155.116468-11-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-trace.c | 3 +++ drivers/firewire/core-transaction.c | 2 +- drivers/firewire/packet-header-definitions.h | 2 ++ include/trace/events/firewire.h | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-trace.c b/drivers/firewire/core-trace.c index 96cbd9d384dc..7cbf850f3719 100644 --- a/drivers/firewire/core-trace.c +++ b/drivers/firewire/core-trace.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later // Copyright (c) 2024 Takashi Sakamoto +#include +#include "packet-header-definitions.h" + #define CREATE_TRACE_POINTS #include diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 76ab6a209768..8410e7b4ab60 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -29,8 +29,8 @@ #include #include "core.h" -#include #include "packet-header-definitions.h" +#include #define HEADER_DESTINATION_IS_BROADCAST(header) \ ((async_header_get_destination(header) & 0x3f) == 0x3f) diff --git a/drivers/firewire/packet-header-definitions.h b/drivers/firewire/packet-header-definitions.h index ab9d0fa790d4..87a5a31845c3 100644 --- a/drivers/firewire/packet-header-definitions.h +++ b/drivers/firewire/packet-header-definitions.h @@ -7,6 +7,8 @@ #ifndef _FIREWIRE_PACKET_HEADER_DEFINITIONS_H #define _FIREWIRE_PACKET_HEADER_DEFINITIONS_H +#include + #define ASYNC_HEADER_QUADLET_COUNT 4 #define ASYNC_HEADER_Q0_DESTINATION_SHIFT 16 diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 5ccc0d91b220..1a18533778f1 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -11,7 +11,7 @@ #include -#include "../../../drivers/firewire/packet-header-definitions.h" +// Some macros are defined in 'drivers/firewire/packet-header-definitions.h'. // The content of TP_printk field is preprocessed, then put to the module binary. #define ASYNC_HEADER_GET_DESTINATION(header) \ -- cgit v1.2.3 From 677ceae190732ee844f940d1e4860af4022d2be3 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Thu, 6 Jun 2024 08:51:55 +0900 Subject: firewire: core: add tracepoints event for self_id_sequence It is helpful to trace the content of self ID sequence when the core function building bus topology. This commit adds a tracepoints event fot the purpose. It seems not to achieve printing variable length of array in print time without any storage, thus the structure of event includes a superfluous array to store the state of port. Additionally, there is no helper function to print symbol array, thus the state of port is printed as raw value. Link: https://lore.kernel.org/r/20240605235155.116468-12-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-topology.c | 7 +++-- drivers/firewire/core-trace.c | 15 ++++++++++ include/trace/events/firewire.h | 59 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index e17a01345857..04e24d14e031 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -95,7 +95,8 @@ static inline struct fw_node *fw_node(struct list_head *l) * internally consistent. On success this function returns the * fw_node corresponding to the local card otherwise NULL. */ -static struct fw_node *build_tree(struct fw_card *card, const u32 *sid, int self_id_count) +static struct fw_node *build_tree(struct fw_card *card, const u32 *sid, int self_id_count, + unsigned int generation) { struct self_id_sequence_enumerator enumerator = { .cursor = sid, @@ -139,6 +140,8 @@ static struct fw_node *build_tree(struct fw_card *card, const u32 *sid, int self } port_capacity = self_id_sequence_get_port_capacity(quadlet_count); + trace_self_id_sequence(self_id_sequence, quadlet_count, generation); + for (port_index = 0; port_index < port_capacity; ++port_index) { port_status = self_id_sequence_get_port_status(self_id_sequence, quadlet_count, port_index); @@ -482,7 +485,7 @@ void fw_core_handle_bus_reset(struct fw_card *card, int node_id, int generation, card->bm_abdicate = bm_abdicate; fw_schedule_bm_work(card, 0); - local_node = build_tree(card, self_ids, self_id_count); + local_node = build_tree(card, self_ids, self_id_count, generation); update_topology_map(card, self_ids, self_id_count); diff --git a/drivers/firewire/core-trace.c b/drivers/firewire/core-trace.c index 7cbf850f3719..c9bc4990d66e 100644 --- a/drivers/firewire/core-trace.c +++ b/drivers/firewire/core-trace.c @@ -2,7 +2,22 @@ // Copyright (c) 2024 Takashi Sakamoto #include +#include #include "packet-header-definitions.h" +#include "phy-packet-definitions.h" #define CREATE_TRACE_POINTS #include + +#ifdef TRACEPOINTS_ENABLED +void copy_port_status(u8 *port_status, unsigned int port_capacity, + const u32 *self_id_sequence, unsigned int quadlet_count) +{ + unsigned int port_index; + + for (port_index = 0; port_index < port_capacity; ++port_index) { + port_status[port_index] = + self_id_sequence_get_port_status(self_id_sequence, quadlet_count, port_index); + } +} +#endif diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 1a18533778f1..1d0d63b0f8e7 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -366,6 +366,65 @@ TRACE_EVENT(bus_reset_handle, ) ); +// Some macros are defined in 'drivers/firewire/phy-packet-definitions.h'. + +// The content of TP_printk field is preprocessed, then put to the module binary. + +#define PHY_PACKET_SELF_ID_GET_PHY_ID(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_PHY_ID_MASK) >> SELF_ID_PHY_ID_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_LINK_ACTIVE_MASK) >> SELF_ID_ZERO_LINK_ACTIVE_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_GAP_COUNT(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_GAP_COUNT_MASK) >> SELF_ID_ZERO_GAP_COUNT_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_SCODE(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_SCODE_MASK) >> SELF_ID_ZERO_SCODE_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_CONTENDER(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_CONTENDER_MASK) >> SELF_ID_ZERO_CONTENDER_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_POWER_CLASS(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_POWER_CLASS_MASK) >> SELF_ID_ZERO_POWER_CLASS_SHIFT) + +#define PHY_PACKET_SELF_ID_GET_INITIATED_RESET(quads) \ + ((((const u32 *)quads)[0] & SELF_ID_ZERO_INITIATED_RESET_MASK) >> SELF_ID_ZERO_INITIATED_RESET_SHIFT) + +void copy_port_status(u8 *port_status, unsigned int port_capacity, const u32 *self_id_sequence, + unsigned int quadlet_count); + +TRACE_EVENT(self_id_sequence, + TP_PROTO(const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation), + TP_ARGS(self_id_sequence, quadlet_count, generation), + TP_STRUCT__entry( + __field(u8, generation) + __dynamic_array(u8, port_status, self_id_sequence_get_port_capacity(quadlet_count)) + __dynamic_array(u32, self_id_sequence, quadlet_count) + ), + TP_fast_assign( + __entry->generation = generation; + copy_port_status(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), + self_id_sequence, quadlet_count); + memcpy(__get_dynamic_array(self_id_sequence), self_id_sequence, + __get_dynamic_array_len(self_id_sequence)); + ), + TP_printk( + "generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s", + __entry->generation, + PHY_PACKET_SELF_ID_GET_PHY_ID(__get_dynamic_array(self_id_sequence)), + PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(__get_dynamic_array(self_id_sequence)) ? "true" : "false", + PHY_PACKET_SELF_ID_GET_GAP_COUNT(__get_dynamic_array(self_id_sequence)), + PHY_PACKET_SELF_ID_GET_SCODE(__get_dynamic_array(self_id_sequence)), + PHY_PACKET_SELF_ID_GET_CONTENDER(__get_dynamic_array(self_id_sequence)) ? "true" : "false", + PHY_PACKET_SELF_ID_GET_POWER_CLASS(__get_dynamic_array(self_id_sequence)), + PHY_PACKET_SELF_ID_GET_INITIATED_RESET(__get_dynamic_array(self_id_sequence)) ? "true" : "false", + __print_array(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), 1), + __print_array(__get_dynamic_array(self_id_sequence), + __get_dynamic_array_len(self_id_sequence) / QUADLET_SIZE, QUADLET_SIZE) + ) +); + #undef QUADLET_SIZE #endif // _FIREWIRE_TRACE_EVENT_H -- cgit v1.2.3 From 2fd22faf0e9f120c39e6f47e5622b0039bb10817 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Fri, 14 Jun 2024 09:42:51 +0900 Subject: firewire: core: record card index in tracepoints event for self ID sequence This patch is for for-next branch. The selfIDComplete event occurs in the bus managed by one of 1394 OHCI controller in Linux system, while the existing tracepoints events has the lack of data about it to distinguish the issued hardware from the others. This commit adds card_index member into event structure to store the index of host controller in use, and prints it. Link: https://lore.kernel.org/r/20240614004251.460649-1-o-takashi@sakamocchi.jp Signed-off-by: Takashi Sakamoto --- drivers/firewire/core-topology.c | 2 +- include/trace/events/firewire.h | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-topology.c b/drivers/firewire/core-topology.c index 04e24d14e031..b4e637aa6932 100644 --- a/drivers/firewire/core-topology.c +++ b/drivers/firewire/core-topology.c @@ -140,7 +140,7 @@ static struct fw_node *build_tree(struct fw_card *card, const u32 *sid, int self } port_capacity = self_id_sequence_get_port_capacity(quadlet_count); - trace_self_id_sequence(self_id_sequence, quadlet_count, generation); + trace_self_id_sequence(card->index, self_id_sequence, quadlet_count, generation); for (port_index = 0; port_index < port_capacity; ++port_index) { port_status = self_id_sequence_get_port_status(self_id_sequence, quadlet_count, diff --git a/include/trace/events/firewire.h b/include/trace/events/firewire.h index 1d0d63b0f8e7..d237a30d197b 100644 --- a/include/trace/events/firewire.h +++ b/include/trace/events/firewire.h @@ -395,14 +395,16 @@ void copy_port_status(u8 *port_status, unsigned int port_capacity, const u32 *se unsigned int quadlet_count); TRACE_EVENT(self_id_sequence, - TP_PROTO(const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation), - TP_ARGS(self_id_sequence, quadlet_count, generation), + TP_PROTO(unsigned int card_index, const u32 *self_id_sequence, unsigned int quadlet_count, unsigned int generation), + TP_ARGS(card_index, self_id_sequence, quadlet_count, generation), TP_STRUCT__entry( + __field(u8, card_index) __field(u8, generation) __dynamic_array(u8, port_status, self_id_sequence_get_port_capacity(quadlet_count)) __dynamic_array(u32, self_id_sequence, quadlet_count) ), TP_fast_assign( + __entry->card_index = card_index; __entry->generation = generation; copy_port_status(__get_dynamic_array(port_status), __get_dynamic_array_len(port_status), self_id_sequence, quadlet_count); @@ -410,7 +412,8 @@ TRACE_EVENT(self_id_sequence, __get_dynamic_array_len(self_id_sequence)); ), TP_printk( - "generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s", + "card_index=%u generation=%u phy_id=0x%02x link_active=%s gap_count=%u scode=%u contender=%s power_class=%u initiated_reset=%s port_status=%s self_id_sequence=%s", + __entry->card_index, __entry->generation, PHY_PACKET_SELF_ID_GET_PHY_ID(__get_dynamic_array(self_id_sequence)), PHY_PACKET_SELF_ID_GET_LINK_ACTIVE(__get_dynamic_array(self_id_sequence)) ? "true" : "false", -- cgit v1.2.3 From 1c75adb22d49ca9389333ca5e6939052a7203111 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 13 Jun 2024 22:59:09 +0200 Subject: ASoC: SOF: mediatek: Constify struct mtk_adsp_ipc_ops 'struct mtk_adsp_ipc_ops' is not modified in these drivers. Constifying this structure moves some data to a read-only section, so increase overall security. In order to do it, "struct mtk_adsp_ipc" also needs to be adjusted to this new const qualifier. On a x86_64, with allmodconfig: Before: ====== text data bss dec hex filename 15533 2383 0 17916 45fc sound/soc/sof/mediatek/mt8195/mt8195.o After: ===== text data bss dec hex filename 15557 2367 0 17924 4604 sound/soc/sof/mediatek/mt8195/mt8195.o Signed-off-by: Christophe JAILLET Link: https://msgid.link/r/a45d6b2b5ec040ea0fc78fca662c2dca3f13a49f.1718312321.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- include/linux/firmware/mediatek/mtk-adsp-ipc.h | 2 +- sound/soc/sof/mediatek/mt8186/mt8186.c | 2 +- sound/soc/sof/mediatek/mt8195/mt8195.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/firmware/mediatek/mtk-adsp-ipc.h b/include/linux/firmware/mediatek/mtk-adsp-ipc.h index 5b1d16fa3f56..6e86799a7dc4 100644 --- a/include/linux/firmware/mediatek/mtk-adsp-ipc.h +++ b/include/linux/firmware/mediatek/mtk-adsp-ipc.h @@ -40,7 +40,7 @@ struct mtk_adsp_chan { struct mtk_adsp_ipc { struct mtk_adsp_chan chans[MTK_ADSP_MBOX_NUM]; struct device *dev; - struct mtk_adsp_ipc_ops *ops; + const struct mtk_adsp_ipc_ops *ops; void *private_data; }; diff --git a/sound/soc/sof/mediatek/mt8186/mt8186.c b/sound/soc/sof/mediatek/mt8186/mt8186.c index bea1b9d9ca28..74522400207e 100644 --- a/sound/soc/sof/mediatek/mt8186/mt8186.c +++ b/sound/soc/sof/mediatek/mt8186/mt8186.c @@ -82,7 +82,7 @@ static void mt8186_dsp_handle_request(struct mtk_adsp_ipc *ipc) } } -static struct mtk_adsp_ipc_ops dsp_ops = { +static const struct mtk_adsp_ipc_ops dsp_ops = { .handle_reply = mt8186_dsp_handle_reply, .handle_request = mt8186_dsp_handle_request, }; diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 31dc98d1b1d8..24ae1d4959be 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -82,7 +82,7 @@ static void mt8195_dsp_handle_request(struct mtk_adsp_ipc *ipc) } } -static struct mtk_adsp_ipc_ops dsp_ops = { +static const struct mtk_adsp_ipc_ops dsp_ops = { .handle_reply = mt8195_dsp_handle_reply, .handle_request = mt8195_dsp_handle_request, }; -- cgit v1.2.3 From f22b4b55edb507a2b30981e133b66b642be4d13f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 13 Jun 2024 14:33:16 -0700 Subject: net: make for_each_netdev_dump() a little more bug-proof I find the behavior of xa_for_each_start() slightly counter-intuitive. It doesn't end the iteration by making the index point after the last element. IOW calling xa_for_each_start() again after it "finished" will run the body of the loop for the last valid element, instead of doing nothing. This works fine for netlink dumps if they terminate correctly (i.e. coalesce or carefully handle NLM_DONE), but as we keep getting reminded legacy dumps are unlikely to go away. Fixing this generically at the xa_for_each_start() level seems hard - there is no index reserved for "end of iteration". ifindexes are 31b wide, tho, and iterator is ulong so for for_each_netdev_dump() it's safe to go to the next element. Signed-off-by: Jakub Kicinski Reviewed-by: Przemek Kitszel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f148a01dd1d1..85111502cf8f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3021,7 +3021,8 @@ int call_netdevice_notifiers_info(unsigned long val, #define net_device_entry(lh) list_entry(lh, struct net_device, dev_list) #define for_each_netdev_dump(net, d, ifindex) \ - xa_for_each_start(&(net)->dev_by_index, (ifindex), (d), (ifindex)) + for (; (d = xa_find(&(net)->dev_by_index, &ifindex, \ + ULONG_MAX, XA_PRESENT)); ifindex++) static inline struct net_device *next_net_device(struct net_device *dev) { -- cgit v1.2.3 From d25a92ccae6bed02327b63d138e12e7806830f78 Mon Sep 17 00:00:00 2001 From: "D. Wythe" Date: Fri, 14 Jun 2024 02:00:30 +0800 Subject: net/smc: Introduce IPPROTO_SMC This patch allows to create smc socket via AF_INET, similar to the following code, /* create v4 smc sock */ v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC); /* create v6 smc sock */ v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC); There are several reasons why we believe it is appropriate here: 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6) address. There is no AF_SMC address at all. 2. Create smc socket in the AF_INET(6) path, which allows us to reuse the infrastructure of AF_INET(6) path, such as common ebpf hooks. Otherwise, smc have to implement it again in AF_SMC path. Signed-off-by: D. Wythe Reviewed-by: Wenjia Zhang Reviewed-by: Dust Li Tested-by: Niklas Schnelle Tested-by: Wenjia Zhang Signed-off-by: David S. Miller --- include/uapi/linux/in.h | 2 + net/smc/Makefile | 2 +- net/smc/af_smc.c | 16 ++++- net/smc/smc_inet.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++ net/smc/smc_inet.h | 22 +++++++ 5 files changed, 198 insertions(+), 3 deletions(-) create mode 100644 net/smc/smc_inet.c create mode 100644 net/smc/smc_inet.h (limited to 'include') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/net/smc/Makefile b/net/smc/Makefile index 2c510d543058..60f1c87d5212 100644 --- a/net/smc/Makefile +++ b/net/smc/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC) += smc.o obj-$(CONFIG_SMC_DIAG) += smc_diag.o smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o -smc-y += smc_tracepoint.o +smc-y += smc_tracepoint.o smc_inet.o smc-$(CONFIG_SYSCTL) += smc_sysctl.o smc-$(CONFIG_SMC_LO) += smc_loopback.o diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7ee6f37813d6..73a875573e7a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -54,6 +54,7 @@ #include "smc_tracepoint.h" #include "smc_sysctl.h" #include "smc_loopback.h" +#include "smc_inet.h" static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group * creation on server @@ -3575,10 +3576,15 @@ static int __init smc_init(void) pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc); goto out_lo; } - + rc = smc_inet_init(); + if (rc) { + pr_err("%s: smc_inet_init fails with %d\n", __func__, rc); + goto out_ulp; + } static_branch_enable(&tcp_have_smc); return 0; - +out_ulp: + tcp_unregister_ulp(&smc_ulp_ops); out_lo: smc_loopback_exit(); out_ib: @@ -3615,6 +3621,7 @@ out_pernet_subsys: static void __exit smc_exit(void) { static_branch_disable(&tcp_have_smc); + smc_inet_exit(); tcp_unregister_ulp(&smc_ulp_ops); sock_unregister(PF_SMC); smc_core_exit(); @@ -3642,4 +3649,9 @@ MODULE_DESCRIPTION("smc socket address family"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_SMC); MODULE_ALIAS_TCP_ULP("smc"); +/* 256 for IPPROTO_SMC and 1 for SOCK_STREAM */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 256, 1); +#if IS_ENABLED(CONFIG_IPV6) +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 256, 1); +#endif /* CONFIG_IPV6 */ MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME); diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c new file mode 100644 index 000000000000..bece346dd8e9 --- /dev/null +++ b/net/smc/smc_inet.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + * + * Copyright IBM Corp. 2016, 2018 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. Wythe + */ + +#include +#include + +#include "smc_inet.h" +#include "smc.h" + +static int smc_inet_init_sock(struct sock *sk); + +static struct proto smc_inet_prot = { + .name = "INET_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v4_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +static const struct proto_ops smc_inet_stream_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +static struct inet_protosw smc_inet_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet_prot, + .ops = &smc_inet_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; + +#if IS_ENABLED(CONFIG_IPV6) +static struct proto smc_inet6_prot = { + .name = "INET6_SMC", + .owner = THIS_MODULE, + .init = smc_inet_init_sock, + .hash = smc_hash_sk, + .unhash = smc_unhash_sk, + .release_cb = smc_release_cb, + .obj_size = sizeof(struct smc_sock), + .h.smc_hash = &smc_v6_hashinfo, + .slab_flags = SLAB_TYPESAFE_BY_RCU, +}; + +static const struct proto_ops smc_inet6_stream_ops = { + .family = PF_INET6, + .owner = THIS_MODULE, + .release = smc_release, + .bind = smc_bind, + .connect = smc_connect, + .socketpair = sock_no_socketpair, + .accept = smc_accept, + .getname = smc_getname, + .poll = smc_poll, + .ioctl = smc_ioctl, + .listen = smc_listen, + .shutdown = smc_shutdown, + .setsockopt = smc_setsockopt, + .getsockopt = smc_getsockopt, + .sendmsg = smc_sendmsg, + .recvmsg = smc_recvmsg, + .mmap = sock_no_mmap, + .splice_read = smc_splice_read, +}; + +static struct inet_protosw smc_inet6_protosw = { + .type = SOCK_STREAM, + .protocol = IPPROTO_SMC, + .prot = &smc_inet6_prot, + .ops = &smc_inet6_stream_ops, + .flags = INET_PROTOSW_ICSK, +}; +#endif /* CONFIG_IPV6 */ + +static int smc_inet_init_sock(struct sock *sk) +{ + struct net *net = sock_net(sk); + + /* init common smc sock */ + smc_sk_init(net, sk, IPPROTO_SMC); + /* create clcsock */ + return smc_create_clcsk(net, sk, sk->sk_family); +} + +int __init smc_inet_init(void) +{ + int rc; + + rc = proto_register(&smc_inet_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet_prot fails with %d\n", + __func__, rc); + return rc; + } + /* no return value */ + inet_register_protosw(&smc_inet_protosw); + +#if IS_ENABLED(CONFIG_IPV6) + rc = proto_register(&smc_inet6_prot, 1); + if (rc) { + pr_err("%s: proto_register smc_inet6_prot fails with %d\n", + __func__, rc); + goto out_inet6_prot; + } + rc = inet6_register_protosw(&smc_inet6_protosw); + if (rc) { + pr_err("%s: inet6_register_protosw smc_inet6_protosw fails with %d\n", + __func__, rc); + goto out_inet6_protosw; + } + return rc; +out_inet6_protosw: + proto_unregister(&smc_inet6_prot); +out_inet6_prot: + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +#endif /* CONFIG_IPV6 */ + return rc; +} + +void smc_inet_exit(void) +{ +#if IS_ENABLED(CONFIG_IPV6) + inet6_unregister_protosw(&smc_inet6_protosw); + proto_unregister(&smc_inet6_prot); +#endif /* CONFIG_IPV6 */ + inet_unregister_protosw(&smc_inet_protosw); + proto_unregister(&smc_inet_prot); +} diff --git a/net/smc/smc_inet.h b/net/smc/smc_inet.h new file mode 100644 index 000000000000..a489c8a2b8ef --- /dev/null +++ b/net/smc/smc_inet.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Shared Memory Communications over RDMA (SMC-R) and RoCE + * + * Definitions for the IPPROTO_SMC (socket related) + + * Copyright IBM Corp. 2016 + * Copyright (c) 2024, Alibaba Inc. + * + * Author: D. Wythe + */ +#ifndef __INET_SMC +#define __INET_SMC + +/* Initialize protocol registration on IPPROTO_SMC, + * @return 0 on success + */ +int smc_inet_init(void); + +void smc_inet_exit(void); + +#endif /* __INET_SMC */ -- cgit v1.2.3 From a43b9ec091b1b1924ea18883a715e5aadba2543e Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Wed, 29 May 2024 10:19:20 -0700 Subject: peci, hwmon: Switch to new Intel CPU model defines Update peci subsystem to use the same vendor-family-model combined definition that core x86 code uses. Signed-off-by: Tony Luck Acked-by: Guenter Roeck Reviewed-by: Iwona Winiarska Link: https://lore.kernel.org/r/20240529171920.62571-1-tony.luck@intel.com Signed-off-by: Iwona Winiarska --- drivers/hwmon/peci/cputemp.c | 8 ++++---- drivers/peci/core.c | 5 ++--- drivers/peci/cpu.c | 21 +++++++-------------- drivers/peci/device.c | 3 +-- drivers/peci/internal.h | 6 ++---- include/linux/peci-cpu.h | 24 ++++++++++++++++++++++++ include/linux/peci.h | 6 ++---- 7 files changed, 42 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c index a812c15948d9..5a682195b98f 100644 --- a/drivers/hwmon/peci/cputemp.c +++ b/drivers/hwmon/peci/cputemp.c @@ -360,10 +360,10 @@ static int init_core_mask(struct peci_cputemp *priv) int ret; /* Get the RESOLVED_CORES register value */ - switch (peci_dev->info.model) { - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_ICELAKE_D: - case INTEL_FAM6_SAPPHIRERAPIDS_X: + switch (peci_dev->info.x86_vfm) { + case INTEL_ICELAKE_X: + case INTEL_ICELAKE_D: + case INTEL_SAPPHIRERAPIDS_X: ret = peci_ep_pci_local_read(peci_dev, 0, reg->bus, reg->dev, reg->func, reg->offset + 4, &data); if (ret) diff --git a/drivers/peci/core.c b/drivers/peci/core.c index 8f8bda2f2a62..8ff3e5d225ae 100644 --- a/drivers/peci/core.c +++ b/drivers/peci/core.c @@ -163,9 +163,8 @@ EXPORT_SYMBOL_NS_GPL(devm_peci_controller_add, PECI); static const struct peci_device_id * peci_bus_match_device_id(const struct peci_device_id *id, struct peci_device *device) { - while (id->family != 0) { - if (id->family == device->info.family && - id->model == device->info.model) + while (id->x86_vfm != 0) { + if (id->x86_vfm == device->info.x86_vfm) return id; id++; } diff --git a/drivers/peci/cpu.c b/drivers/peci/cpu.c index bd990acd92b8..152bbd8e717a 100644 --- a/drivers/peci/cpu.c +++ b/drivers/peci/cpu.c @@ -294,38 +294,31 @@ peci_cpu_probe(struct peci_device *device, const struct peci_device_id *id) static const struct peci_device_id peci_cpu_device_ids[] = { { /* Haswell Xeon */ - .family = 6, - .model = INTEL_FAM6_HASWELL_X, + .x86_vfm = INTEL_HASWELL_X, .data = "hsx", }, { /* Broadwell Xeon */ - .family = 6, - .model = INTEL_FAM6_BROADWELL_X, + .x86_vfm = INTEL_BROADWELL_X, .data = "bdx", }, { /* Broadwell Xeon D */ - .family = 6, - .model = INTEL_FAM6_BROADWELL_D, + .x86_vfm = INTEL_BROADWELL_D, .data = "bdxd", }, { /* Skylake Xeon */ - .family = 6, - .model = INTEL_FAM6_SKYLAKE_X, + .x86_vfm = INTEL_SKYLAKE_X, .data = "skx", }, { /* Icelake Xeon */ - .family = 6, - .model = INTEL_FAM6_ICELAKE_X, + .x86_vfm = INTEL_ICELAKE_X, .data = "icx", }, { /* Icelake Xeon D */ - .family = 6, - .model = INTEL_FAM6_ICELAKE_D, + .x86_vfm = INTEL_ICELAKE_D, .data = "icxd", }, { /* Sapphire Rapids Xeon */ - .family = 6, - .model = INTEL_FAM6_SAPPHIRERAPIDS_X, + .x86_vfm = INTEL_SAPPHIRERAPIDS_X, .data = "spr", }, { } diff --git a/drivers/peci/device.c b/drivers/peci/device.c index ee01f03c29b7..37ca7dd61807 100644 --- a/drivers/peci/device.c +++ b/drivers/peci/device.c @@ -100,8 +100,7 @@ static int peci_device_info_init(struct peci_device *device) if (ret) return ret; - device->info.family = peci_x86_cpu_family(cpu_id); - device->info.model = peci_x86_cpu_model(cpu_id); + device->info.x86_vfm = IFM(peci_x86_cpu_family(cpu_id), peci_x86_cpu_model(cpu_id)); ret = peci_get_revision(device, &revision); if (ret) diff --git a/drivers/peci/internal.h b/drivers/peci/internal.h index 506bafcccbbf..7a4f6eae2f90 100644 --- a/drivers/peci/internal.h +++ b/drivers/peci/internal.h @@ -66,13 +66,11 @@ struct peci_request *peci_xfer_ep_mmio64_readl(struct peci_device *device, u8 ba /** * struct peci_device_id - PECI device data to match * @data: pointer to driver private data specific to device - * @family: device family - * @model: device model + * @x86_vfm: device vendor-family-model */ struct peci_device_id { const void *data; - u16 family; - u8 model; + u32 x86_vfm; }; extern const struct device_type peci_device_type; diff --git a/include/linux/peci-cpu.h b/include/linux/peci-cpu.h index ff8ae9c26c80..601cdd086bf6 100644 --- a/include/linux/peci-cpu.h +++ b/include/linux/peci-cpu.h @@ -6,6 +6,30 @@ #include +/* Copied from x86 */ +#define X86_VENDOR_INTEL 0 + +/* Copied from x86 */ +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) +/* End of copied code */ + #include "../../arch/x86/include/asm/intel-family.h" #define PECI_PCS_PKG_ID 0 /* Package Identifier Read */ diff --git a/include/linux/peci.h b/include/linux/peci.h index 90e241458ef6..3e0bc37591d6 100644 --- a/include/linux/peci.h +++ b/include/linux/peci.h @@ -59,8 +59,7 @@ static inline struct peci_controller *to_peci_controller(void *d) * struct peci_device - PECI device * @dev: device object to register PECI device to the device model * @info: PECI device characteristics - * @info.family: device family - * @info.model: device model + * @info.x86_vfm: device vendor-family-model * @info.peci_revision: PECI revision supported by the PECI device * @info.socket_id: the socket ID represented by the PECI device * @addr: address used on the PECI bus connected to the parent controller @@ -73,8 +72,7 @@ static inline struct peci_controller *to_peci_controller(void *d) struct peci_device { struct device dev; struct { - u16 family; - u8 model; + u32 x86_vfm; u8 peci_revision; u8 socket_id; } info; -- cgit v1.2.3 From f45a6051d582f613f4abc383ae238661f7813302 Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Mon, 25 Mar 2024 18:38:10 +0200 Subject: cpu/hotplug: Fix typo in comment Signed-off-by: Costa Shulyupin Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240325163810.669459-1-costa.shul@redhat.com --- include/linux/cpuhotplug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 7a5785f405b6..7f6c820c12eb 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -27,7 +27,7 @@ * startup callbacks sequentially from CPUHP_OFFLINE + 1 to CPUHP_ONLINE * during a CPU online operation. During a CPU offline operation the * installed teardown callbacks are invoked in the reverse order from - * CPU_ONLINE - 1 down to CPUHP_OFFLINE. + * CPUHP_ONLINE - 1 down to CPUHP_OFFLINE. * * The state space has three sections: PREPARE, STARTING and ONLINE. * -- cgit v1.2.3 From 299d623f5c9ab48e53255cf6b510627f1ef26dfe Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:03 +0200 Subject: irqdomain: Introduce irq_domain_instantiate() The existing irq_domain_add_*() functions used to instantiate an IRQ domain are wrappers built on top of __irq_domain_add() and describe the domain properties using a bunch of parameters. Adding more parameters and wrappers to hide new parameters in the existing code lead to more and more code without any relevant value and without any flexibility. Introduce irq_domain_instantiate() where the interrupt domain properties are given using a irq_domain_info structure instead of the bunch of parameters to allow flexibility and easy evolution. irq_domain_instantiate() performs the same operation as the one done by __irq_domain_add(). For compatibility reason with existing code, keep __irq_domain_add() but convert it to irq_domain_instantiate(). [ tglx: Fixed up struct initializer coding style ] Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-3-herve.codina@bootlin.com --- include/linux/irqdomain.h | 21 +++++++++++++++++++++ kernel/irq/irqdomain.c | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 21ecf582a0fe..ab8939c8724d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -257,6 +257,27 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); +/** + * struct irq_domain_info - Domain information structure + * @fwnode: firmware node for the interrupt controller + * @size: Size of linear map; 0 for radix mapping only + * @hwirq_max: Maximum number of interrupts supported by controller + * @direct_max: Maximum value of direct maps; + * Use ~0 for no limit; 0 for no direct mapping + * @ops: Domain operation callbacks + * @host_data: Controller private data pointer + */ +struct irq_domain_info { + struct fwnode_handle *fwnode; + unsigned int size; + irq_hw_number_t hwirq_max; + int direct_max; + const struct irq_domain_ops *ops; + void *host_data; +}; + +struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); + struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 40b631bd2836..111052f363ea 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -247,6 +247,27 @@ static void irq_domain_free(struct irq_domain *domain) kfree(domain); } +/** + * irq_domain_instantiate() - Instantiate a new irq domain data structure + * @info: Domain information pointer pointing to the information for this domain + * + * Return: A pointer to the instantiated irq domain or an ERR_PTR value. + */ +struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) +{ + struct irq_domain *domain; + + domain = __irq_domain_create(info->fwnode, info->size, info->hwirq_max, + info->direct_max, info->ops, info->host_data); + if (!domain) + return ERR_PTR(-ENOMEM); + + __irq_domain_publish(domain); + + return domain; +} +EXPORT_SYMBOL_GPL(irq_domain_instantiate); + /** * __irq_domain_add() - Allocate a new irq_domain data structure * @fwnode: firmware node for the interrupt controller @@ -265,14 +286,18 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s const struct irq_domain_ops *ops, void *host_data) { - struct irq_domain *domain; - - domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max, - ops, host_data); - if (domain) - __irq_domain_publish(domain); + struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = hwirq_max, + .direct_max = direct_max, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; - return domain; + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } EXPORT_SYMBOL_GPL(__irq_domain_add); -- cgit v1.2.3 From 922ac2cf9fe444c4aff165b9f7e158a9b651647d Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:05 +0200 Subject: irqdomain: Constify parameter in is_fwnode_irqchip() The fwnode parameter has no reason to be a pointer to an un-const struct fwnode_handle. Indeed, struct fwnode_handle is not supposed to be modified by the function. Be consistent with other function performing the same kind of operation such as is_of_node(), is_acpi_device_node() or is_software_node(): constify the fwnode parameter. Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-5-herve.codina@bootlin.com --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index ab8939c8724d..a3b43e357009 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -314,7 +314,7 @@ static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) extern const struct fwnode_operations irqchip_fwnode_ops; -static inline bool is_fwnode_irqchip(struct fwnode_handle *fwnode) +static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) { return fwnode && fwnode->ops == &irqchip_fwnode_ops; } -- cgit v1.2.3 From 757398541c30a5e898169763b43f08dab71ea3bd Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:08 +0200 Subject: irqdomain: Handle additional domain flags in irq_domain_instantiate() In order to use irq_domain_instantiate() from several places such as irq_domain_create_hierarchy(), irq_domain_instantiate() needs to handle additional domain flags. Add the required infrastructure. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-8-herve.codina@bootlin.com --- include/linux/irqdomain.h | 2 ++ kernel/irq/irqdomain.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a3b43e357009..4683b66eded9 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -260,6 +260,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); /** * struct irq_domain_info - Domain information structure * @fwnode: firmware node for the interrupt controller + * @domain_flags: Additional flags to add to the domain flags * @size: Size of linear map; 0 for radix mapping only * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; @@ -269,6 +270,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); */ struct irq_domain_info { struct fwnode_handle *fwnode; + unsigned int domain_flags; unsigned int size; irq_hw_number_t hwirq_max; int direct_max; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 0eda48f77aa9..26ad1ea67998 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -263,6 +263,8 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) if (!domain) return ERR_PTR(-ENOMEM); + domain->flags |= info->domain_flags; + __irq_domain_publish(domain); return domain; -- cgit v1.2.3 From 419e3778ff295c00aa158d9f2854a70b47ba1136 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:09 +0200 Subject: irqdomain: Handle domain hierarchy parent in irq_domain_instantiate() To use irq_domain_instantiate() from irq_domain_create_hierarchy(), irq_domain_instantiate() needs to handle the domain hierarchy parent. Add the required functionality. Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-9-herve.codina@bootlin.com --- include/linux/irqdomain.h | 6 ++++++ kernel/irq/irqdomain.c | 7 +++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 4683b66eded9..e52fd5e5494c 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -276,6 +276,12 @@ struct irq_domain_info { int direct_max; const struct irq_domain_ops *ops; void *host_data; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + /** + * @parent: Pointer to the parent irq domain used in a hierarchy domain + */ + struct irq_domain *parent; +#endif }; struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 26ad1ea67998..1269a8104e6c 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -265,6 +265,13 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) domain->flags |= info->domain_flags; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + if (info->parent) { + domain->root = info->parent->root; + domain->parent = info->parent; + } +#endif + __irq_domain_publish(domain); return domain; -- cgit v1.2.3 From 0b21add71bd9cfa2bd6677a0300e15fd4c4b84ed Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:12 +0200 Subject: irqdomain: Handle domain bus token in irq_domain_create() irq_domain_update_bus_token() is the only way to set the domain bus token. This is sub-optimal as irq_domain_update_bus_token() can be called only once the domain is created and needs to revert some operations, change the domain name and redo the operations. In order to avoid this revert/change/redo sequence, take the domain bus into account token during the domain creation. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-12-herve.codina@bootlin.com --- include/linux/irqdomain.h | 2 ++ kernel/irq/irqdomain.c | 30 ++++++++++++++++++++++++------ 2 files changed, 26 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e52fd5e5494c..52bed23e5c61 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -265,6 +265,7 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); * @hwirq_max: Maximum number of interrupts supported by controller * @direct_max: Maximum value of direct maps; * Use ~0 for no limit; 0 for no direct mapping + * @bus_token: Domain bus token * @ops: Domain operation callbacks * @host_data: Controller private data pointer */ @@ -274,6 +275,7 @@ struct irq_domain_info { unsigned int size; irq_hw_number_t hwirq_max; int direct_max; + enum irq_domain_bus_token bus_token; const struct irq_domain_ops *ops; void *host_data; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index fe7bba685580..a21648c76336 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -129,7 +129,8 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); static int irq_domain_set_name(struct irq_domain *domain, - const struct fwnode_handle *fwnode) + const struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) { static atomic_t unknown_domains; struct irqchip_fwid *fwid; @@ -140,13 +141,23 @@ static int irq_domain_set_name(struct irq_domain *domain, switch (fwid->type) { case IRQCHIP_FWNODE_NAMED: case IRQCHIP_FWNODE_NAMED_ID: - domain->name = kstrdup(fwid->name, GFP_KERNEL); + domain->name = bus_token ? + kasprintf(GFP_KERNEL, "%s-%d", + fwid->name, bus_token) : + kstrdup(fwid->name, GFP_KERNEL); if (!domain->name) return -ENOMEM; domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; break; default: domain->name = fwid->name; + if (bus_token) { + domain->name = kasprintf(GFP_KERNEL, "%s-%d", + fwid->name, bus_token); + if (!domain->name) + return -ENOMEM; + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + } break; } } else if (is_of_node(fwnode) || is_acpi_device_node(fwnode) || @@ -158,7 +169,9 @@ static int irq_domain_set_name(struct irq_domain *domain, * unhappy about. Replace them with ':', which does * the trick and is not as offensive as '\'... */ - name = kasprintf(GFP_KERNEL, "%pfw", fwnode); + name = bus_token ? + kasprintf(GFP_KERNEL, "%pfw-%d", fwnode, bus_token) : + kasprintf(GFP_KERNEL, "%pfw", fwnode); if (!name) return -ENOMEM; @@ -169,8 +182,12 @@ static int irq_domain_set_name(struct irq_domain *domain, if (!domain->name) { if (fwnode) pr_err("Invalid fwnode type for irqdomain\n"); - domain->name = kasprintf(GFP_KERNEL, "unknown-%d", - atomic_inc_return(&unknown_domains)); + domain->name = bus_token ? + kasprintf(GFP_KERNEL, "unknown-%d-%d", + atomic_inc_return(&unknown_domains), + bus_token) : + kasprintf(GFP_KERNEL, "unknown-%d", + atomic_inc_return(&unknown_domains)); if (!domain->name) return -ENOMEM; domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; @@ -194,7 +211,7 @@ static struct irq_domain *__irq_domain_create(const struct irq_domain_info *info if (!domain) return ERR_PTR(-ENOMEM); - err = irq_domain_set_name(domain, info->fwnode); + err = irq_domain_set_name(domain, info->fwnode, info->bus_token); if (err) { kfree(domain); return ERR_PTR(err); @@ -207,6 +224,7 @@ static struct irq_domain *__irq_domain_create(const struct irq_domain_info *info INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); domain->ops = info->ops; domain->host_data = info->host_data; + domain->bus_token = info->bus_token; domain->hwirq_max = info->hwirq_max; if (info->direct_max) -- cgit v1.2.3 From 44b68de9b8e3dfde12308e8567548799d7ded0de Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:13 +0200 Subject: irqdomain: Introduce init() and exit() hooks The current API does not allow additional initialization before the domain is published. This can lead to a race condition between consumers and supplier as a domain can be available for consumers before being fully ready. Introduce the init() hook to allow additional initialization before plublishing the domain. Also introduce the exit() hook to revert operations done in init() on domain removal. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-13-herve.codina@bootlin.com --- include/linux/irqdomain.h | 8 ++++++++ kernel/irq/irqdomain.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 52bed23e5c61..2c927edc4d43 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -141,6 +141,7 @@ struct irq_domain_chip_generic; * purposes related to the irq domain. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init + * @exit: Function called when the domain is destroyed * * Revmap data, used internally by the irq domain code: * @revmap_size: Size of the linear map table @revmap[] @@ -169,6 +170,7 @@ struct irq_domain { #ifdef CONFIG_GENERIC_MSI_IRQ const struct msi_parent_ops *msi_parent_ops; #endif + void (*exit)(struct irq_domain *d); /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; @@ -268,6 +270,10 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); * @bus_token: Domain bus token * @ops: Domain operation callbacks * @host_data: Controller private data pointer + * @init: Function called when the domain is created. + * Allow to do some additional domain initialisation. + * @exit: Function called when the domain is destroyed. + * Allow to do some additional cleanup operation. */ struct irq_domain_info { struct fwnode_handle *fwnode; @@ -284,6 +290,8 @@ struct irq_domain_info { */ struct irq_domain *parent; #endif + int (*init)(struct irq_domain *d); + void (*exit)(struct irq_domain *d); }; struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index a21648c76336..a0324d8e6dab 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -276,12 +276,14 @@ static void irq_domain_free(struct irq_domain *domain) struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) { struct irq_domain *domain; + int err; domain = __irq_domain_create(info); if (IS_ERR(domain)) return domain; domain->flags |= info->domain_flags; + domain->exit = info->exit; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (info->parent) { @@ -290,9 +292,19 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) } #endif + if (info->init) { + err = info->init(domain); + if (err) + goto err_domain_free; + } + __irq_domain_publish(domain); return domain; + +err_domain_free: + irq_domain_free(domain); + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(irq_domain_instantiate); @@ -339,6 +351,9 @@ EXPORT_SYMBOL_GPL(__irq_domain_add); */ void irq_domain_remove(struct irq_domain *domain) { + if (domain->exit) + domain->exit(domain); + mutex_lock(&irq_domain_mutex); debugfs_remove_domain_dir(domain); -- cgit v1.2.3 From e25f553a92973eaf59ff3a00fe7f61ab01b2877f Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:14 +0200 Subject: genirq/generic_chip: Introduce irq_domain_{alloc,remove}_generic_chips() The existing __irq_alloc_domain_generic_chips() uses a bunch of parameters to describe the generic chips that need to be allocated. Adding more parameters and wrappers to hide new parameters in the existing code leads to more and more code without any relevant values and without any flexibility. Introduce irq_domain_alloc_generic_chips() where the generic chips description is done using the irq_domain_chip_generic_info structure instead of the bunch of parameters to allow flexibility and easy evolution. Also introduce irq_domain_remove_generic_chips() to revert the operations done by irq_domain_alloc_generic_chips(). Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-14-herve.codina@bootlin.com --- include/linux/irq.h | 25 +++++++++++++ kernel/irq/generic-chip.c | 91 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 93 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index a217e1029c1d..58264b236cbf 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1117,6 +1117,27 @@ struct irq_domain_chip_generic { struct irq_chip_generic *gc[]; }; +/** + * struct irq_domain_chip_generic_info - Generic chip information structure + * @name: Name of the generic interrupt chip + * @handler: Interrupt handler used by the generic interrupt chip + * @irqs_per_chip: Number of interrupts each chip handles (max 32) + * @num_ct: Number of irq_chip_type instances associated with each + * chip + * @irq_flags_to_clear: IRQ_* bits to clear in the mapping function + * @irq_flags_to_set: IRQ_* bits to set in the mapping function + * @gc_flags: Generic chip specific setup flags + */ +struct irq_domain_chip_generic_info { + const char *name; + irq_flow_handler_t handler; + unsigned int irqs_per_chip; + unsigned int num_ct; + unsigned int irq_flags_to_clear; + unsigned int irq_flags_to_set; + enum irq_gc_flags gc_flags; +}; + /* Generic chip callback functions */ void irq_gc_noop(struct irq_data *d); void irq_gc_mask_disable_reg(struct irq_data *d); @@ -1153,6 +1174,10 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); +int irq_domain_alloc_generic_chips(struct irq_domain *d, + const struct irq_domain_chip_generic_info *info); +void irq_domain_remove_generic_chips(struct irq_domain *d); + int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, int num_ct, const char *name, irq_flow_handler_t handler, diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index d39a40bc542b..d9696f5dcc38 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -276,21 +276,14 @@ irq_gc_init_mask_cache(struct irq_chip_generic *gc, enum irq_gc_flags flags) } /** - * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain - * @d: irq domain for which to allocate chips - * @irqs_per_chip: Number of interrupts each chip handles (max 32) - * @num_ct: Number of irq_chip_type instances associated with this - * @name: Name of the irq chip - * @handler: Default flow handler associated with these chips - * @clr: IRQ_* bits to clear in the mapping function - * @set: IRQ_* bits to set in the mapping function - * @gcflags: Generic chip specific setup flags + * irq_domain_alloc_generic_chips - Allocate generic chips for an irq domain + * @d: irq domain for which to allocate chips + * @info: Generic chip information + * + * Return: 0 on success, negative error code on failure */ -int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, - int num_ct, const char *name, - irq_flow_handler_t handler, - unsigned int clr, unsigned int set, - enum irq_gc_flags gcflags) +int irq_domain_alloc_generic_chips(struct irq_domain *d, + const struct irq_domain_chip_generic_info *info) { struct irq_domain_chip_generic *dgc; struct irq_chip_generic *gc; @@ -304,23 +297,23 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, if (d->gc) return -EBUSY; - numchips = DIV_ROUND_UP(d->revmap_size, irqs_per_chip); + numchips = DIV_ROUND_UP(d->revmap_size, info->irqs_per_chip); if (!numchips) return -EINVAL; /* Allocate a pointer, generic chip and chiptypes for each chip */ - gc_sz = struct_size(gc, chip_types, num_ct); + gc_sz = struct_size(gc, chip_types, info->num_ct); dgc_sz = struct_size(dgc, gc, numchips); sz = dgc_sz + numchips * gc_sz; tmp = dgc = kzalloc(sz, GFP_KERNEL); if (!dgc) return -ENOMEM; - dgc->irqs_per_chip = irqs_per_chip; + dgc->irqs_per_chip = info->irqs_per_chip; dgc->num_chips = numchips; - dgc->irq_flags_to_set = set; - dgc->irq_flags_to_clear = clr; - dgc->gc_flags = gcflags; + dgc->irq_flags_to_set = info->irq_flags_to_set; + dgc->irq_flags_to_clear = info->irq_flags_to_clear; + dgc->gc_flags = info->gc_flags; d->gc = dgc; /* Calc pointer to the first generic chip */ @@ -328,11 +321,12 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, for (i = 0; i < numchips; i++) { /* Store the pointer to the generic chip */ dgc->gc[i] = gc = tmp; - irq_init_generic_chip(gc, name, num_ct, i * irqs_per_chip, - NULL, handler); + irq_init_generic_chip(gc, info->name, info->num_ct, + i * dgc->irqs_per_chip, NULL, + info->handler); gc->domain = d; - if (gcflags & IRQ_GC_BE_IO) { + if (dgc->gc_flags & IRQ_GC_BE_IO) { gc->reg_readl = &irq_readl_be; gc->reg_writel = &irq_writel_be; } @@ -345,6 +339,57 @@ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, } return 0; } +EXPORT_SYMBOL_GPL(irq_domain_alloc_generic_chips); + +/** + * irq_domain_remove_generic_chips - Remove generic chips from an irq domain + * @d: irq domain for which generic chips are to be removed + */ +void irq_domain_remove_generic_chips(struct irq_domain *d) +{ + struct irq_domain_chip_generic *dgc = d->gc; + unsigned int i; + + if (!dgc) + return; + + for (i = 0; i < dgc->num_chips; i++) + irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0); + + d->gc = NULL; + kfree(dgc); +} +EXPORT_SYMBOL_GPL(irq_domain_remove_generic_chips); + +/** + * __irq_alloc_domain_generic_chips - Allocate generic chips for an irq domain + * @d: irq domain for which to allocate chips + * @irqs_per_chip: Number of interrupts each chip handles (max 32) + * @num_ct: Number of irq_chip_type instances associated with this + * @name: Name of the irq chip + * @handler: Default flow handler associated with these chips + * @clr: IRQ_* bits to clear in the mapping function + * @set: IRQ_* bits to set in the mapping function + * @gcflags: Generic chip specific setup flags + */ +int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, + int num_ct, const char *name, + irq_flow_handler_t handler, + unsigned int clr, unsigned int set, + enum irq_gc_flags gcflags) +{ + struct irq_domain_chip_generic_info info = { + .irqs_per_chip = irqs_per_chip, + .num_ct = num_ct, + .name = name, + .handler = handler, + .irq_flags_to_clear = clr, + .irq_flags_to_set = set, + .gc_flags = gcflags, + }; + + return irq_domain_alloc_generic_chips(d, &info); +} EXPORT_SYMBOL_GPL(__irq_alloc_domain_generic_chips); static struct irq_chip_generic * -- cgit v1.2.3 From fea922ee9f8ffd3c2a8e8dfbc68de42905a3982a Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:15 +0200 Subject: genirq/generic_chip: Introduce init() and exit() hooks Most of generic chip drivers need to perform some more additional initializations on the generic chips allocated before they can be fully ready. These additional initializations need to be performed before the IRQ domain is published to avoid a race condition between IRQ consumers and suppliers. Introduce the init() hook to perform these initializations at the right place just after the generic chip creation. Also introduce the exit() hook to allow reverting operations done by the init() hook just before the generic chip is destroyed. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-15-herve.codina@bootlin.com --- include/linux/irq.h | 8 ++++++++ kernel/irq/generic-chip.c | 24 ++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 58264b236cbf..712bcee56efc 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1106,6 +1106,7 @@ enum irq_gc_flags { * @irq_flags_to_set: IRQ* flags to set on irq setup * @irq_flags_to_clear: IRQ* flags to clear on irq setup * @gc_flags: Generic chip specific setup flags + * @exit: Function called on each chip when they are destroyed. * @gc: Array of pointers to generic interrupt chips */ struct irq_domain_chip_generic { @@ -1114,6 +1115,7 @@ struct irq_domain_chip_generic { unsigned int irq_flags_to_clear; unsigned int irq_flags_to_set; enum irq_gc_flags gc_flags; + void (*exit)(struct irq_chip_generic *gc); struct irq_chip_generic *gc[]; }; @@ -1127,6 +1129,10 @@ struct irq_domain_chip_generic { * @irq_flags_to_clear: IRQ_* bits to clear in the mapping function * @irq_flags_to_set: IRQ_* bits to set in the mapping function * @gc_flags: Generic chip specific setup flags + * @init: Function called on each chip when they are created. + * Allow to do some additional chip initialisation. + * @exit: Function called on each chip when they are destroyed. + * Allow to do some chip cleanup operation. */ struct irq_domain_chip_generic_info { const char *name; @@ -1136,6 +1142,8 @@ struct irq_domain_chip_generic_info { unsigned int irq_flags_to_clear; unsigned int irq_flags_to_set; enum irq_gc_flags gc_flags; + int (*init)(struct irq_chip_generic *gc); + void (*exit)(struct irq_chip_generic *gc); }; /* Generic chip callback functions */ diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index d9696f5dcc38..32ffcbb87fa1 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -293,6 +293,7 @@ int irq_domain_alloc_generic_chips(struct irq_domain *d, size_t gc_sz; size_t sz; void *tmp; + int ret; if (d->gc) return -EBUSY; @@ -314,6 +315,7 @@ int irq_domain_alloc_generic_chips(struct irq_domain *d, dgc->irq_flags_to_set = info->irq_flags_to_set; dgc->irq_flags_to_clear = info->irq_flags_to_clear; dgc->gc_flags = info->gc_flags; + dgc->exit = info->exit; d->gc = dgc; /* Calc pointer to the first generic chip */ @@ -331,6 +333,12 @@ int irq_domain_alloc_generic_chips(struct irq_domain *d, gc->reg_writel = &irq_writel_be; } + if (info->init) { + ret = info->init(gc); + if (ret) + goto err; + } + raw_spin_lock_irqsave(&gc_lock, flags); list_add_tail(&gc->list, &gc_list); raw_spin_unlock_irqrestore(&gc_lock, flags); @@ -338,6 +346,16 @@ int irq_domain_alloc_generic_chips(struct irq_domain *d, tmp += gc_sz; } return 0; + +err: + while (i--) { + if (dgc->exit) + dgc->exit(dgc->gc[i]); + irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0); + } + d->gc = NULL; + kfree(dgc); + return ret; } EXPORT_SYMBOL_GPL(irq_domain_alloc_generic_chips); @@ -353,9 +371,11 @@ void irq_domain_remove_generic_chips(struct irq_domain *d) if (!dgc) return; - for (i = 0; i < dgc->num_chips; i++) + for (i = 0; i < dgc->num_chips; i++) { + if (dgc->exit) + dgc->exit(dgc->gc[i]); irq_remove_generic_chip(dgc->gc[i], ~0U, 0, 0); - + } d->gc = NULL; kfree(dgc); } -- cgit v1.2.3 From e6f67ce32e8e6dcbadf42dc435fbc9002cabf1f9 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:16 +0200 Subject: irqdomain: Add support for generic irq chips creation before publishing a domain The current API functions create an irq_domain and also publish this newly created to domain. Once an irq_domain is published, consumers can request IRQ in order to use them. Some interrupt controller drivers have to perform some more operations with the created irq_domain in order to have it ready to be used. For instance: - Allocate generic irq chips with irq_alloc_domain_generic_chips() - Retrieve the generic irq chips with irq_get_domain_generic_chip() - Initialize retrieved chips: set register base address and offsets, set several hooks such as irq_mask, irq_unmask, ... With the newly introduced irq_domain_alloc_generic_chips(), an interrupt controller driver can use the irq_domain_chip_generic_info structure and set the init() hook to perform its generic chips initialization. In order to avoid a window where the domain is published but not yet ready to be used, handle the generic chip creation (i.e the irq_domain_alloc_generic_chips() call) before the domain is published. Suggested-by: Thomas Gleixner Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-16-herve.codina@bootlin.com --- include/linux/irqdomain.h | 9 +++++++++ kernel/irq/irqdomain.c | 14 +++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 2c927edc4d43..5540b22a755c 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -210,6 +210,9 @@ enum { /* Irq domain is a MSI device domain */ IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9), + /* Irq domain must destroy generic chips when removed */ + IRQ_DOMAIN_FLAG_DESTROY_GC = (1 << 10), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -259,6 +262,9 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) } void irq_domain_free_fwnode(struct fwnode_handle *fwnode); + +struct irq_domain_chip_generic_info; + /** * struct irq_domain_info - Domain information structure * @fwnode: firmware node for the interrupt controller @@ -270,6 +276,8 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode); * @bus_token: Domain bus token * @ops: Domain operation callbacks * @host_data: Controller private data pointer + * @dgc_info: Geneneric chip information structure pointer used to + * create generic chips for the domain if not NULL. * @init: Function called when the domain is created. * Allow to do some additional domain initialisation. * @exit: Function called when the domain is destroyed. @@ -290,6 +298,7 @@ struct irq_domain_info { */ struct irq_domain *parent; #endif + struct irq_domain_chip_generic_info *dgc_info; int (*init)(struct irq_domain *d); void (*exit)(struct irq_domain *d); }; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index a0324d8e6dab..4d2a40359eb2 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -292,16 +292,25 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info) } #endif + if (info->dgc_info) { + err = irq_domain_alloc_generic_chips(domain, info->dgc_info); + if (err) + goto err_domain_free; + } + if (info->init) { err = info->init(domain); if (err) - goto err_domain_free; + goto err_domain_gc_remove; } __irq_domain_publish(domain); return domain; +err_domain_gc_remove: + if (info->dgc_info) + irq_domain_remove_generic_chips(domain); err_domain_free: irq_domain_free(domain); return ERR_PTR(err); @@ -369,6 +378,9 @@ void irq_domain_remove(struct irq_domain *domain) mutex_unlock(&irq_domain_mutex); + if (domain->flags & IRQ_DOMAIN_FLAG_DESTROY_GC) + irq_domain_remove_generic_chips(domain); + pr_debug("Removed domain %s\n", domain->name); irq_domain_free(domain); } -- cgit v1.2.3 From 0c5b29a6dc7b463b6072da8cef43800008728ff3 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:17 +0200 Subject: irqdomain: Add a resource managed version of irq_domain_instantiate() Add a devres version of irq_domain_instantiate(). Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-17-herve.codina@bootlin.com --- include/linux/irqdomain.h | 2 ++ kernel/irq/devres.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 5540b22a755c..8820317582c4 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -304,6 +304,8 @@ struct irq_domain_info { }; struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); +struct irq_domain *devm_irq_domain_instantiate(struct device *dev, + const struct irq_domain_info *info); struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, irq_hw_number_t hwirq_max, int direct_max, diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index f6e5515ee077..b3e98668f4dd 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -282,3 +283,43 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, } EXPORT_SYMBOL_GPL(devm_irq_setup_generic_chip); #endif /* CONFIG_GENERIC_IRQ_CHIP */ + +#ifdef CONFIG_IRQ_DOMAIN +static void devm_irq_domain_remove(struct device *dev, void *res) +{ + struct irq_domain **domain = res; + + irq_domain_remove(*domain); +} + +/** + * devm_irq_domain_instantiate() - Instantiate a new irq domain data for a + * managed device. + * @dev: Device to instantiate the domain for + * @info: Domain information pointer pointing to the information for this + * domain + * + * Return: A pointer to the instantiated irq domain or an ERR_PTR value. + */ +struct irq_domain *devm_irq_domain_instantiate(struct device *dev, + const struct irq_domain_info *info) +{ + struct irq_domain *domain; + struct irq_domain **dr; + + dr = devres_alloc(devm_irq_domain_remove, sizeof(*dr), GFP_KERNEL); + if (!dr) + return ERR_PTR(-ENOMEM); + + domain = irq_domain_instantiate(info); + if (!IS_ERR(domain)) { + *dr = domain; + devres_add(dev, dr); + } else { + devres_free(dr); + } + + return domain; +} +EXPORT_SYMBOL_GPL(devm_irq_domain_instantiate); +#endif /* CONFIG_IRQ_DOMAIN */ -- cgit v1.2.3 From 7c53626cd11820a11f9cb2c54c02d47fc062a265 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:18 +0200 Subject: irqdomain: Convert __irq_domain_add() wrappers to irq_domain_instantiate() __irq_domain_add() wrappers use directly __irq_domain_add(). With the introduction of irq_domain_instantiate(), __irq_domain_add() becomes obsolete. In order to fully remove __irq_domain_add(), convert wrappers to irq_domain_instantiate() [ tglx: Fixup struct initializers ] Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-18-herve.codina@bootlin.com --- include/linux/irqdomain.h | 58 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 53 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 8820317582c4..33a968fbdda2 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -400,7 +400,17 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data); + struct irq_domain_info info = { + .fwnode = of_node_to_fwnode(of_node), + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } #ifdef CONFIG_IRQ_DOMAIN_NOMAP @@ -409,7 +419,17 @@ static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_nod const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data); + struct irq_domain_info info = { + .fwnode = of_node_to_fwnode(of_node), + .hwirq_max = max_irq, + .direct_max = max_irq, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } extern unsigned int irq_create_direct_mapping(struct irq_domain *host); @@ -419,7 +439,16 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node_to_fwnode(of_node), 0, ~0, 0, ops, host_data); + struct irq_domain_info info = { + .fwnode = of_node_to_fwnode(of_node), + .hwirq_max = ~0U, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, @@ -427,14 +456,33 @@ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle * const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(fwnode, size, size, 0, ops, host_data); + struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(fwnode, 0, ~0, 0, ops, host_data); + struct irq_domain_info info = { + .fwnode = fwnode, + .hwirq_max = ~0, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; } extern void irq_domain_remove(struct irq_domain *host); -- cgit v1.2.3 From 0b4b172b760efabf8a77ea17644d333fbb444d39 Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Fri, 14 Jun 2024 19:32:21 +0200 Subject: irqdomain: Remove __irq_domain_add() __irq_domain_add() has been replaced by irq_domain_instanciate() and so, it is no more used. Simply remove it. Signed-off-by: Herve Codina Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20240614173232.1184015-21-herve.codina@bootlin.com --- include/linux/irqdomain.h | 6 +----- kernel/irq/irqdomain.c | 33 --------------------------------- 2 files changed, 1 insertion(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 33a968fbdda2..02cd486ac354 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -184,7 +184,7 @@ enum { /* Irq domain is hierarchical */ IRQ_DOMAIN_FLAG_HIERARCHY = (1 << 0), - /* Irq domain name was allocated in __irq_domain_add() */ + /* Irq domain name was allocated internally */ IRQ_DOMAIN_NAME_ALLOCATED = (1 << 1), /* Irq domain is an IPI domain with virq per cpu */ @@ -307,10 +307,6 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); struct irq_domain *devm_irq_domain_instantiate(struct device *dev, const struct irq_domain_info *info); -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, - irq_hw_number_t hwirq_max, int direct_max, - const struct irq_domain_ops *ops, - void *host_data); struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index c9b076c09eb4..91eaf6bfcbf0 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -317,39 +317,6 @@ err_domain_free: } EXPORT_SYMBOL_GPL(irq_domain_instantiate); -/** - * __irq_domain_add() - Allocate a new irq_domain data structure - * @fwnode: firmware node for the interrupt controller - * @size: Size of linear map; 0 for radix mapping only - * @hwirq_max: Maximum number of interrupts supported by controller - * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no - * direct mapping - * @ops: domain callbacks - * @host_data: Controller private data pointer - * - * Allocates and initializes an irq_domain structure. - * Returns pointer to IRQ domain, or NULL on failure. - */ -struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, - irq_hw_number_t hwirq_max, int direct_max, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain_info info = { - .fwnode = fwnode, - .size = size, - .hwirq_max = hwirq_max, - .direct_max = direct_max, - .ops = ops, - .host_data = host_data, - }; - struct irq_domain *d; - - d = irq_domain_instantiate(&info); - return IS_ERR(d) ? NULL : d; -} -EXPORT_SYMBOL_GPL(__irq_domain_add); - /** * irq_domain_remove() - Remove an irq domain. * @domain: domain to remove -- cgit v1.2.3 From 1037e4c53e851682ff8d1ab656567a4d5a333c93 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Jun 2024 12:58:48 +0300 Subject: cpu/hotplug: Add support for declaring CPU offlining not supported The ACPI MADT mailbox wakeup method doesn't allow to offline a CPU after it has been woken up. Currently, offlining is prevented based on the confidential computing attribute which is set for Intel TDX. But TDX is not the only possible user of the wake up method. The MADT wakeup can be implemented outside of a confidential computing environment. Offline support is a property of the wakeup method, not the CoCo implementation. Introduce cpu_hotplug_disable_offlining() that can be called to indicate that CPU offlining should be disabled. This function is going to replace CC_ATTR_HOTPLUG_DISABLED for ACPI MADT wakeup method. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Tested-by: Tao Liu Link: https://lore.kernel.org/r/20240614095904.1345461-4-kirill.shutemov@linux.intel.com --- include/linux/cpuhplock.h | 2 ++ kernel/cpu.c | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cpuhplock.h b/include/linux/cpuhplock.h index 431560bbd045..f7aa20f62b87 100644 --- a/include/linux/cpuhplock.h +++ b/include/linux/cpuhplock.h @@ -21,6 +21,7 @@ void cpus_read_lock(void); void cpus_read_unlock(void); int cpus_read_trylock(void); void lockdep_assert_cpus_held(void); +void cpu_hotplug_disable_offlining(void); void cpu_hotplug_disable(void); void cpu_hotplug_enable(void); void clear_tasks_mm_cpumask(int cpu); @@ -36,6 +37,7 @@ static inline void cpus_read_lock(void) { } static inline void cpus_read_unlock(void) { } static inline int cpus_read_trylock(void) { return true; } static inline void lockdep_assert_cpus_held(void) { } +static inline void cpu_hotplug_disable_offlining(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } static inline int remove_cpu(unsigned int cpu) { return -EPERM; } diff --git a/kernel/cpu.c b/kernel/cpu.c index 563877d6c28b..4c15b478e2bc 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -483,6 +483,8 @@ static int cpu_hotplug_disabled; DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); +static bool cpu_hotplug_offline_disabled __ro_after_init; + void cpus_read_lock(void) { percpu_down_read(&cpu_hotplug_lock); @@ -542,6 +544,14 @@ static void lockdep_release_cpus_lock(void) rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_); } +/* Declare CPU offlining not supported */ +void cpu_hotplug_disable_offlining(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_offline_disabled = true; + cpu_maps_update_done(); +} + /* * Wait for currently running CPU hotplug operations to complete (if any) and * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects @@ -1471,7 +1481,8 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target) * If the platform does not support hotplug, report it explicitly to * differentiate it from a transient offlining failure. */ - if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED)) + if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED) || + cpu_hotplug_offline_disabled) return -EOPNOTSUPP; if (cpu_hotplug_disabled) return -EBUSY; -- cgit v1.2.3 From 66e48e491d1e1a0f243ebfcb9639b23de1a5db5e Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Jun 2024 12:58:49 +0300 Subject: cpu/hotplug, x86/acpi: Disable CPU offlining for ACPI MADT wakeup ACPI MADT doesn't allow to offline a CPU after it has been woken up. Currently, CPU hotplug is prevented based on the confidential computing attribute which is set for Intel TDX. But TDX is not the only possible user of the wake up method. Any platform that uses ACPI MADT wakeup method cannot offline CPU. Disable CPU offlining on ACPI MADT wakeup enumeration. This has no visible effects for users: currently, TDX guest is the only platform that uses the ACPI MADT wakeup method. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Tao Liu Link: https://lore.kernel.org/r/20240614095904.1345461-5-kirill.shutemov@linux.intel.com --- arch/x86/coco/core.c | 1 - arch/x86/kernel/acpi/madt_wakeup.c | 3 +++ include/linux/cc_platform.h | 10 ---------- kernel/cpu.c | 3 +-- 4 files changed, 4 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index b31ef2424d19..0f81f70aca82 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -29,7 +29,6 @@ static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { case CC_ATTR_GUEST_UNROLL_STRING_IO: - case CC_ATTR_HOTPLUG_DISABLED: case CC_ATTR_GUEST_MEM_ENCRYPT: case CC_ATTR_MEM_ENCRYPT: return true; diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index cf79ea6f3007..d222be8d7a07 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include #include @@ -76,6 +77,8 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, acpi_mp_wake_mailbox_paddr = mp_wake->base_address; + cpu_hotplug_disable_offlining(); + apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); return 0; diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h index 60693a145894..caa4b4430634 100644 --- a/include/linux/cc_platform.h +++ b/include/linux/cc_platform.h @@ -81,16 +81,6 @@ enum cc_attr { */ CC_ATTR_GUEST_SEV_SNP, - /** - * @CC_ATTR_HOTPLUG_DISABLED: Hotplug is not supported or disabled. - * - * The platform/OS is running as a guest/virtual machine does not - * support CPU hotplug feature. - * - * Examples include TDX Guest. - */ - CC_ATTR_HOTPLUG_DISABLED, - /** * @CC_ATTR_HOST_SEV_SNP: AMD SNP enabled on the host. * diff --git a/kernel/cpu.c b/kernel/cpu.c index 4c15b478e2bc..a609385c7f99 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1481,8 +1481,7 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target) * If the platform does not support hotplug, report it explicitly to * differentiate it from a transient offlining failure. */ - if (cc_platform_has(CC_ATTR_HOTPLUG_DISABLED) || - cpu_hotplug_offline_disabled) + if (cpu_hotplug_offline_disabled) return -EOPNOTSUPP; if (cpu_hotplug_disabled) return -EBUSY; -- cgit v1.2.3 From 6630cbce7cd7785f76b1055f33a71199ef28510b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Jun 2024 12:58:59 +0300 Subject: x86/acpi: Rename fields in the acpi_madt_multiproc_wakeup structure In order to support MADT wakeup structure version 1, provide more appropriate names for the fields in the structure. Rename 'mailbox_version' to 'version'. This field signifies the version of the structure and the related protocols, rather than the version of the mailbox. This field has not been utilized in the code thus far. Rename 'base_address' to 'mailbox_address' to clarify the kind of address it represents. In version 1, the structure includes the reset vector address. Clear and distinct naming helps to prevent any confusion. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Kai Huang Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Tao Liu Link: https://lore.kernel.org/r/20240614095904.1345461-15-kirill.shutemov@linux.intel.com --- arch/x86/kernel/acpi/madt_wakeup.c | 2 +- include/acpi/actbl2.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index d222be8d7a07..004801b9b151 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -75,7 +75,7 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, acpi_table_print_madt_entry(&header->common); - acpi_mp_wake_mailbox_paddr = mp_wake->base_address; + acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address; cpu_hotplug_disable_offlining(); diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index ae747c89d92c..fa63362469aa 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1194,9 +1194,9 @@ struct acpi_madt_generic_translator { struct acpi_madt_multiproc_wakeup { struct acpi_subtable_header header; - u16 mailbox_version; + u16 version; u32 reserved; /* reserved - must be zero */ - u64 base_address; + u64 mailbox_address; }; #define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE 2032 -- cgit v1.2.3 From 1ceebe2e46720de02af4cf626dc847ecc4a263fd Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 14 Jun 2024 12:59:03 +0300 Subject: x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method MADT Multiprocessor Wakeup structure version 1 brings support for CPU offlining: BIOS provides a reset vector where the CPU has to jump to for offlining itself. The new TEST mailbox command can be used to test whether the CPU offlined itself which means the BIOS has control over the CPU and can online it again via the ACPI MADT wakeup method. Add CPU offlining support for the ACPI MADT wakeup method by implementing custom cpu_die(), play_dead() and stop_this_cpu() SMP operations. CPU offlining makes it possible to hand over secondary CPUs over kexec, not limiting the second kernel to a single CPU. The change conforms to the approved ACPI spec change proposal. See the Link. Signed-off-by: Kirill A. Shutemov Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Thomas Gleixner Acked-by: Kai Huang Acked-by: Rafael J. Wysocki Tested-by: Tao Liu Link: https://lore.kernel.org/all/13356251.uLZWGnKmhe@kreacher Link: https://lore.kernel.org/r/20240614095904.1345461-19-kirill.shutemov@linux.intel.com --- arch/x86/include/asm/acpi.h | 2 + arch/x86/kernel/acpi/Makefile | 2 +- arch/x86/kernel/acpi/madt_playdead.S | 28 ++++++ arch/x86/kernel/acpi/madt_wakeup.c | 184 ++++++++++++++++++++++++++++++++++- include/acpi/actbl2.h | 15 ++- 5 files changed, 227 insertions(+), 4 deletions(-) create mode 100644 arch/x86/kernel/acpi/madt_playdead.S (limited to 'include') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index ceacac2b335d..21bc53f5ed0c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -83,6 +83,8 @@ union acpi_subtable_headers; int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, const unsigned long end); +void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa); + /* * Check if the CPU can handle C2 and deeper */ diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 2feba7257665..842a5f449404 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o obj-$(CONFIG_ACPI_APEI) += apei.o obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o -obj-$(CONFIG_ACPI_MADT_WAKEUP) += madt_wakeup.o +obj-$(CONFIG_ACPI_MADT_WAKEUP) += madt_wakeup.o madt_playdead.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o diff --git a/arch/x86/kernel/acpi/madt_playdead.S b/arch/x86/kernel/acpi/madt_playdead.S new file mode 100644 index 000000000000..4e498d28cdc8 --- /dev/null +++ b/arch/x86/kernel/acpi/madt_playdead.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include +#include +#include + + .text + .align PAGE_SIZE + +/* + * asm_acpi_mp_play_dead() - Hand over control of the CPU to the BIOS + * + * rdi: Address of the ACPI MADT MPWK ResetVector + * rsi: PGD of the identity mapping + */ +SYM_FUNC_START(asm_acpi_mp_play_dead) + /* Turn off global entries. Following CR3 write will flush them. */ + movq %cr4, %rdx + andq $~(X86_CR4_PGE), %rdx + movq %rdx, %cr4 + + /* Switch to identity mapping */ + movq %rsi, %cr3 + + /* Jump to reset vector */ + ANNOTATE_RETPOLINE_SAFE + jmp *%rdi +SYM_FUNC_END(asm_acpi_mp_play_dead) diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index 30820f9de5af..6cfe762be28b 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -1,10 +1,19 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include #include +#include #include +#include +#include +#include +#include #include #include +#include +#include +#include #include +#include /* Physical address of the Multiprocessor Wakeup Structure mailbox */ static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; @@ -12,6 +21,154 @@ static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; /* Virtual address of the Multiprocessor Wakeup Structure mailbox */ static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init; +static u64 acpi_mp_pgd __ro_after_init; +static u64 acpi_mp_reset_vector_paddr __ro_after_init; + +static void acpi_mp_stop_this_cpu(void) +{ + asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd); +} + +static void acpi_mp_play_dead(void) +{ + play_dead_common(); + asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd); +} + +static void acpi_mp_cpu_die(unsigned int cpu) +{ + u32 apicid = per_cpu(x86_cpu_to_apicid, cpu); + unsigned long timeout; + + /* + * Use TEST mailbox command to prove that BIOS got control over + * the CPU before declaring it dead. + * + * BIOS has to clear 'command' field of the mailbox. + */ + acpi_mp_wake_mailbox->apic_id = apicid; + smp_store_release(&acpi_mp_wake_mailbox->command, + ACPI_MP_WAKE_COMMAND_TEST); + + /* Don't wait longer than a second. */ + timeout = USEC_PER_SEC; + while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout) + udelay(1); + + if (!timeout) + pr_err("Failed to hand over CPU %d to BIOS\n", cpu); +} + +/* The argument is required to match type of x86_mapping_info::alloc_pgt_page */ +static void __init *alloc_pgt_page(void *dummy) +{ + return memblock_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static void __init free_pgt_page(void *pgt, void *dummy) +{ + return memblock_free(pgt, PAGE_SIZE); +} + +/* + * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at + * the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches + * to the identity mapping and the function has be present at the same spot in + * the virtual address space before and after switching page tables. + */ +static int __init init_transition_pgtable(pgd_t *pgd) +{ + pgprot_t prot = PAGE_KERNEL_EXEC_NOENC; + unsigned long vaddr, paddr; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + vaddr = (unsigned long)asm_acpi_mp_play_dead; + pgd += pgd_index(vaddr); + if (!pgd_present(*pgd)) { + p4d = (p4d_t *)alloc_pgt_page(NULL); + if (!p4d) + return -ENOMEM; + set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE)); + } + p4d = p4d_offset(pgd, vaddr); + if (!p4d_present(*p4d)) { + pud = (pud_t *)alloc_pgt_page(NULL); + if (!pud) + return -ENOMEM; + set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE)); + } + pud = pud_offset(p4d, vaddr); + if (!pud_present(*pud)) { + pmd = (pmd_t *)alloc_pgt_page(NULL); + if (!pmd) + return -ENOMEM; + set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); + } + pmd = pmd_offset(pud, vaddr); + if (!pmd_present(*pmd)) { + pte = (pte_t *)alloc_pgt_page(NULL); + if (!pte) + return -ENOMEM; + set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); + } + pte = pte_offset_kernel(pmd, vaddr); + + paddr = __pa(vaddr); + set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot)); + + return 0; +} + +static int __init acpi_mp_setup_reset(u64 reset_vector) +{ + struct x86_mapping_info info = { + .alloc_pgt_page = alloc_pgt_page, + .free_pgt_page = free_pgt_page, + .page_flag = __PAGE_KERNEL_LARGE_EXEC, + .kernpg_flag = _KERNPG_TABLE_NOENC, + }; + pgd_t *pgd; + + pgd = alloc_pgt_page(NULL); + if (!pgd) + return -ENOMEM; + + for (int i = 0; i < nr_pfn_mapped; i++) { + unsigned long mstart, mend; + + mstart = pfn_mapped[i].start << PAGE_SHIFT; + mend = pfn_mapped[i].end << PAGE_SHIFT; + if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + } + + if (kernel_ident_mapping_init(&info, pgd, + PAGE_ALIGN_DOWN(reset_vector), + PAGE_ALIGN(reset_vector + 1))) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + + if (init_transition_pgtable(pgd)) { + kernel_ident_mapping_free(&info, pgd); + return -ENOMEM; + } + + smp_ops.play_dead = acpi_mp_play_dead; + smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu; + smp_ops.cpu_die = acpi_mp_cpu_die; + + acpi_mp_reset_vector_paddr = reset_vector; + acpi_mp_pgd = __pa(pgd); + + return 0; +} + static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip) { if (!acpi_mp_wake_mailbox_paddr) { @@ -97,14 +254,37 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header, struct acpi_madt_multiproc_wakeup *mp_wake; mp_wake = (struct acpi_madt_multiproc_wakeup *)header; - if (BAD_MADT_ENTRY(mp_wake, end)) + + /* + * Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake + * entry. 'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger + * than the actual size of the MP wakeup entry in ACPI table because the + * 'reset_vector' is only available in the V1 MP wakeup structure. + */ + if (!mp_wake) + return -EINVAL; + if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0) + return -EINVAL; + if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0) return -EINVAL; acpi_table_print_madt_entry(&header->common); acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address; - acpi_mp_disable_offlining(mp_wake); + if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 && + mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) { + if (acpi_mp_setup_reset(mp_wake->reset_vector)) { + pr_warn("Failed to setup MADT reset vector\n"); + acpi_mp_disable_offlining(mp_wake); + } + } else { + /* + * CPU offlining requires version 1 of the ACPI MADT wakeup + * structure. + */ + acpi_mp_disable_offlining(mp_wake); + } apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu); diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index fa63362469aa..e27958ef8264 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -1197,8 +1197,20 @@ struct acpi_madt_multiproc_wakeup { u16 version; u32 reserved; /* reserved - must be zero */ u64 mailbox_address; + u64 reset_vector; }; +/* Values for Version field above */ + +enum acpi_madt_multiproc_wakeup_version { + ACPI_MADT_MP_WAKEUP_VERSION_NONE = 0, + ACPI_MADT_MP_WAKEUP_VERSION_V1 = 1, + ACPI_MADT_MP_WAKEUP_VERSION_RESERVED = 2, /* 2 and greater are reserved */ +}; + +#define ACPI_MADT_MP_WAKEUP_SIZE_V0 16 +#define ACPI_MADT_MP_WAKEUP_SIZE_V1 24 + #define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE 2032 #define ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE 2048 @@ -1211,7 +1223,8 @@ struct acpi_madt_multiproc_wakeup_mailbox { u8 reserved_firmware[ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE]; /* reserved for firmware use */ }; -#define ACPI_MP_WAKE_COMMAND_WAKEUP 1 +#define ACPI_MP_WAKE_COMMAND_WAKEUP 1 +#define ACPI_MP_WAKE_COMMAND_TEST 2 /* 17: CPU Core Interrupt Controller (ACPI 6.5) */ -- cgit v1.2.3 From d4a7d067e061c95c6387cf537258082074a4d299 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 14:57:34 +0200 Subject: ASoC: soc-dai.h: Constify DAI ops auto_selectable_formats The core ASoC code does not modify contents of the 'auto_selectable_formats' array passed in 'struct snd_soc_dai_ops', so make it const for code safety. Reviewed-by: Herve Codina Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617125735.582963-1-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 15ef268c9845..279223c4ef5e 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -361,7 +361,7 @@ struct snd_soc_dai_ops { * see * snd_soc_dai_get_fmt() */ - u64 *auto_selectable_formats; + const u64 *auto_selectable_formats; int num_auto_selectable_formats; /* probe ordering - for components with runtime dependencies */ -- cgit v1.2.3 From 614dc0fb76327dbd81abd4612fbc2e4ba8f205e6 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 5 Jun 2024 10:18:52 -0500 Subject: sev-guest: configfs-tsm: Allow the privlevel_floor attribute to be updated With the introduction of an SVSM, Linux will be running at a non-zero VMPL. Any request for an attestation report at a higher privilege VMPL than what Linux is currently running will result in an error. Allow for the privlevel_floor attribute to be updated dynamically. [ bp: Trim commit message. ] Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/5a736be9384aebd98a0b7c929660f8a97cbdc366.1717600736.git.thomas.lendacky@amd.com --- drivers/virt/coco/sev-guest/sev-guest.c | 5 ++++- include/linux/tsm.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 4597042f31e4..3560b3a8bb4d 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -892,7 +892,7 @@ static int sev_report_new(struct tsm_report *report, void *data) return 0; } -static const struct tsm_ops sev_tsm_ops = { +static struct tsm_ops sev_tsm_ops = { .name = KBUILD_MODNAME, .report_new = sev_report_new, }; @@ -979,6 +979,9 @@ static int __init sev_guest_probe(struct platform_device *pdev) snp_dev->input.resp_gpa = __pa(snp_dev->response); snp_dev->input.data_gpa = __pa(snp_dev->certs_data); + /* Set the privlevel_floor attribute based on the vmpck_id */ + sev_tsm_ops.privlevel_floor = vmpck_id; + ret = tsm_register(&sev_tsm_ops, snp_dev, &tsm_report_extra_type); if (ret) goto e_free_cert_data; diff --git a/include/linux/tsm.h b/include/linux/tsm.h index de8324a2223c..50c5769657d8 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -54,7 +54,7 @@ struct tsm_report { */ struct tsm_ops { const char *name; - const unsigned int privlevel_floor; + unsigned int privlevel_floor; int (*report_new)(struct tsm_report *report, void *data); }; -- cgit v1.2.3 From 0e6a35b93745bb2d8b921fd0520ef730489d41a2 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 5 Jun 2024 10:18:53 -0500 Subject: fs/configfs: Add a callback to determine attribute visibility In order to support dynamic decisions as to whether an attribute should be created, add a callback that returns a bool to indicate whether the attribute should be displayed. If no callback is registered, the attribute is displayed by default. Co-developed-by: Dan Williams Signed-off-by: Dan Williams Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/e555c8740a263fab9f83b2cbb44da1af49a2813c.1717600736.git.thomas.lendacky@amd.com --- fs/configfs/dir.c | 10 ++++++++++ include/linux/configfs.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 18677cd4e62f..43d6bde1adcc 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -580,6 +580,7 @@ static void detach_attrs(struct config_item * item) static int populate_attrs(struct config_item *item) { const struct config_item_type *t = item->ci_type; + struct configfs_group_operations *ops; struct configfs_attribute *attr; struct configfs_bin_attribute *bin_attr; int error = 0; @@ -587,14 +588,23 @@ static int populate_attrs(struct config_item *item) if (!t) return -EINVAL; + + ops = t->ct_group_ops; + if (t->ct_attrs) { for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) { + if (ops && ops->is_visible && !ops->is_visible(item, attr, i)) + continue; + if ((error = configfs_create_file(item, attr))) break; } } if (t->ct_bin_attrs) { for (i = 0; (bin_attr = t->ct_bin_attrs[i]) != NULL; i++) { + if (ops && ops->is_bin_visible && !ops->is_bin_visible(item, bin_attr, i)) + continue; + error = configfs_create_bin_file(item, bin_attr); if (error) break; diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 2606711adb18..c771e9d0d0b9 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -216,6 +216,9 @@ struct configfs_group_operations { struct config_group *(*make_group)(struct config_group *group, const char *name); void (*disconnect_notify)(struct config_group *group, struct config_item *item); void (*drop_item)(struct config_group *group, struct config_item *item); + bool (*is_visible)(struct config_item *item, struct configfs_attribute *attr, int n); + bool (*is_bin_visible)(struct config_item *item, struct configfs_bin_attribute *attr, + int n); }; struct configfs_subsystem { -- cgit v1.2.3 From 20dfee95936413708701eb151f419597fdd9d948 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 5 Jun 2024 10:18:54 -0500 Subject: x86/sev: Take advantage of configfs visibility support in TSM The TSM attestation report support provides multiple configfs attribute types (both for standard and binary attributes) to allow for additional attributes to be displayed for SNP as compared to TDX. With the ability to hide attributes via configfs, consolidate the multiple attribute groups into a single standard attribute group and a single binary attribute group. Modify the TDX support to hide the attributes that were previously "hidden" as a result of registering the selective attribute groups. Co-developed-by: Dan Williams Signed-off-by: Dan Williams Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Kuppuswamy Sathyanarayanan Link: https://lore.kernel.org/r/8873c45d0c8abc35aaf01d7833a55788a6905727.1717600736.git.thomas.lendacky@amd.com --- drivers/virt/coco/sev-guest/sev-guest.c | 3 +- drivers/virt/coco/tdx-guest/tdx-guest.c | 26 +++++++++- drivers/virt/coco/tsm.c | 86 ++++++++++++++++----------------- include/linux/tsm.h | 38 ++++++++++++--- 4 files changed, 100 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 3560b3a8bb4d..0c70a38c8cc0 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -982,7 +983,7 @@ static int __init sev_guest_probe(struct platform_device *pdev) /* Set the privlevel_floor attribute based on the vmpck_id */ sev_tsm_ops.privlevel_floor = vmpck_id; - ret = tsm_register(&sev_tsm_ops, snp_dev, &tsm_report_extra_type); + ret = tsm_register(&sev_tsm_ops, snp_dev); if (ret) goto e_free_cert_data; diff --git a/drivers/virt/coco/tdx-guest/tdx-guest.c b/drivers/virt/coco/tdx-guest/tdx-guest.c index 1253bf76b570..2acba56ad42e 100644 --- a/drivers/virt/coco/tdx-guest/tdx-guest.c +++ b/drivers/virt/coco/tdx-guest/tdx-guest.c @@ -249,6 +249,28 @@ done: return ret; } +static bool tdx_report_attr_visible(int n) +{ + switch (n) { + case TSM_REPORT_GENERATION: + case TSM_REPORT_PROVIDER: + return true; + } + + return false; +} + +static bool tdx_report_bin_attr_visible(int n) +{ + switch (n) { + case TSM_REPORT_INBLOB: + case TSM_REPORT_OUTBLOB: + return true; + } + + return false; +} + static long tdx_guest_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -281,6 +303,8 @@ MODULE_DEVICE_TABLE(x86cpu, tdx_guest_ids); static const struct tsm_ops tdx_tsm_ops = { .name = KBUILD_MODNAME, .report_new = tdx_report_new, + .report_attr_visible = tdx_report_attr_visible, + .report_bin_attr_visible = tdx_report_bin_attr_visible, }; static int __init tdx_guest_init(void) @@ -301,7 +325,7 @@ static int __init tdx_guest_init(void) goto free_misc; } - ret = tsm_register(&tdx_tsm_ops, NULL, NULL); + ret = tsm_register(&tdx_tsm_ops, NULL); if (ret) goto free_quote; diff --git a/drivers/virt/coco/tsm.c b/drivers/virt/coco/tsm.c index d1c2db83a8ca..7db534b63c9f 100644 --- a/drivers/virt/coco/tsm.c +++ b/drivers/virt/coco/tsm.c @@ -14,7 +14,6 @@ static struct tsm_provider { const struct tsm_ops *ops; - const struct config_item_type *type; void *data; } provider; static DECLARE_RWSEM(tsm_rwsem); @@ -252,34 +251,18 @@ static ssize_t tsm_report_auxblob_read(struct config_item *cfg, void *buf, } CONFIGFS_BIN_ATTR_RO(tsm_report_, auxblob, NULL, TSM_OUTBLOB_MAX); -#define TSM_DEFAULT_ATTRS() \ - &tsm_report_attr_generation, \ - &tsm_report_attr_provider - static struct configfs_attribute *tsm_report_attrs[] = { - TSM_DEFAULT_ATTRS(), - NULL, -}; - -static struct configfs_attribute *tsm_report_extra_attrs[] = { - TSM_DEFAULT_ATTRS(), - &tsm_report_attr_privlevel, - &tsm_report_attr_privlevel_floor, + [TSM_REPORT_GENERATION] = &tsm_report_attr_generation, + [TSM_REPORT_PROVIDER] = &tsm_report_attr_provider, + [TSM_REPORT_PRIVLEVEL] = &tsm_report_attr_privlevel, + [TSM_REPORT_PRIVLEVEL_FLOOR] = &tsm_report_attr_privlevel_floor, NULL, }; -#define TSM_DEFAULT_BIN_ATTRS() \ - &tsm_report_attr_inblob, \ - &tsm_report_attr_outblob - static struct configfs_bin_attribute *tsm_report_bin_attrs[] = { - TSM_DEFAULT_BIN_ATTRS(), - NULL, -}; - -static struct configfs_bin_attribute *tsm_report_bin_extra_attrs[] = { - TSM_DEFAULT_BIN_ATTRS(), - &tsm_report_attr_auxblob, + [TSM_REPORT_INBLOB] = &tsm_report_attr_inblob, + [TSM_REPORT_OUTBLOB] = &tsm_report_attr_outblob, + [TSM_REPORT_AUXBLOB] = &tsm_report_attr_auxblob, NULL, }; @@ -297,21 +280,44 @@ static struct configfs_item_operations tsm_report_item_ops = { .release = tsm_report_item_release, }; -const struct config_item_type tsm_report_default_type = { - .ct_owner = THIS_MODULE, - .ct_bin_attrs = tsm_report_bin_attrs, - .ct_attrs = tsm_report_attrs, - .ct_item_ops = &tsm_report_item_ops, +static bool tsm_report_is_visible(struct config_item *item, + struct configfs_attribute *attr, int n) +{ + guard(rwsem_read)(&tsm_rwsem); + if (!provider.ops) + return false; + + if (!provider.ops->report_attr_visible) + return true; + + return provider.ops->report_attr_visible(n); +} + +static bool tsm_report_is_bin_visible(struct config_item *item, + struct configfs_bin_attribute *attr, int n) +{ + guard(rwsem_read)(&tsm_rwsem); + if (!provider.ops) + return false; + + if (!provider.ops->report_bin_attr_visible) + return true; + + return provider.ops->report_bin_attr_visible(n); +} + +static struct configfs_group_operations tsm_report_attr_group_ops = { + .is_visible = tsm_report_is_visible, + .is_bin_visible = tsm_report_is_bin_visible, }; -EXPORT_SYMBOL_GPL(tsm_report_default_type); -const struct config_item_type tsm_report_extra_type = { +static const struct config_item_type tsm_report_type = { .ct_owner = THIS_MODULE, - .ct_bin_attrs = tsm_report_bin_extra_attrs, - .ct_attrs = tsm_report_extra_attrs, + .ct_bin_attrs = tsm_report_bin_attrs, + .ct_attrs = tsm_report_attrs, .ct_item_ops = &tsm_report_item_ops, + .ct_group_ops = &tsm_report_attr_group_ops, }; -EXPORT_SYMBOL_GPL(tsm_report_extra_type); static struct config_item *tsm_report_make_item(struct config_group *group, const char *name) @@ -326,7 +332,7 @@ static struct config_item *tsm_report_make_item(struct config_group *group, if (!state) return ERR_PTR(-ENOMEM); - config_item_init_type_name(&state->cfg, name, provider.type); + config_item_init_type_name(&state->cfg, name, &tsm_report_type); return &state->cfg; } @@ -353,16 +359,10 @@ static struct configfs_subsystem tsm_configfs = { .su_mutex = __MUTEX_INITIALIZER(tsm_configfs.su_mutex), }; -int tsm_register(const struct tsm_ops *ops, void *priv, - const struct config_item_type *type) +int tsm_register(const struct tsm_ops *ops, void *priv) { const struct tsm_ops *conflict; - if (!type) - type = &tsm_report_default_type; - if (!(type == &tsm_report_default_type || type == &tsm_report_extra_type)) - return -EINVAL; - guard(rwsem_write)(&tsm_rwsem); conflict = provider.ops; if (conflict) { @@ -372,7 +372,6 @@ int tsm_register(const struct tsm_ops *ops, void *priv, provider.ops = ops; provider.data = priv; - provider.type = type; return 0; } EXPORT_SYMBOL_GPL(tsm_register); @@ -384,7 +383,6 @@ int tsm_unregister(const struct tsm_ops *ops) return -EBUSY; provider.ops = NULL; provider.data = NULL; - provider.type = NULL; return 0; } EXPORT_SYMBOL_GPL(tsm_unregister); diff --git a/include/linux/tsm.h b/include/linux/tsm.h index 50c5769657d8..30d9d270b446 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -42,12 +42,40 @@ struct tsm_report { u8 *auxblob; }; +/** + * enum tsm_attr_index - index used to reference report attributes + * @TSM_REPORT_GENERATION: index of the report generation number attribute + * @TSM_REPORT_PROVIDER: index of the provider name attribute + * @TSM_REPORT_PRIVLEVEL: index of the desired privilege level attribute + * @TSM_REPORT_PRIVLEVEL_FLOOR: index of the minimum allowed privileg level attribute + */ +enum tsm_attr_index { + TSM_REPORT_GENERATION, + TSM_REPORT_PROVIDER, + TSM_REPORT_PRIVLEVEL, + TSM_REPORT_PRIVLEVEL_FLOOR, +}; + +/** + * enum tsm_bin_attr_index - index used to reference binary report attributes + * @TSM_REPORT_INBLOB: index of the binary report input attribute + * @TSM_REPORT_OUTBLOB: index of the binary report output attribute + * @TSM_REPORT_AUXBLOB: index of the binary auxiliary data attribute + */ +enum tsm_bin_attr_index { + TSM_REPORT_INBLOB, + TSM_REPORT_OUTBLOB, + TSM_REPORT_AUXBLOB, +}; + /** * struct tsm_ops - attributes and operations for tsm instances * @name: tsm id reflected in /sys/kernel/config/tsm/report/$report/provider * @privlevel_floor: convey base privlevel for nested scenarios * @report_new: Populate @report with the report blob and auxblob * (optional), return 0 on successful population, or -errno otherwise + * @report_attr_visible: show or hide a report attribute entry + * @report_bin_attr_visible: show or hide a report binary attribute entry * * Implementation specific ops, only one is expected to be registered at * a time i.e. only one of "sev-guest", "tdx-guest", etc. @@ -56,14 +84,10 @@ struct tsm_ops { const char *name; unsigned int privlevel_floor; int (*report_new)(struct tsm_report *report, void *data); + bool (*report_attr_visible)(int n); + bool (*report_bin_attr_visible)(int n); }; -extern const struct config_item_type tsm_report_default_type; - -/* publish @privlevel, @privlevel_floor, and @auxblob attributes */ -extern const struct config_item_type tsm_report_extra_type; - -int tsm_register(const struct tsm_ops *ops, void *priv, - const struct config_item_type *type); +int tsm_register(const struct tsm_ops *ops, void *priv); int tsm_unregister(const struct tsm_ops *ops); #endif /* __TSM_H */ -- cgit v1.2.3 From 627dc671518b7f004ce04c45e8711f8dca94a57c Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 5 Jun 2024 10:18:55 -0500 Subject: x86/sev: Extend the config-fs attestation support for an SVSM When an SVSM is present, the guest can also request attestation reports from it. These SVSM attestation reports can be used to attest the SVSM and any services running within the SVSM. Extend the config-fs attestation support to provide such. This involves creating four new config-fs attributes: - 'service-provider' (input) This attribute is used to determine whether the attestation request should be sent to the specified service provider or to the SEV firmware. The SVSM service provider is represented by the value 'svsm'. - 'service_guid' (input) Used for requesting the attestation of a single service within the service provider. A null GUID implies that the SVSM_ATTEST_SERVICES call should be used to request the attestation report. A non-null GUID implies that the SVSM_ATTEST_SINGLE_SERVICE call should be used. - 'service_manifest_version' (input) Used with the SVSM_ATTEST_SINGLE_SERVICE call, the service version represents a specific service manifest version be used for the attestation report. - 'manifestblob' (output) Used to return the service manifest associated with the attestation report. Only display these new attributes when running under an SVSM. [ bp: Massage. - s/svsm_attestation_call/svsm_attest_call/g ] Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/965015dce3c76bb8724839d50c5dea4e4b5d598f.1717600736.git.thomas.lendacky@amd.com --- Documentation/ABI/testing/configfs-tsm | 64 ++++++++++++ arch/x86/include/asm/sev.h | 31 +++++- arch/x86/kernel/sev.c | 50 +++++++++ drivers/virt/coco/sev-guest/sev-guest.c | 179 ++++++++++++++++++++++++++++++++ drivers/virt/coco/tsm.c | 93 ++++++++++++++++- include/linux/tsm.h | 19 ++++ 6 files changed, 433 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/Documentation/ABI/testing/configfs-tsm b/Documentation/ABI/testing/configfs-tsm index dd24202b5ba5..1db2008f25f9 100644 --- a/Documentation/ABI/testing/configfs-tsm +++ b/Documentation/ABI/testing/configfs-tsm @@ -31,6 +31,18 @@ Description: Standardization v2.03 Section 4.1.8.1 MSG_REPORT_REQ. https://www.amd.com/content/dam/amd/en/documents/epyc-technical-docs/specifications/56421.pdf +What: /sys/kernel/config/tsm/report/$name/manifestblob +Date: January, 2024 +KernelVersion: v6.10 +Contact: linux-coco@lists.linux.dev +Description: + (RO) Optional supplemental data that a TSM may emit, visibility + of this attribute depends on TSM, and may be empty if no + manifest data is available. + + See 'service_provider' for information on the format of the + manifest blob. + What: /sys/kernel/config/tsm/report/$name/provider Date: September, 2023 KernelVersion: v6.7 @@ -80,3 +92,55 @@ Contact: linux-coco@lists.linux.dev Description: (RO) Indicates the minimum permissible value that can be written to @privlevel. + +What: /sys/kernel/config/tsm/report/$name/service_provider +Date: January, 2024 +KernelVersion: v6.10 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Attribute is visible if a TSM implementation provider + supports the concept of attestation reports from a service + provider for TVMs, like SEV-SNP running under an SVSM. + Specifying the service provider via this attribute will create + an attestation report as specified by the service provider. + Currently supported service-providers are: + svsm + + For the "svsm" service provider, see the Secure VM Service Module + for SEV-SNP Guests v1.00 Section 7. For the doc, search for + "site:amd.com "Secure VM Service Module for SEV-SNP + Guests", docID: 58019" + +What: /sys/kernel/config/tsm/report/$name/service_guid +Date: January, 2024 +KernelVersion: v6.10 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Attribute is visible if a TSM implementation provider + supports the concept of attestation reports from a service + provider for TVMs, like SEV-SNP running under an SVSM. + Specifying an empty/null GUID (00000000-0000-0000-0000-000000) + requests all active services within the service provider be + part of the attestation report. Specifying a GUID request + an attestation report of just the specified service using the + manifest form specified by the service_manifest_version + attribute. + + See 'service_provider' for information on the format of the + service guid. + +What: /sys/kernel/config/tsm/report/$name/service_manifest_version +Date: January, 2024 +KernelVersion: v6.10 +Contact: linux-coco@lists.linux.dev +Description: + (WO) Attribute is visible if a TSM implementation provider + supports the concept of attestation reports from a service + provider for TVMs, like SEV-SNP running under an SVSM. + Indicates the service manifest version requested for the + attestation report (default 0). If this field is not set by + the user, the default manifest version of the service (the + service's initial/first manifest version) is returned. + + See 'service_provider' for information on the format of the + service manifest version. diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 9c6f269d4bf5..ac5886ce252e 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -213,6 +213,27 @@ struct svsm_pvalidate_call { offsetof(struct svsm_pvalidate_call, entry)) / \ sizeof(struct svsm_pvalidate_entry)) +/* + * The SVSM Attestation related structures + */ +struct svsm_loc_entry { + u64 pa; + u32 len; + u8 rsvd[4]; +}; + +struct svsm_attest_call { + struct svsm_loc_entry report_buf; + struct svsm_loc_entry nonce; + struct svsm_loc_entry manifest_buf; + struct svsm_loc_entry certificates_buf; + + /* For attesting a single service */ + u8 service_guid[16]; + u32 service_manifest_ver; + u8 rsvd[4]; +}; + /* * SVSM protocol structure */ @@ -236,6 +257,10 @@ struct svsm_call { #define SVSM_CORE_CREATE_VCPU 2 #define SVSM_CORE_DELETE_VCPU 3 +#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x)) +#define SVSM_ATTEST_SERVICES 0 +#define SVSM_ATTEST_SINGLE_SERVICE 1 + #ifdef CONFIG_AMD_MEM_ENCRYPT extern u8 snp_vmpl; @@ -317,6 +342,7 @@ bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); @@ -349,7 +375,10 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in { return -ENOTTY; } - +static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input) +{ + return -ENOTTY; +} static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4dc7ae33eac9..53ac3e0fc92a 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -2387,6 +2387,56 @@ static int __init init_sev_config(char *str) } __setup("sev=", init_sev_config); +static void update_attest_input(struct svsm_call *call, struct svsm_attest_call *input) +{ + /* If (new) lengths have been returned, propagate them up */ + if (call->rcx_out != call->rcx) + input->manifest_buf.len = call->rcx_out; + + if (call->rdx_out != call->rdx) + input->certificates_buf.len = call->rdx_out; + + if (call->r8_out != call->r8) + input->report_buf.len = call->r8_out; +} + +int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, + struct svsm_attest_call *input) +{ + struct svsm_attest_call *ac; + unsigned long flags; + u64 attest_call_pa; + int ret; + + if (!snp_vmpl) + return -EINVAL; + + local_irq_save(flags); + + call->caa = svsm_get_caa(); + + ac = (struct svsm_attest_call *)call->caa->svsm_buffer; + attest_call_pa = svsm_get_caa_pa() + offsetof(struct svsm_ca, svsm_buffer); + + *ac = *input; + + /* + * Set input registers for the request and set RDX and R8 to known + * values in order to detect length values being returned in them. + */ + call->rax = call_id; + call->rcx = attest_call_pa; + call->rdx = -1; + call->r8 = -1; + ret = svsm_perform_call_protocol(call); + update_attest_input(call, input); + + local_irq_restore(flags); + + return ret; +} +EXPORT_SYMBOL_GPL(snp_issue_svsm_attest_req); + int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { struct ghcb_state state; diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c index 0c70a38c8cc0..37522886ae95 100644 --- a/drivers/virt/coco/sev-guest/sev-guest.c +++ b/drivers/virt/coco/sev-guest/sev-guest.c @@ -39,6 +39,8 @@ #define SNP_REQ_MAX_RETRY_DURATION (60*HZ) #define SNP_REQ_RETRY_DELAY (2*HZ) +#define SVSM_MAX_RETRIES 3 + struct snp_guest_crypto { struct crypto_aead *tfm; u8 *iv, *authtag; @@ -791,6 +793,143 @@ struct snp_msg_cert_entry { u32 length; }; +static int sev_svsm_report_new(struct tsm_report *report, void *data) +{ + unsigned int rep_len, man_len, certs_len; + struct tsm_desc *desc = &report->desc; + struct svsm_attest_call ac = {}; + unsigned int retry_count; + void *rep, *man, *certs; + struct svsm_call call; + unsigned int size; + bool try_again; + void *buffer; + u64 call_id; + int ret; + + /* + * Allocate pages for the request: + * - Report blob (4K) + * - Manifest blob (4K) + * - Certificate blob (16K) + * + * Above addresses must be 4K aligned + */ + rep_len = SZ_4K; + man_len = SZ_4K; + certs_len = SEV_FW_BLOB_MAX_SIZE; + + guard(mutex)(&snp_cmd_mutex); + + if (guid_is_null(&desc->service_guid)) { + call_id = SVSM_ATTEST_CALL(SVSM_ATTEST_SERVICES); + } else { + export_guid(ac.service_guid, &desc->service_guid); + ac.service_manifest_ver = desc->service_manifest_version; + + call_id = SVSM_ATTEST_CALL(SVSM_ATTEST_SINGLE_SERVICE); + } + + retry_count = 0; + +retry: + memset(&call, 0, sizeof(call)); + + size = rep_len + man_len + certs_len; + buffer = alloc_pages_exact(size, __GFP_ZERO); + if (!buffer) + return -ENOMEM; + + rep = buffer; + ac.report_buf.pa = __pa(rep); + ac.report_buf.len = rep_len; + + man = rep + rep_len; + ac.manifest_buf.pa = __pa(man); + ac.manifest_buf.len = man_len; + + certs = man + man_len; + ac.certificates_buf.pa = __pa(certs); + ac.certificates_buf.len = certs_len; + + ac.nonce.pa = __pa(desc->inblob); + ac.nonce.len = desc->inblob_len; + + ret = snp_issue_svsm_attest_req(call_id, &call, &ac); + if (ret) { + free_pages_exact(buffer, size); + + switch (call.rax_out) { + case SVSM_ERR_INVALID_PARAMETER: + try_again = false; + + if (ac.report_buf.len > rep_len) { + rep_len = PAGE_ALIGN(ac.report_buf.len); + try_again = true; + } + + if (ac.manifest_buf.len > man_len) { + man_len = PAGE_ALIGN(ac.manifest_buf.len); + try_again = true; + } + + if (ac.certificates_buf.len > certs_len) { + certs_len = PAGE_ALIGN(ac.certificates_buf.len); + try_again = true; + } + + /* If one of the buffers wasn't large enough, retry the request */ + if (try_again && retry_count < SVSM_MAX_RETRIES) { + retry_count++; + goto retry; + } + + return -EINVAL; + default: + pr_err_ratelimited("SVSM attestation request failed (%d / 0x%llx)\n", + ret, call.rax_out); + return -EINVAL; + } + } + + /* + * Allocate all the blob memory buffers at once so that the cleanup is + * done for errors that occur after the first allocation (i.e. before + * using no_free_ptr()). + */ + rep_len = ac.report_buf.len; + void *rbuf __free(kvfree) = kvzalloc(rep_len, GFP_KERNEL); + + man_len = ac.manifest_buf.len; + void *mbuf __free(kvfree) = kvzalloc(man_len, GFP_KERNEL); + + certs_len = ac.certificates_buf.len; + void *cbuf __free(kvfree) = certs_len ? kvzalloc(certs_len, GFP_KERNEL) : NULL; + + if (!rbuf || !mbuf || (certs_len && !cbuf)) { + free_pages_exact(buffer, size); + return -ENOMEM; + } + + memcpy(rbuf, rep, rep_len); + report->outblob = no_free_ptr(rbuf); + report->outblob_len = rep_len; + + memcpy(mbuf, man, man_len); + report->manifestblob = no_free_ptr(mbuf); + report->manifestblob_len = man_len; + + if (certs_len) { + memcpy(cbuf, certs, certs_len); + report->auxblob = no_free_ptr(cbuf); + report->auxblob_len = certs_len; + } + + free_pages_exact(buffer, size); + + return 0; +} + static int sev_report_new(struct tsm_report *report, void *data) { struct snp_msg_cert_entry *cert_table; @@ -805,6 +944,13 @@ static int sev_report_new(struct tsm_report *report, void *data) if (desc->inblob_len != SNP_REPORT_USER_DATA_SIZE) return -EINVAL; + if (desc->service_provider) { + if (strcmp(desc->service_provider, "svsm")) + return -EINVAL; + + return sev_svsm_report_new(report, data); + } + void *buf __free(kvfree) = kvzalloc(size, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -893,9 +1039,42 @@ static int sev_report_new(struct tsm_report *report, void *data) return 0; } +static bool sev_report_attr_visible(int n) +{ + switch (n) { + case TSM_REPORT_GENERATION: + case TSM_REPORT_PROVIDER: + case TSM_REPORT_PRIVLEVEL: + case TSM_REPORT_PRIVLEVEL_FLOOR: + return true; + case TSM_REPORT_SERVICE_PROVIDER: + case TSM_REPORT_SERVICE_GUID: + case TSM_REPORT_SERVICE_MANIFEST_VER: + return snp_vmpl; + } + + return false; +} + +static bool sev_report_bin_attr_visible(int n) +{ + switch (n) { + case TSM_REPORT_INBLOB: + case TSM_REPORT_OUTBLOB: + case TSM_REPORT_AUXBLOB: + return true; + case TSM_REPORT_MANIFESTBLOB: + return snp_vmpl; + } + + return false; +} + static struct tsm_ops sev_tsm_ops = { .name = KBUILD_MODNAME, .report_new = sev_report_new, + .report_attr_visible = sev_report_attr_visible, + .report_bin_attr_visible = sev_report_bin_attr_visible, }; static void unregister_sev_tsm(void *data) diff --git a/drivers/virt/coco/tsm.c b/drivers/virt/coco/tsm.c index 7db534b63c9f..9432d4e303f1 100644 --- a/drivers/virt/coco/tsm.c +++ b/drivers/virt/coco/tsm.c @@ -34,7 +34,7 @@ static DECLARE_RWSEM(tsm_rwsem); * The attestation report format is TSM provider specific, when / if a standard * materializes that can be published instead of the vendor layout. Until then * the 'provider' attribute indicates the format of 'outblob', and optionally - * 'auxblob'. + * 'auxblob' and 'manifestblob'. */ struct tsm_report_state { @@ -47,6 +47,7 @@ struct tsm_report_state { enum tsm_data_select { TSM_REPORT, TSM_CERTS, + TSM_MANIFEST, }; static struct tsm_report *to_tsm_report(struct config_item *cfg) @@ -118,6 +119,74 @@ static ssize_t tsm_report_privlevel_floor_show(struct config_item *cfg, } CONFIGFS_ATTR_RO(tsm_report_, privlevel_floor); +static ssize_t tsm_report_service_provider_store(struct config_item *cfg, + const char *buf, size_t len) +{ + struct tsm_report *report = to_tsm_report(cfg); + size_t sp_len; + char *sp; + int rc; + + guard(rwsem_write)(&tsm_rwsem); + rc = try_advance_write_generation(report); + if (rc) + return rc; + + sp_len = (buf[len - 1] != '\n') ? len : len - 1; + + sp = kstrndup(buf, sp_len, GFP_KERNEL); + if (!sp) + return -ENOMEM; + kfree(report->desc.service_provider); + + report->desc.service_provider = sp; + + return len; +} +CONFIGFS_ATTR_WO(tsm_report_, service_provider); + +static ssize_t tsm_report_service_guid_store(struct config_item *cfg, + const char *buf, size_t len) +{ + struct tsm_report *report = to_tsm_report(cfg); + int rc; + + guard(rwsem_write)(&tsm_rwsem); + rc = try_advance_write_generation(report); + if (rc) + return rc; + + report->desc.service_guid = guid_null; + + rc = guid_parse(buf, &report->desc.service_guid); + if (rc) + return rc; + + return len; +} +CONFIGFS_ATTR_WO(tsm_report_, service_guid); + +static ssize_t tsm_report_service_manifest_version_store(struct config_item *cfg, + const char *buf, size_t len) +{ + struct tsm_report *report = to_tsm_report(cfg); + unsigned int val; + int rc; + + rc = kstrtouint(buf, 0, &val); + if (rc) + return rc; + + guard(rwsem_write)(&tsm_rwsem); + rc = try_advance_write_generation(report); + if (rc) + return rc; + report->desc.service_manifest_version = val; + + return len; +} +CONFIGFS_ATTR_WO(tsm_report_, service_manifest_version); + static ssize_t tsm_report_inblob_write(struct config_item *cfg, const void *buf, size_t count) { @@ -162,6 +231,9 @@ static ssize_t __read_report(struct tsm_report *report, void *buf, size_t count, if (select == TSM_REPORT) { out = report->outblob; len = report->outblob_len; + } else if (select == TSM_MANIFEST) { + out = report->manifestblob; + len = report->manifestblob_len; } else { out = report->auxblob; len = report->auxblob_len; @@ -187,7 +259,7 @@ static ssize_t read_cached_report(struct tsm_report *report, void *buf, /* * A given TSM backend always fills in ->outblob regardless of - * whether the report includes an auxblob or not. + * whether the report includes an auxblob/manifestblob or not. */ if (!report->outblob || state->read_generation != state->write_generation) @@ -223,8 +295,10 @@ static ssize_t tsm_report_read(struct tsm_report *report, void *buf, kvfree(report->outblob); kvfree(report->auxblob); + kvfree(report->manifestblob); report->outblob = NULL; report->auxblob = NULL; + report->manifestblob = NULL; rc = ops->report_new(report, provider.data); if (rc < 0) return rc; @@ -251,11 +325,23 @@ static ssize_t tsm_report_auxblob_read(struct config_item *cfg, void *buf, } CONFIGFS_BIN_ATTR_RO(tsm_report_, auxblob, NULL, TSM_OUTBLOB_MAX); +static ssize_t tsm_report_manifestblob_read(struct config_item *cfg, void *buf, + size_t count) +{ + struct tsm_report *report = to_tsm_report(cfg); + + return tsm_report_read(report, buf, count, TSM_MANIFEST); +} +CONFIGFS_BIN_ATTR_RO(tsm_report_, manifestblob, NULL, TSM_OUTBLOB_MAX); + static struct configfs_attribute *tsm_report_attrs[] = { [TSM_REPORT_GENERATION] = &tsm_report_attr_generation, [TSM_REPORT_PROVIDER] = &tsm_report_attr_provider, [TSM_REPORT_PRIVLEVEL] = &tsm_report_attr_privlevel, [TSM_REPORT_PRIVLEVEL_FLOOR] = &tsm_report_attr_privlevel_floor, + [TSM_REPORT_SERVICE_PROVIDER] = &tsm_report_attr_service_provider, + [TSM_REPORT_SERVICE_GUID] = &tsm_report_attr_service_guid, + [TSM_REPORT_SERVICE_MANIFEST_VER] = &tsm_report_attr_service_manifest_version, NULL, }; @@ -263,6 +349,7 @@ static struct configfs_bin_attribute *tsm_report_bin_attrs[] = { [TSM_REPORT_INBLOB] = &tsm_report_attr_inblob, [TSM_REPORT_OUTBLOB] = &tsm_report_attr_outblob, [TSM_REPORT_AUXBLOB] = &tsm_report_attr_auxblob, + [TSM_REPORT_MANIFESTBLOB] = &tsm_report_attr_manifestblob, NULL, }; @@ -271,8 +358,10 @@ static void tsm_report_item_release(struct config_item *cfg) struct tsm_report *report = to_tsm_report(cfg); struct tsm_report_state *state = to_state(report); + kvfree(report->manifestblob); kvfree(report->auxblob); kvfree(report->outblob); + kfree(report->desc.service_provider); kfree(state); } diff --git a/include/linux/tsm.h b/include/linux/tsm.h index 30d9d270b446..11b0c525be30 100644 --- a/include/linux/tsm.h +++ b/include/linux/tsm.h @@ -4,6 +4,7 @@ #include #include +#include #define TSM_INBLOB_MAX 64 #define TSM_OUTBLOB_MAX SZ_32K @@ -19,11 +20,17 @@ * @privlevel: optional privilege level to associate with @outblob * @inblob_len: sizeof @inblob * @inblob: arbitrary input data + * @service_provider: optional name of where to obtain the tsm report blob + * @service_guid: optional service-provider service guid to attest + * @service_manifest_version: optional service-provider service manifest version requested */ struct tsm_desc { unsigned int privlevel; size_t inblob_len; u8 inblob[TSM_INBLOB_MAX]; + char *service_provider; + guid_t service_guid; + unsigned int service_manifest_version; }; /** @@ -33,6 +40,8 @@ struct tsm_desc { * @outblob: generated evidence to provider to the attestation agent * @auxblob_len: sizeof(@auxblob) * @auxblob: (optional) auxiliary data to the report (e.g. certificate data) + * @manifestblob_len: sizeof(@manifestblob) + * @manifestblob: (optional) manifest data associated with the report */ struct tsm_report { struct tsm_desc desc; @@ -40,6 +49,8 @@ struct tsm_report { u8 *outblob; size_t auxblob_len; u8 *auxblob; + size_t manifestblob_len; + u8 *manifestblob; }; /** @@ -48,12 +59,18 @@ struct tsm_report { * @TSM_REPORT_PROVIDER: index of the provider name attribute * @TSM_REPORT_PRIVLEVEL: index of the desired privilege level attribute * @TSM_REPORT_PRIVLEVEL_FLOOR: index of the minimum allowed privileg level attribute + * @TSM_REPORT_SERVICE_PROVIDER: index of the service provider identifier attribute + * @TSM_REPORT_SERVICE_GUID: index of the service GUID attribute + * @TSM_REPORT_SERVICE_MANIFEST_VER: index of the service manifest version attribute */ enum tsm_attr_index { TSM_REPORT_GENERATION, TSM_REPORT_PROVIDER, TSM_REPORT_PRIVLEVEL, TSM_REPORT_PRIVLEVEL_FLOOR, + TSM_REPORT_SERVICE_PROVIDER, + TSM_REPORT_SERVICE_GUID, + TSM_REPORT_SERVICE_MANIFEST_VER, }; /** @@ -61,11 +78,13 @@ enum tsm_attr_index { * @TSM_REPORT_INBLOB: index of the binary report input attribute * @TSM_REPORT_OUTBLOB: index of the binary report output attribute * @TSM_REPORT_AUXBLOB: index of the binary auxiliary data attribute + * @TSM_REPORT_MANIFESTBLOB: index of the binary manifest data attribute */ enum tsm_bin_attr_index { TSM_REPORT_INBLOB, TSM_REPORT_OUTBLOB, TSM_REPORT_AUXBLOB, + TSM_REPORT_MANIFESTBLOB, }; /** -- cgit v1.2.3 From 8cb2dbf94e44bcde4cff0223f2f900f8fb9083a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 17 Jun 2024 20:31:31 +0200 Subject: irqdomain: Make build work for CONFIG_GENERIC_IRQ_CHIP=n ld: kernel/irq/irqdomain.o: in function `irq_domain_instantiate': kernel/irq/irqdomain.c:296:(.text+0x10dd): undefined reference to `irq_domain_alloc_generic_chips' ld: kernel/irq/irqdomain.c:313:(.text+0x1218): undefined reference to `irq_domain_remove_generic_chips' ld: kernel/irq/irqdomain.o: in function `irq_domain_remove': kernel/irq/irqdomain.c:349:(.text+0x1ddf): undefined reference to `irq_domain_remove_generic_chips' Provide the required stubs. Fixes: e6f67ce32e8e ("irqdomain: Add support for generic irq chips creation before publishing a domain") Reported-by: Borislav Betkov Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 712bcee56efc..1f5dbf1f92c9 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1182,9 +1182,19 @@ int devm_irq_setup_generic_chip(struct device *dev, struct irq_chip_generic *gc, struct irq_chip_generic *irq_get_domain_generic_chip(struct irq_domain *d, unsigned int hw_irq); +#ifdef CONFIG_GENERIC_IRQ_CHIP int irq_domain_alloc_generic_chips(struct irq_domain *d, const struct irq_domain_chip_generic_info *info); void irq_domain_remove_generic_chips(struct irq_domain *d); +#else +static inline int +irq_domain_alloc_generic_chips(struct irq_domain *d, + const struct irq_domain_chip_generic_info *info) +{ + return -EINVAL; +} +static inline void irq_domain_remove_generic_chips(struct irq_domain *d) { } +#endif /* CONFIG_GENERIC_IRQ_CHIP */ int __irq_alloc_domain_generic_chips(struct irq_domain *d, int irqs_per_chip, int num_ct, const char *name, -- cgit v1.2.3 From efb459303dd5dd6e198a0d58322dc04c3356dc23 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Wed, 12 Jun 2024 17:04:02 +0200 Subject: net: Move dev_set_hwtstamp_phylib to net/core/dev.h This declaration was added to the header to be called from ethtool. ethtool is separated from core for code organization but it is not really a separate entity, it controls very core things. As ethtool is an internal stuff it is not wise to have it in netdevice.h. Move the declaration to net/core/dev.h instead. Remove the EXPORT_SYMBOL_GPL call as ethtool can not be built as a module. Reviewed-by: Willem de Bruijn Signed-off-by: Kory Maincent Link: https://lore.kernel.org/r/20240612-feature_ptp_netnext-v15-2-b2a086257b63@bootlin.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 3 --- net/core/dev.h | 4 ++++ net/core/dev_ioctl.c | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 85111502cf8f..c83b390191d4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3904,9 +3904,6 @@ int generic_hwtstamp_get_lower(struct net_device *dev, int generic_hwtstamp_set_lower(struct net_device *dev, struct kernel_hwtstamp_config *kernel_cfg, struct netlink_ext_ack *extack); -int dev_set_hwtstamp_phylib(struct net_device *dev, - struct kernel_hwtstamp_config *cfg, - struct netlink_ext_ack *extack); int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata); unsigned int dev_get_flags(const struct net_device *); int __dev_change_flags(struct net_device *dev, unsigned int flags, diff --git a/net/core/dev.h b/net/core/dev.h index b7b518bc2be5..58f88d28bc99 100644 --- a/net/core/dev.h +++ b/net/core/dev.h @@ -166,4 +166,8 @@ static inline void dev_xmit_recursion_dec(void) __this_cpu_dec(softnet_data.xmit.recursion); } +int dev_set_hwtstamp_phylib(struct net_device *dev, + struct kernel_hwtstamp_config *cfg, + struct netlink_ext_ack *extack); + #endif diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 9a66cf5015f2..b9719ed3c3fd 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -363,7 +363,6 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, return 0; } -EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib); static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr) { -- cgit v1.2.3 From 41474d25bec56900e3a018907784b0abfe5a6a9e Mon Sep 17 00:00:00 2001 From: Nick Hollinghurst Date: Fri, 16 Feb 2024 18:48:55 +0000 Subject: drm: Add DRM_MODE_TV_MODE_MONOCHROME Add this as a value for enum_drm_connector_tv_mode, represented by the string "Mono", to generate video with no colour encoding or bursts. Define it to have no pedestal (since only NTSC-M calls for a pedestal). Change default mode creation to acommodate the new tv_mode value which comprises both 525-line and 625-line formats. Acked-by: Daniel Vetter Signed-off-by: Nick Hollinghurst Signed-off-by: Dave Stevenson Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240216184857.245372-2-dave.stevenson@raspberrypi.com --- drivers/gpu/drm/drm_connector.c | 7 +++++++ drivers/gpu/drm/drm_modes.c | 5 ++++- drivers/gpu/drm/drm_probe_helper.c | 5 +++-- include/drm/drm_connector.h | 7 +++++++ 4 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 3d73a981004c..ab6ab7ff7ea8 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1087,6 +1087,7 @@ static const struct drm_prop_enum_list drm_tv_mode_enum_list[] = { { DRM_MODE_TV_MODE_PAL_M, "PAL-M" }, { DRM_MODE_TV_MODE_PAL_N, "PAL-N" }, { DRM_MODE_TV_MODE_SECAM, "SECAM" }, + { DRM_MODE_TV_MODE_MONOCHROME, "Mono" }, }; DRM_ENUM_NAME_FN(drm_get_tv_mode_name, drm_tv_mode_enum_list) @@ -1858,6 +1859,12 @@ EXPORT_SYMBOL(drm_connector_attach_dp_subconnector_property); * TV Mode is CCIR System B (aka 625-lines) together with * the SECAM Color Encoding. * + * Mono: + * + * Use timings appropriate to the DRM mode, including + * equalizing pulses for a 525-line or 625-line mode, + * with no pedestal or color encoding. + * * Drivers can set up this property by calling * drm_mode_create_tv_properties(). */ diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 2d8b0371619d..1a0890083aee 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -531,7 +531,8 @@ static int fill_analog_mode(struct drm_device *dev, * @interlace: whether to compute an interlaced mode * * This function creates a struct drm_display_mode instance suited for - * an analog TV output, for one of the usual analog TV mode. + * an analog TV output, for one of the usual analog TV modes. Where + * this is DRM_MODE_TV_MODE_MONOCHROME, a 625-line mode will be created. * * Note that @hdisplay is larger than the usual constraints for the PAL * and NTSC timings, and we'll choose to ignore most timings constraints @@ -569,6 +570,8 @@ struct drm_display_mode *drm_analog_tv_mode(struct drm_device *dev, case DRM_MODE_TV_MODE_PAL_N: fallthrough; case DRM_MODE_TV_MODE_SECAM: + fallthrough; + case DRM_MODE_TV_MODE_MONOCHROME: analog = DRM_MODE_ANALOG_PAL; break; diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 249c8c2cb319..bb49d552e671 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -1259,8 +1259,9 @@ int drm_connector_helper_tv_get_modes(struct drm_connector *connector) for (i = 0; i < tv_mode_property->num_values; i++) supported_tv_modes |= BIT(tv_mode_property->values[i]); - if ((supported_tv_modes & ntsc_modes) && - (supported_tv_modes & pal_modes)) { + if (((supported_tv_modes & ntsc_modes) && + (supported_tv_modes & pal_modes)) || + (supported_tv_modes & BIT(DRM_MODE_TV_MODE_MONOCHROME))) { uint64_t default_mode; if (drm_object_property_get_default_value(&connector->base, diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index f750765d8fbc..c754651044d4 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -201,6 +201,13 @@ enum drm_connector_tv_mode { */ DRM_MODE_TV_MODE_SECAM, + /** + * @DRM_MODE_TV_MODE_MONOCHROME: Use timings appropriate to + * the DRM mode, including equalizing pulses for a 525-line + * or 625-line mode, with no pedestal or color encoding. + */ + DRM_MODE_TV_MODE_MONOCHROME, + /** * @DRM_MODE_TV_MODE_MAX: Number of analog TV output modes. * -- cgit v1.2.3 From 777b8afb8179155353ec14b1d8153122410aba29 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sat, 15 Jun 2024 20:00:27 +0800 Subject: net: phy: introduce core support for phy-mode = "10g-qxgmii" 10G-QXGMII is a MAC-to-PHY interface defined by the USXGMII multiport specification. It uses the same signaling as USXGMII, but it multiplexes 4 ports over the link, resulting in a maximum speed of 2.5G per port. Some in-tree SoCs like the NXP LS1028A use "usxgmii" when they mean either the single-port USXGMII or the quad-port 10G-QXGMII variant, and they could get away just fine with that thus far. But there is a need to distinguish between the 2 as far as SerDes drivers are concerned. Signed-off-by: Vladimir Oltean Signed-off-by: Luo Jie Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Paolo Abeni --- Documentation/networking/phy.rst | 6 ++++++ drivers/net/phy/phy-core.c | 1 + drivers/net/phy/phylink.c | 9 ++++++++- include/linux/phy.h | 4 ++++ include/linux/phylink.h | 1 + 5 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst index 1283240d7620..f64641417c54 100644 --- a/Documentation/networking/phy.rst +++ b/Documentation/networking/phy.rst @@ -327,6 +327,12 @@ Some of the interface modes are described below: This is the Penta SGMII mode, it is similar to QSGMII but it combines 5 SGMII lines into a single link compared to 4 on QSGMII. +``PHY_INTERFACE_MODE_10G_QXGMII`` + Represents the 10G-QXGMII PHY-MAC interface as defined by the Cisco USXGMII + Multiport Copper Interface document. It supports 4 ports over a 10.3125 GHz + SerDes lane, each port having speeds of 2.5G / 1G / 100M / 10M achieved + through symbol replication. The PCS expects the standard USXGMII code word. + Pause frames / flow control =========================== diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 15f349e5995a..a235ea2264a7 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -141,6 +141,7 @@ int phy_interface_num_ports(phy_interface_t interface) return 1; case PHY_INTERFACE_MODE_QSGMII: case PHY_INTERFACE_MODE_QUSGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: return 4; case PHY_INTERFACE_MODE_PSGMII: return 5; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 02427378acfd..6c24c48dcf0f 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -231,6 +231,7 @@ static int phylink_interface_max_speed(phy_interface_t interface) return SPEED_1000; case PHY_INTERFACE_MODE_2500BASEX: + case PHY_INTERFACE_MODE_10G_QXGMII: return SPEED_2500; case PHY_INTERFACE_MODE_5GBASER: @@ -500,7 +501,11 @@ static unsigned long phylink_get_capabilities(phy_interface_t interface, switch (interface) { case PHY_INTERFACE_MODE_USXGMII: - caps |= MAC_10000FD | MAC_5000FD | MAC_2500FD; + caps |= MAC_10000FD | MAC_5000FD; + fallthrough; + + case PHY_INTERFACE_MODE_10G_QXGMII: + caps |= MAC_2500FD; fallthrough; case PHY_INTERFACE_MODE_RGMII_TXID: @@ -926,6 +931,7 @@ static int phylink_parse_mode(struct phylink *pl, case PHY_INTERFACE_MODE_5GBASER: case PHY_INTERFACE_MODE_25GBASER: case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: case PHY_INTERFACE_MODE_10GKR: case PHY_INTERFACE_MODE_10GBASER: case PHY_INTERFACE_MODE_XLGMII: @@ -1124,6 +1130,7 @@ static unsigned int phylink_pcs_neg_mode(unsigned int mode, case PHY_INTERFACE_MODE_QSGMII: case PHY_INTERFACE_MODE_QUSGMII: case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: /* These protocols are designed for use with a PHY which * communicates its negotiation result back to the MAC via * inband communication. Note: there exist PHYs that run diff --git a/include/linux/phy.h b/include/linux/phy.h index e6e83304558e..205fccfc0f60 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -128,6 +128,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN * @PHY_INTERFACE_MODE_QUSGMII: Quad Universal SGMII * @PHY_INTERFACE_MODE_1000BASEKX: 1000Base-KX - with Clause 73 AN + * @PHY_INTERFACE_MODE_10G_QXGMII: 10G-QXGMII - 4 ports over 10G USXGMII * @PHY_INTERFACE_MODE_MAX: Book keeping * * Describes the interface between the MAC and PHY. @@ -168,6 +169,7 @@ typedef enum { PHY_INTERFACE_MODE_10GKR, PHY_INTERFACE_MODE_QUSGMII, PHY_INTERFACE_MODE_1000BASEKX, + PHY_INTERFACE_MODE_10G_QXGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -289,6 +291,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "100base-x"; case PHY_INTERFACE_MODE_QUSGMII: return "qusgmii"; + case PHY_INTERFACE_MODE_10G_QXGMII: + return "10g-qxgmii"; default: return "unknown"; } diff --git a/include/linux/phylink.h b/include/linux/phylink.h index a30a692acc32..2381e07429a2 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -654,6 +654,7 @@ static inline int phylink_get_link_timer_ns(phy_interface_t interface) case PHY_INTERFACE_MODE_SGMII: case PHY_INTERFACE_MODE_QSGMII: case PHY_INTERFACE_MODE_USXGMII: + case PHY_INTERFACE_MODE_10G_QXGMII: return 1600000; case PHY_INTERFACE_MODE_1000BASEX: -- cgit v1.2.3 From c2bc958b2b03e361f14df99983bc64a39a7323a3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 17 Jun 2024 13:06:27 +0200 Subject: fbdev: vesafb: Detect VGA compatibility from screen info's VESA attributes Test the vesa_attributes field in struct screen_info for compatibility with VGA hardware. Vesafb currently tests bit 1 in screen_info's capabilities field which indicates a 64-bit lfb address and is unrelated to VGA compatibility. Section 4.4 of the Vesa VBE 2.0 specifications defines that bit 5 in the mode's attributes field signals VGA compatibility. The mode is compatible with VGA hardware if the bit is clear. In that case, the driver can access VGA state of the VBE's underlying hardware. The vesafb driver uses this feature to program the color LUT in palette modes. Without, colors might be incorrect. The problem got introduced in commit 89ec4c238e7a ("[PATCH] vesafb: Fix incorrect logo colors in x86_64"). It incorrectly stores the mode attributes in the screen_info's capabilities field and updates vesafb accordingly. Later, commit 5e8ddcbe8692 ("Video mode probing support for the new x86 setup code") fixed the screen_info, but did not update vesafb. Color output still tends to work, because bit 1 in capabilities is usually 0. Besides fixing the bug in vesafb, this commit introduces a helper that reads the correct bit from screen_info. Signed-off-by: Thomas Zimmermann Fixes: 5e8ddcbe8692 ("Video mode probing support for the new x86 setup code") Reviewed-by: Javier Martinez Canillas Cc: # v2.6.23+ Signed-off-by: Helge Deller --- drivers/video/fbdev/vesafb.c | 2 +- include/linux/screen_info.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/video/fbdev/vesafb.c b/drivers/video/fbdev/vesafb.c index 8ab64ae4cad3..5a161750a3ae 100644 --- a/drivers/video/fbdev/vesafb.c +++ b/drivers/video/fbdev/vesafb.c @@ -271,7 +271,7 @@ static int vesafb_probe(struct platform_device *dev) if (si->orig_video_isVGA != VIDEO_TYPE_VLFB) return -ENODEV; - vga_compat = (si->capabilities & 2) ? 0 : 1; + vga_compat = !__screen_info_vbe_mode_nonvga(si); vesafb_fix.smem_start = si->lfb_base; vesafb_defined.bits_per_pixel = si->lfb_depth; if (15 == vesafb_defined.bits_per_pixel) diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 75303c126285..6a4a3cec4638 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -49,6 +49,16 @@ static inline u64 __screen_info_lfb_size(const struct screen_info *si, unsigned return lfb_size; } +static inline bool __screen_info_vbe_mode_nonvga(const struct screen_info *si) +{ + /* + * VESA modes typically run on VGA hardware. Set bit 5 signals that this + * is not the case. Drivers can then not make use of VGA resources. See + * Sec 4.4 of the VBE 2.0 spec. + */ + return si->vesa_attributes & BIT(5); +} + static inline unsigned int __screen_info_video_type(unsigned int type) { switch (type) { -- cgit v1.2.3 From 2fbafecb0f05818e25f6c926c6f9ad9ef597429c Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:19 +0200 Subject: ASoC: Constify of_phandle_args in snd_soc_dai_driver ASoC core code does not modify contents of 'of_phandle_args' in 'struct snd_soc_dai_driver', so the pointer can be made as a pointer to const. This makes code safer, serves as clear annotation of core's intentions and allows putting pointed structures in rodata (if ever applicable). Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-1-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 15ef268c9845..e6c61c79c483 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -413,7 +413,7 @@ struct snd_soc_dai_driver { unsigned int id; unsigned int base; struct snd_soc_dobj dobj; - struct of_phandle_args *dai_args; + const struct of_phandle_args *dai_args; /* ops */ const struct snd_soc_dai_ops *ops; -- cgit v1.2.3 From 020b37d06f97de289940805bc821190d5858eda0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:20 +0200 Subject: ASoC: Constify of_phandle_args in snd_soc_dai_link_component ASoC core code does not modify contents of 'of_phandle_args' in 'struct snd_soc_dai_link_component', so the pointer can be made as a pointer to const. This makes code safer, serves as clear annotation of core's intentions and allows putting pointed structures in rodata (if ever applicable). Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-2-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/soc.h b/include/sound/soc.h index 33671437ee89..f02a51694ab4 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -675,7 +675,7 @@ struct snd_soc_dai_link_component { const char *name; struct device_node *of_node; const char *dai_name; - struct of_phandle_args *dai_args; + const struct of_phandle_args *dai_args; }; /* -- cgit v1.2.3 From f3ac3da7e4d0957b3402fb31a4ca480e739e086f Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:21 +0200 Subject: ASoC: Constify passed data to core function Several ASoC functions receive pointers to data which is not modified, e.g. pointers to 'snd_soc_dai', 'snd_soc_pcm_runtime', 'snd_pcm_hw_params' and 'snd_soc_dai_link'. All these pointers can be made as a pointer to const. This makes code safer, serves as clear annotation of function's intentions (no ownership passed to the function, no modifications) and allows putting pointed structures in rodata (if ever applicable). Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-3-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 18 ++++++++++-------- include/sound/soc.h | 17 +++++++++-------- sound/soc/soc-core.c | 4 ++-- sound/soc/soc-dai.c | 8 ++++---- sound/soc/soc-utils.c | 8 ++++---- 5 files changed, 29 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index e6c61c79c483..3c45b418270e 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -180,8 +180,8 @@ int snd_soc_dai_set_pll(struct snd_soc_dai *dai, int snd_soc_dai_set_bclk_ratio(struct snd_soc_dai *dai, unsigned int ratio); /* Digital Audio interface formatting */ -int snd_soc_dai_get_fmt_max_priority(struct snd_soc_pcm_runtime *rtd); -u64 snd_soc_dai_get_fmt(struct snd_soc_dai *dai, int priority); +int snd_soc_dai_get_fmt_max_priority(const struct snd_soc_pcm_runtime *rtd); +u64 snd_soc_dai_get_fmt(const struct snd_soc_dai *dai, int priority); int snd_soc_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); int snd_soc_dai_set_tdm_slot(struct snd_soc_dai *dai, @@ -202,7 +202,7 @@ int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot); -int snd_soc_dai_is_dummy(struct snd_soc_dai *dai); +int snd_soc_dai_is_dummy(const struct snd_soc_dai *dai); int snd_soc_dai_hw_params(struct snd_soc_dai *dai, struct snd_pcm_substream *substream, @@ -218,7 +218,7 @@ void snd_soc_dai_suspend(struct snd_soc_dai *dai); void snd_soc_dai_resume(struct snd_soc_dai *dai); int snd_soc_dai_compress_new(struct snd_soc_dai *dai, struct snd_soc_pcm_runtime *rtd, int num); -bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int stream); +bool snd_soc_dai_stream_valid(const struct snd_soc_dai *dai, int stream); void snd_soc_dai_link_set_capabilities(struct snd_soc_dai_link *dai_link); void snd_soc_dai_action(struct snd_soc_dai *dai, int stream, int action); @@ -232,7 +232,7 @@ static inline void snd_soc_dai_deactivate(struct snd_soc_dai *dai, { snd_soc_dai_action(dai, stream, -1); } -int snd_soc_dai_active(struct snd_soc_dai *dai); +int snd_soc_dai_active(const struct snd_soc_dai *dai); int snd_soc_pcm_dai_probe(struct snd_soc_pcm_runtime *rtd, int order); int snd_soc_pcm_dai_remove(struct snd_soc_pcm_runtime *rtd, int order); @@ -271,7 +271,7 @@ int snd_soc_dai_compr_get_metadata(struct snd_soc_dai *dai, struct snd_compr_stream *cstream, struct snd_compr_metadata *metadata); -const char *snd_soc_dai_name_get(struct snd_soc_dai *dai); +const char *snd_soc_dai_name_get(const struct snd_soc_dai *dai); struct snd_soc_dai_ops { /* DAI driver callbacks */ @@ -518,7 +518,8 @@ static inline void snd_soc_dai_init_dma_data(struct snd_soc_dai *dai, void *play snd_soc_dai_dma_data_set_capture(dai, capture); } -static inline unsigned int snd_soc_dai_tdm_mask_get(struct snd_soc_dai *dai, int stream) +static inline unsigned int snd_soc_dai_tdm_mask_get(const struct snd_soc_dai *dai, + int stream) { return dai->stream[stream].tdm_mask; } @@ -529,7 +530,8 @@ static inline void snd_soc_dai_tdm_mask_set(struct snd_soc_dai *dai, int stream, dai->stream[stream].tdm_mask = tdm_mask; } -static inline unsigned int snd_soc_dai_stream_active(struct snd_soc_dai *dai, int stream) +static inline unsigned int snd_soc_dai_stream_active(const struct snd_soc_dai *dai, + int stream) { /* see snd_soc_dai_action() for setup */ return dai->stream[stream].active; diff --git a/include/sound/soc.h b/include/sound/soc.h index f02a51694ab4..a8e66bbf932b 100644 --- a/include/sound/soc.h +++ b/include/sound/soc.h @@ -534,10 +534,10 @@ static inline int snd_soc_set_dmi_name(struct snd_soc_card *card, /* Utility functions to get clock rates from various things */ int snd_soc_calc_frame_size(int sample_size, int channels, int tdm_slots); -int snd_soc_params_to_frame_size(struct snd_pcm_hw_params *params); +int snd_soc_params_to_frame_size(const struct snd_pcm_hw_params *params); int snd_soc_calc_bclk(int fs, int sample_size, int channels, int tdm_slots); -int snd_soc_params_to_bclk(struct snd_pcm_hw_params *parms); -int snd_soc_tdm_params_to_bclk(struct snd_pcm_hw_params *params, +int snd_soc_params_to_bclk(const struct snd_pcm_hw_params *parms); +int snd_soc_tdm_params_to_bclk(const struct snd_pcm_hw_params *params, int tdm_width, int tdm_slots, int slot_multiple); /* set runtime hw params */ @@ -837,7 +837,8 @@ struct snd_soc_dai_link { #endif }; -static inline int snd_soc_link_num_ch_map(struct snd_soc_dai_link *link) { +static inline int snd_soc_link_num_ch_map(const struct snd_soc_dai_link *link) +{ return max(link->num_cpus, link->num_codecs); } @@ -1299,7 +1300,7 @@ struct soc_enum { #endif }; -static inline bool snd_soc_volsw_is_stereo(struct soc_mixer_control *mc) +static inline bool snd_soc_volsw_is_stereo(const struct soc_mixer_control *mc) { if (mc->reg == mc->rreg && mc->shift == mc->rshift) return false; @@ -1311,7 +1312,7 @@ static inline bool snd_soc_volsw_is_stereo(struct soc_mixer_control *mc) return true; } -static inline unsigned int snd_soc_enum_val_to_item(struct soc_enum *e, +static inline unsigned int snd_soc_enum_val_to_item(const struct soc_enum *e, unsigned int val) { unsigned int i; @@ -1326,7 +1327,7 @@ static inline unsigned int snd_soc_enum_val_to_item(struct soc_enum *e, return 0; } -static inline unsigned int snd_soc_enum_item_to_val(struct soc_enum *e, +static inline unsigned int snd_soc_enum_item_to_val(const struct soc_enum *e, unsigned int item) { if (!e->values) @@ -1401,7 +1402,7 @@ unsigned int snd_soc_daifmt_parse_clock_provider_raw(struct device_node *np, snd_soc_daifmt_clock_provider_from_bitmap( \ snd_soc_daifmt_parse_clock_provider_as_bitmap(np, prefix)) -int snd_soc_get_stream_cpu(struct snd_soc_dai_link *dai_link, int stream); +int snd_soc_get_stream_cpu(const struct snd_soc_dai_link *dai_link, int stream); int snd_soc_get_dlc(const struct of_phandle_args *args, struct snd_soc_dai_link_component *dlc); int snd_soc_of_get_dlc(struct device_node *of_node, diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 3ab6626ad680..724fe1f033b5 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -297,7 +297,7 @@ static int snd_soc_is_matching_dai(const struct snd_soc_dai_link_component *dlc, return 0; } -const char *snd_soc_dai_name_get(struct snd_soc_dai *dai) +const char *snd_soc_dai_name_get(const struct snd_soc_dai *dai) { /* see snd_soc_is_matching_dai() */ if (dai->driver->name) @@ -3430,7 +3430,7 @@ unsigned int snd_soc_daifmt_parse_clock_provider_raw(struct device_node *np, } EXPORT_SYMBOL_GPL(snd_soc_daifmt_parse_clock_provider_raw); -int snd_soc_get_stream_cpu(struct snd_soc_dai_link *dai_link, int stream) +int snd_soc_get_stream_cpu(const struct snd_soc_dai_link *dai_link, int stream) { /* * [Normal] diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index 03afd5efb24c..55d1a5a099df 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -134,7 +134,7 @@ int snd_soc_dai_set_bclk_ratio(struct snd_soc_dai *dai, unsigned int ratio) } EXPORT_SYMBOL_GPL(snd_soc_dai_set_bclk_ratio); -int snd_soc_dai_get_fmt_max_priority(struct snd_soc_pcm_runtime *rtd) +int snd_soc_dai_get_fmt_max_priority(const struct snd_soc_pcm_runtime *rtd) { struct snd_soc_dai *dai; int i, max = 0; @@ -166,7 +166,7 @@ int snd_soc_dai_get_fmt_max_priority(struct snd_soc_pcm_runtime *rtd) * modes. This will mean that sometimes fewer formats * are reported here than are supported by set_fmt(). */ -u64 snd_soc_dai_get_fmt(struct snd_soc_dai *dai, int priority) +u64 snd_soc_dai_get_fmt(const struct snd_soc_dai *dai, int priority) { const struct snd_soc_dai_ops *ops = dai->driver->ops; u64 fmt = 0; @@ -471,7 +471,7 @@ int snd_soc_dai_compress_new(struct snd_soc_dai *dai, * * Returns true if the DAI supports the indicated stream type. */ -bool snd_soc_dai_stream_valid(struct snd_soc_dai *dai, int dir) +bool snd_soc_dai_stream_valid(const struct snd_soc_dai *dai, int dir) { struct snd_soc_pcm_stream *stream = snd_soc_dai_get_pcm_stream(dai, dir); @@ -528,7 +528,7 @@ void snd_soc_dai_action(struct snd_soc_dai *dai, } EXPORT_SYMBOL_GPL(snd_soc_dai_action); -int snd_soc_dai_active(struct snd_soc_dai *dai) +int snd_soc_dai_active(const struct snd_soc_dai *dai) { int stream, active; diff --git a/sound/soc/soc-utils.c b/sound/soc/soc-utils.c index d05e712c9518..5dea5609baf4 100644 --- a/sound/soc/soc-utils.c +++ b/sound/soc/soc-utils.c @@ -21,7 +21,7 @@ int snd_soc_calc_frame_size(int sample_size, int channels, int tdm_slots) } EXPORT_SYMBOL_GPL(snd_soc_calc_frame_size); -int snd_soc_params_to_frame_size(struct snd_pcm_hw_params *params) +int snd_soc_params_to_frame_size(const struct snd_pcm_hw_params *params) { int sample_size; @@ -40,7 +40,7 @@ int snd_soc_calc_bclk(int fs, int sample_size, int channels, int tdm_slots) } EXPORT_SYMBOL_GPL(snd_soc_calc_bclk); -int snd_soc_params_to_bclk(struct snd_pcm_hw_params *params) +int snd_soc_params_to_bclk(const struct snd_pcm_hw_params *params) { int ret; @@ -79,7 +79,7 @@ EXPORT_SYMBOL_GPL(snd_soc_params_to_bclk); * Return: bclk frequency in Hz, else a negative error code if params format * is invalid. */ -int snd_soc_tdm_params_to_bclk(struct snd_pcm_hw_params *params, +int snd_soc_tdm_params_to_bclk(const struct snd_pcm_hw_params *params, int tdm_width, int tdm_slots, int slot_multiple) { if (!tdm_slots) @@ -211,7 +211,7 @@ static struct snd_soc_dai_driver dummy_dai = { .ops = &dummy_dai_ops, }; -int snd_soc_dai_is_dummy(struct snd_soc_dai *dai) +int snd_soc_dai_is_dummy(const struct snd_soc_dai *dai) { if (dai->driver == &dummy_dai) return 1; -- cgit v1.2.3 From 785d64c4941221044940ab199e6625af17296470 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:22 +0200 Subject: ASoC: Constify DAI passed to get_channel_map get_channel_map() is supposed to obtain map of channels without modifying the state of the given DAI, so make the pointer to 'struct snd_soc_dai' as pointing to const. Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-4-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 4 ++-- sound/soc/codecs/lpass-rx-macro.c | 2 +- sound/soc/codecs/lpass-tx-macro.c | 2 +- sound/soc/codecs/lpass-va-macro.c | 2 +- sound/soc/codecs/lpass-wsa-macro.c | 2 +- sound/soc/codecs/wcd9335.c | 2 +- sound/soc/codecs/wcd934x.c | 2 +- sound/soc/soc-dai.c | 4 ++-- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 3c45b418270e..f22969298de1 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -198,7 +198,7 @@ int snd_soc_dai_digital_mute(struct snd_soc_dai *dai, int mute, int direction); -int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai, +int snd_soc_dai_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot); @@ -307,7 +307,7 @@ struct snd_soc_dai_ops { int (*set_channel_map)(struct snd_soc_dai *dai, unsigned int tx_num, const unsigned int *tx_slot, unsigned int rx_num, const unsigned int *rx_slot); - int (*get_channel_map)(struct snd_soc_dai *dai, + int (*get_channel_map)(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot); int (*set_tristate)(struct snd_soc_dai *dai, int tristate); diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index 00ecc470ba8b..83791507ed13 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -1833,7 +1833,7 @@ static int rx_macro_hw_params(struct snd_pcm_substream *substream, return 0; } -static int rx_macro_get_channel_map(struct snd_soc_dai *dai, +static int rx_macro_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot) { diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index c98b0b747a92..209c12ec16dd 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -1167,7 +1167,7 @@ static int tx_macro_hw_params(struct snd_pcm_substream *substream, return 0; } -static int tx_macro_get_channel_map(struct snd_soc_dai *dai, +static int tx_macro_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot) { diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index a555e1e078eb..b0006f4ce8a2 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -892,7 +892,7 @@ static int va_macro_hw_params(struct snd_pcm_substream *substream, return 0; } -static int va_macro_get_channel_map(struct snd_soc_dai *dai, +static int va_macro_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot) { diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 6ce309980cd1..1e19c2b28a4f 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -992,7 +992,7 @@ static int wsa_macro_hw_params(struct snd_pcm_substream *substream, return 0; } -static int wsa_macro_get_channel_map(struct snd_soc_dai *dai, +static int wsa_macro_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot) { diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 42a99978fe5a..fc0ab00a253f 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -2014,7 +2014,7 @@ static int wcd9335_set_channel_map(struct snd_soc_dai *dai, return 0; } -static int wcd9335_get_channel_map(struct snd_soc_dai *dai, +static int wcd9335_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot) { diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index fcad2c9fba55..b5a929659dc8 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -1960,7 +1960,7 @@ static int wcd934x_set_channel_map(struct snd_soc_dai *dai, return 0; } -static int wcd934x_get_channel_map(struct snd_soc_dai *dai, +static int wcd934x_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot) { diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index 55d1a5a099df..0c3a834e504d 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -11,7 +11,7 @@ #include #define soc_dai_ret(dai, ret) _soc_dai_ret(dai, __func__, ret) -static inline int _soc_dai_ret(struct snd_soc_dai *dai, +static inline int _soc_dai_ret(const struct snd_soc_dai *dai, const char *func, int ret) { /* Positive, Zero values are not errors */ @@ -327,7 +327,7 @@ EXPORT_SYMBOL_GPL(snd_soc_dai_set_channel_map); * @rx_slot: pointer to an array which imply the RX slot number channel * 0~num-1 uses */ -int snd_soc_dai_get_channel_map(struct snd_soc_dai *dai, +int snd_soc_dai_get_channel_map(const struct snd_soc_dai *dai, unsigned int *tx_num, unsigned int *tx_slot, unsigned int *rx_num, unsigned int *rx_slot) { -- cgit v1.2.3 From de267e7a6ea8e6fa29af2287adfc9fc9d87e6dc9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 17 Jun 2024 15:03:23 +0200 Subject: ASoC: Constify return of snd_soc_dai_get_pcm_stream() Returned 'struct snd_soc_pcm_stream' by snd_soc_dai_get_pcm_stream() is not modified by the users, so it can be changed as pointer to const. This is a necessary step towards making the 'dai->driver' a pointer to const. Signed-off-by: Krzysztof Kozlowski Link: https://msgid.link/r/20240617-n-asoc-const-auto-selectable-formats-v1-5-8004f346ee38@linaro.org Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- sound/soc/intel/avs/pcm.c | 4 ++-- sound/soc/soc-dai.c | 2 +- sound/soc/soc-pcm.c | 26 +++++++++++++------------- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index f22969298de1..bd249710d716 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -473,7 +473,7 @@ struct snd_soc_dai { unsigned int probed:1; }; -static inline struct snd_soc_pcm_stream * +static inline const struct snd_soc_pcm_stream * snd_soc_dai_get_pcm_stream(const struct snd_soc_dai *dai, int stream) { return (stream == SNDRV_PCM_STREAM_PLAYBACK) ? diff --git a/sound/soc/intel/avs/pcm.c b/sound/soc/intel/avs/pcm.c index 88e711875004..c76b86254a8b 100644 --- a/sound/soc/intel/avs/pcm.c +++ b/sound/soc/intel/avs/pcm.c @@ -341,7 +341,7 @@ static int avs_dai_hda_be_prepare(struct snd_pcm_substream *substream, struct sn { struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream); struct snd_pcm_runtime *runtime = substream->runtime; - struct snd_soc_pcm_stream *stream_info; + const struct snd_soc_pcm_stream *stream_info; struct hdac_ext_stream *link_stream; struct hdac_ext_link *link; struct avs_dma_data *data; @@ -637,7 +637,7 @@ static int avs_dai_fe_hw_free(struct snd_pcm_substream *substream, struct snd_so static int avs_dai_fe_prepare(struct snd_pcm_substream *substream, struct snd_soc_dai *dai) { struct snd_pcm_runtime *runtime = substream->runtime; - struct snd_soc_pcm_stream *stream_info; + const struct snd_soc_pcm_stream *stream_info; struct avs_dma_data *data; struct hdac_ext_stream *host_stream; unsigned int format_val; diff --git a/sound/soc/soc-dai.c b/sound/soc/soc-dai.c index 0c3a834e504d..9e47053419c1 100644 --- a/sound/soc/soc-dai.c +++ b/sound/soc/soc-dai.c @@ -473,7 +473,7 @@ int snd_soc_dai_compress_new(struct snd_soc_dai *dai, */ bool snd_soc_dai_stream_valid(const struct snd_soc_dai *dai, int dir) { - struct snd_soc_pcm_stream *stream = snd_soc_dai_get_pcm_stream(dai, dir); + const struct snd_soc_pcm_stream *stream = snd_soc_dai_get_pcm_stream(dai, dir); /* If the codec specifies any channels at all, it supports the stream */ return stream->channels_min; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 711b2f49ed88..bad823718ae4 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -504,7 +504,7 @@ static void soc_pcm_apply_msb(struct snd_pcm_substream *substream) unsigned int bits = 0, cpu_bits = 0; for_each_rtd_codec_dais(rtd, i, codec_dai) { - struct snd_soc_pcm_stream *pcm_codec = snd_soc_dai_get_pcm_stream(codec_dai, stream); + const struct snd_soc_pcm_stream *pcm_codec = snd_soc_dai_get_pcm_stream(codec_dai, stream); if (pcm_codec->sig_bits == 0) { bits = 0; @@ -514,7 +514,7 @@ static void soc_pcm_apply_msb(struct snd_pcm_substream *substream) } for_each_rtd_cpu_dais(rtd, i, cpu_dai) { - struct snd_soc_pcm_stream *pcm_cpu = snd_soc_dai_get_pcm_stream(cpu_dai, stream); + const struct snd_soc_pcm_stream *pcm_cpu = snd_soc_dai_get_pcm_stream(cpu_dai, stream); if (pcm_cpu->sig_bits == 0) { cpu_bits = 0; @@ -538,7 +538,7 @@ static void soc_pcm_hw_init(struct snd_pcm_hardware *hw) } static void soc_pcm_hw_update_rate(struct snd_pcm_hardware *hw, - struct snd_soc_pcm_stream *p) + const struct snd_soc_pcm_stream *p) { hw->rates = snd_pcm_rate_mask_intersect(hw->rates, p->rates); @@ -551,20 +551,20 @@ static void soc_pcm_hw_update_rate(struct snd_pcm_hardware *hw, } static void soc_pcm_hw_update_chan(struct snd_pcm_hardware *hw, - struct snd_soc_pcm_stream *p) + const struct snd_soc_pcm_stream *p) { hw->channels_min = max(hw->channels_min, p->channels_min); hw->channels_max = min(hw->channels_max, p->channels_max); } static void soc_pcm_hw_update_format(struct snd_pcm_hardware *hw, - struct snd_soc_pcm_stream *p) + const struct snd_soc_pcm_stream *p) { hw->formats &= p->formats; } static void soc_pcm_hw_update_subformat(struct snd_pcm_hardware *hw, - struct snd_soc_pcm_stream *p) + const struct snd_soc_pcm_stream *p) { hw->subformats &= p->subformats; } @@ -583,8 +583,8 @@ int snd_soc_runtime_calc_hw(struct snd_soc_pcm_runtime *rtd, { struct snd_soc_dai *codec_dai; struct snd_soc_dai *cpu_dai; - struct snd_soc_pcm_stream *codec_stream; - struct snd_soc_pcm_stream *cpu_stream; + const struct snd_soc_pcm_stream *codec_stream; + const struct snd_soc_pcm_stream *cpu_stream; unsigned int cpu_chan_min = 0, cpu_chan_max = UINT_MAX; int i; @@ -1712,7 +1712,7 @@ static void dpcm_runtime_setup_fe(struct snd_pcm_substream *substream) hw->formats &= formats; for_each_rtd_cpu_dais(fe, i, dai) { - struct snd_soc_pcm_stream *cpu_stream; + const struct snd_soc_pcm_stream *cpu_stream; /* * Skip CPUs which don't support the current stream @@ -1750,7 +1750,7 @@ static void dpcm_runtime_setup_be_format(struct snd_pcm_substream *substream) for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; - struct snd_soc_pcm_stream *codec_stream; + const struct snd_soc_pcm_stream *codec_stream; int i; for_each_rtd_codec_dais(be, i, dai) { @@ -1787,7 +1787,7 @@ static void dpcm_runtime_setup_be_chan(struct snd_pcm_substream *substream) for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; - struct snd_soc_pcm_stream *cpu_stream; + const struct snd_soc_pcm_stream *cpu_stream; struct snd_soc_dai *dai; int i; @@ -1809,7 +1809,7 @@ static void dpcm_runtime_setup_be_chan(struct snd_pcm_substream *substream) * DAIs connected to a single CPU DAI, use CPU DAI's directly */ if (be->dai_link->num_codecs == 1) { - struct snd_soc_pcm_stream *codec_stream = snd_soc_dai_get_pcm_stream( + const struct snd_soc_pcm_stream *codec_stream = snd_soc_dai_get_pcm_stream( snd_soc_rtd_to_codec(be, 0), stream); soc_pcm_hw_update_chan(hw, codec_stream); @@ -1835,7 +1835,7 @@ static void dpcm_runtime_setup_be_rate(struct snd_pcm_substream *substream) for_each_dpcm_be(fe, stream, dpcm) { struct snd_soc_pcm_runtime *be = dpcm->be; - struct snd_soc_pcm_stream *pcm; + const struct snd_soc_pcm_stream *pcm; struct snd_soc_dai *dai; int i; -- cgit v1.2.3 From d6a711a898672dd873aab3844f754a3ca40723a5 Mon Sep 17 00:00:00 2001 From: Patrice Chotard Date: Tue, 18 Jun 2024 15:29:51 +0200 Subject: spi: Fix OCTAL mode support Add OCTAL mode support. Issue detected using "--octal" spidev_test's option. Signed-off-by: Patrice Chotard Link: https://msgid.link/r/20240618132951.2743935-4-patrice.chotard@foss.st.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 6 ++++-- include/linux/spi/spi.h | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 9bc9fd10d538..9da736d51a2b 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4156,7 +4156,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) return -EINVAL; if (xfer->tx_nbits != SPI_NBITS_SINGLE && xfer->tx_nbits != SPI_NBITS_DUAL && - xfer->tx_nbits != SPI_NBITS_QUAD) + xfer->tx_nbits != SPI_NBITS_QUAD && + xfer->tx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->tx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_TX_DUAL | SPI_TX_QUAD))) @@ -4171,7 +4172,8 @@ static int __spi_validate(struct spi_device *spi, struct spi_message *message) return -EINVAL; if (xfer->rx_nbits != SPI_NBITS_SINGLE && xfer->rx_nbits != SPI_NBITS_DUAL && - xfer->rx_nbits != SPI_NBITS_QUAD) + xfer->rx_nbits != SPI_NBITS_QUAD && + xfer->rx_nbits != SPI_NBITS_OCTAL) return -EINVAL; if ((xfer->rx_nbits == SPI_NBITS_DUAL) && !(spi->mode & (SPI_RX_DUAL | SPI_RX_QUAD))) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e8e1e798924f..98fdef6e28f2 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -1085,12 +1085,13 @@ struct spi_transfer { unsigned dummy_data:1; unsigned cs_off:1; unsigned cs_change:1; - unsigned tx_nbits:3; - unsigned rx_nbits:3; + unsigned tx_nbits:4; + unsigned rx_nbits:4; unsigned timestamped:1; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ +#define SPI_NBITS_OCTAL 0x08 /* 8-bit transfer */ u8 bits_per_word; struct spi_delay delay; struct spi_delay cs_change_delay; -- cgit v1.2.3 From 702eb71fd6501b3566283f8c96d7ccc6ddd662e9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 17 Jun 2024 18:23:00 +0200 Subject: fsnotify: Do not generate events for O_PATH file descriptors Currently we will not generate FS_OPEN events for O_PATH file descriptors but we will generate FS_CLOSE events for them. This is asymmetry is confusing. Arguably no fsnotify events should be generated for O_PATH file descriptors as they cannot be used to access or modify file content, they are just convenient handles to file objects like paths. So fix the asymmetry by stopping to generate FS_CLOSE for O_PATH file descriptors. Cc: Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20240617162303.1596-1-jack@suse.cz Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/linux/fsnotify.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 4da80e92f804..278620e063ab 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -112,7 +112,13 @@ static inline int fsnotify_file(struct file *file, __u32 mask) { const struct path *path; - if (file->f_mode & FMODE_NONOTIFY) + /* + * FMODE_NONOTIFY are fds generated by fanotify itself which should not + * generate new events. We also don't want to generate events for + * FMODE_PATH fds (involves open & close events) as they are just + * handle creation / destruction events and not "real" file events. + */ + if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH)) return 0; path = &file->f_path; -- cgit v1.2.3 From 9f774c757e3fb2ac32dc4377e8f21f3364a8df81 Mon Sep 17 00:00:00 2001 From: Shenghao Ding Date: Fri, 14 Jun 2024 21:36:45 +0800 Subject: ASoc: tas2781: Enable RCA-based playback without DSP firmware download In only loading RCA (Reconfigurable Architecture) binary case, no DSP program will be working inside tas2563/tas2781, that is dsp-bypass mode, do not support speaker protection, or audio acoustic algorithms in this mode. Fixes: ef3bcde75d06 ("ASoC: tas2781: Add tas2781 driver") Signed-off-by: Shenghao Ding Reviewed-by: Pierre-Louis Bossart Link: https://msgid.link/r/20240614133646.910-1-shenghao-ding@ti.com Signed-off-by: Mark Brown --- include/sound/tas2781-dsp.h | 11 +++++++++-- sound/soc/codecs/tas2781-fmwlib.c | 18 +++++++++++++----- sound/soc/codecs/tas2781-i2c.c | 39 ++++++++++++++++++++++++++------------- 3 files changed, 48 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/sound/tas2781-dsp.h b/include/sound/tas2781-dsp.h index 7fba7ea26a4b..3cda9da14f6d 100644 --- a/include/sound/tas2781-dsp.h +++ b/include/sound/tas2781-dsp.h @@ -117,10 +117,17 @@ struct tasdevice_fw { struct device *dev; }; -enum tasdevice_dsp_fw_state { - TASDEVICE_DSP_FW_NONE = 0, +enum tasdevice_fw_state { + /* Driver in startup mode, not load any firmware. */ TASDEVICE_DSP_FW_PENDING, + /* DSP firmware in the system, but parsing error. */ TASDEVICE_DSP_FW_FAIL, + /* + * Only RCA (Reconfigurable Architecture) firmware load + * successfully. + */ + TASDEVICE_RCA_FW_OK, + /* Both RCA and DSP firmware load successfully. */ TASDEVICE_DSP_FW_ALL_OK, }; diff --git a/sound/soc/codecs/tas2781-fmwlib.c b/sound/soc/codecs/tas2781-fmwlib.c index 265a8ca25cbb..838d29fead96 100644 --- a/sound/soc/codecs/tas2781-fmwlib.c +++ b/sound/soc/codecs/tas2781-fmwlib.c @@ -2324,14 +2324,21 @@ void tasdevice_tuning_switch(void *context, int state) struct tasdevice_fw *tas_fmw = tas_priv->fmw; int profile_cfg_id = tas_priv->rcabin.profile_cfg_id; - if (tas_priv->fw_state == TASDEVICE_DSP_FW_FAIL) { - dev_err(tas_priv->dev, "DSP bin file not loaded\n"); + /* + * Only RCA-based Playback can still work with no dsp program running + * inside the chip. + */ + switch (tas_priv->fw_state) { + case TASDEVICE_RCA_FW_OK: + case TASDEVICE_DSP_FW_ALL_OK: + break; + default: return; } if (state == 0) { - if (tas_priv->cur_prog < tas_fmw->nr_programs) { - /*dsp mode or tuning mode*/ + if (tas_fmw && tas_priv->cur_prog < tas_fmw->nr_programs) { + /* dsp mode or tuning mode */ profile_cfg_id = tas_priv->rcabin.profile_cfg_id; tasdevice_select_tuningprm_cfg(tas_priv, tas_priv->cur_prog, tas_priv->cur_conf, @@ -2340,9 +2347,10 @@ void tasdevice_tuning_switch(void *context, int state) tasdevice_select_cfg_blk(tas_priv, profile_cfg_id, TASDEVICE_BIN_BLK_PRE_POWER_UP); - } else + } else { tasdevice_select_cfg_blk(tas_priv, profile_cfg_id, TASDEVICE_BIN_BLK_PRE_SHUTDOWN); + } } EXPORT_SYMBOL_NS_GPL(tasdevice_tuning_switch, SND_SOC_TAS2781_FMWLIB); diff --git a/sound/soc/codecs/tas2781-i2c.c b/sound/soc/codecs/tas2781-i2c.c index 9350972dfefe..c64d458e524e 100644 --- a/sound/soc/codecs/tas2781-i2c.c +++ b/sound/soc/codecs/tas2781-i2c.c @@ -380,23 +380,37 @@ static void tasdevice_fw_ready(const struct firmware *fmw, mutex_lock(&tas_priv->codec_lock); ret = tasdevice_rca_parser(tas_priv, fmw); - if (ret) + if (ret) { + tasdevice_config_info_remove(tas_priv); goto out; + } tasdevice_create_control(tas_priv); tasdevice_dsp_remove(tas_priv); tasdevice_calbin_remove(tas_priv); - tas_priv->fw_state = TASDEVICE_DSP_FW_PENDING; + /* + * The baseline is the RCA-only case, and then the code attempts to + * load DSP firmware but in case of failures just keep going, i.e. + * failing to load DSP firmware is NOT an error. + */ + tas_priv->fw_state = TASDEVICE_RCA_FW_OK; scnprintf(tas_priv->coef_binaryname, 64, "%s_coef.bin", tas_priv->dev_name); ret = tasdevice_dsp_parser(tas_priv); if (ret) { dev_err(tas_priv->dev, "dspfw load %s error\n", tas_priv->coef_binaryname); - tas_priv->fw_state = TASDEVICE_DSP_FW_FAIL; goto out; } - tasdevice_dsp_create_ctrls(tas_priv); + + /* + * If no dsp-related kcontrol created, the dsp resource will be freed. + */ + ret = tasdevice_dsp_create_ctrls(tas_priv); + if (ret) { + dev_err(tas_priv->dev, "dsp controls error\n"); + goto out; + } tas_priv->fw_state = TASDEVICE_DSP_FW_ALL_OK; @@ -417,9 +431,8 @@ static void tasdevice_fw_ready(const struct firmware *fmw, tasdevice_prmg_load(tas_priv, 0); tas_priv->cur_prog = 0; out: - if (tas_priv->fw_state == TASDEVICE_DSP_FW_FAIL) { - /*If DSP FW fail, kcontrol won't be created */ - tasdevice_config_info_remove(tas_priv); + if (tas_priv->fw_state == TASDEVICE_RCA_FW_OK) { + /* If DSP FW fail, DSP kcontrol won't be created. */ tasdevice_dsp_remove(tas_priv); } mutex_unlock(&tas_priv->codec_lock); @@ -466,14 +479,14 @@ static int tasdevice_startup(struct snd_pcm_substream *substream, { struct snd_soc_component *codec = dai->component; struct tasdevice_priv *tas_priv = snd_soc_component_get_drvdata(codec); - int ret = 0; - if (tas_priv->fw_state != TASDEVICE_DSP_FW_ALL_OK) { - dev_err(tas_priv->dev, "DSP bin file not loaded\n"); - ret = -EINVAL; + switch (tas_priv->fw_state) { + case TASDEVICE_RCA_FW_OK: + case TASDEVICE_DSP_FW_ALL_OK: + return 0; + default: + return -EINVAL; } - - return ret; } static int tasdevice_hw_params(struct snd_pcm_substream *substream, -- cgit v1.2.3 From a6816314af5749cd88944bfdceb270c627cdf348 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 3 May 2024 11:17:32 -0700 Subject: KVM: Introduce vcpu->wants_to_run Introduce vcpu->wants_to_run to indicate when a vCPU is in its core run loop, i.e. when the vCPU is running the KVM_RUN ioctl and immediate_exit was not set. Replace all references to vcpu->run->immediate_exit with !vcpu->wants_to_run to avoid TOCTOU races with userspace. For example, a malicious userspace could invoked KVM_RUN with immediate_exit=true and then after KVM reads it to set wants_to_run=false, flip it to false. This would result in the vCPU running in KVM_RUN with wants_to_run=false. This wouldn't cause any real bugs today but is a dangerous landmine. Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20240503181734.1467938-2-dmatlack@google.com Signed-off-by: Sean Christopherson --- arch/arm64/kvm/arm.c | 2 +- arch/loongarch/kvm/vcpu.c | 2 +- arch/mips/kvm/mips.c | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/riscv/kvm/vcpu.c | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 3 +++ 9 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9996a989b52e..f9a418e9ea6a 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1099,7 +1099,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu_load(vcpu); - if (run->immediate_exit) { + if (!vcpu->wants_to_run) { ret = -EINTR; goto out; } diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 9e8030d45129..f27d2bf14dc0 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -1266,7 +1266,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_complete_iocsr_read(vcpu, run); } - if (run->immediate_exit) + if (!vcpu->wants_to_run) return r; /* Clear exit_reason */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 231ac052b506..f1a99962027a 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -436,7 +436,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) vcpu->mmio_needed = 0; } - if (vcpu->run->immediate_exit) + if (!vcpu->wants_to_run) goto out; lose_fpu(1); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d32abe7fe6ab..961aadc71de2 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1852,7 +1852,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_sigset_activate(vcpu); - if (run->immediate_exit) + if (!vcpu->wants_to_run) r = -EINTR; else r = kvmppc_vcpu_run(vcpu); diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 17e21df36cc1..2123df6ee409 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -760,7 +760,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) return ret; } - if (run->immediate_exit) { + if (!vcpu->wants_to_run) { kvm_vcpu_srcu_read_unlock(vcpu); return -EINTR; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4641083ee100..5da808976085 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5026,7 +5026,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) if (vcpu->kvm->arch.pv.dumping) return -EINVAL; - if (kvm_run->immediate_exit) + if (!vcpu->wants_to_run) return -EINTR; if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS || diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 994aa281b07d..de581487e3c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -11407,7 +11407,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_vcpu_srcu_read_lock(vcpu); if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { - if (kvm_run->immediate_exit) { + if (!vcpu->wants_to_run) { r = -EINTR; goto out; } @@ -11485,7 +11485,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vcpu->mmio_needed); } - if (kvm_run->immediate_exit) { + if (!vcpu->wants_to_run) { r = -EINTR; goto out; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7b9d2633a931..d72ced3e74d1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -378,6 +378,7 @@ struct kvm_vcpu { bool dy_eligible; } spin_loop; #endif + bool wants_to_run; bool preempted; bool ready; bool scheduled_out; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fed80bfbe99b..66bfdfa11805 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4435,7 +4435,10 @@ static long kvm_vcpu_ioctl(struct file *filp, synchronize_rcu(); put_pid(oldpid); } + vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit); r = kvm_arch_vcpu_ioctl_run(vcpu); + vcpu->wants_to_run = false; + trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; } -- cgit v1.2.3 From 4b23e0c199b20fa6fe9655b3d0e12d6c6f18c27f Mon Sep 17 00:00:00 2001 From: David Matlack Date: Fri, 3 May 2024 11:17:33 -0700 Subject: KVM: Ensure new code that references immediate_exit gets extra scrutiny Ensure that any new KVM code that references immediate_exit gets extra scrutiny by renaming it to immediate_exit__unsafe in kernel code. All fields in struct kvm_run are subject to TOCTOU races since they are mapped into userspace, which may be malicious or buggy. To protect KVM, introduces a new macro that appends __unsafe to select field names in struct kvm_run, hinting to developers and reviewers that accessing such fields must be done carefully. Apply the new macro to immediate_exit, since userspace can make immediate_exit inconsistent with vcpu->wants_to_run, i.e. accessing immediate_exit directly could lead to unexpected bugs in the future. Signed-off-by: David Matlack Link: https://lore.kernel.org/r/20240503181734.1467938-3-dmatlack@google.com [sean: massage changelog] Signed-off-by: Sean Christopherson --- include/uapi/linux/kvm.h | 15 ++++++++++++++- virt/kvm/kvm_main.c | 2 +- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d03842abae57..795773f5db63 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from to __unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 66bfdfa11805..2fed9a9b8bd6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4435,7 +4435,7 @@ static long kvm_vcpu_ioctl(struct file *filp, synchronize_rcu(); put_pid(oldpid); } - vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit); + vcpu->wants_to_run = !READ_ONCE(vcpu->run->immediate_exit__unsafe); r = kvm_arch_vcpu_ioctl_run(vcpu); vcpu->wants_to_run = false; -- cgit v1.2.3 From 395e73bd8d35fa9b8505e44f63a2a10abde09cce Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 9 Jun 2024 20:12:19 -0700 Subject: srcu: Add NUM_ACTIVE_SRCU_POLL_OLDSTATE This commit adds NUM_ACTIVE_SRCU_POLL_OLDSTATE, which gives the maximum number of distinct return values from get_state_synchronize_rcu() that can, at a given point in time, correspond to not-completed SRCU grace periods. Reported-by: Kent Overstreet Closes: https://lore.kernel.org/all/irycqy4sinjdgm2hkyix2bffunpcmuwgeufsx6nlljvqme3wiu@ify3zdnrmzph/ Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 236610e4a8fa..f664cba7a80c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -61,6 +61,10 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); +// Maximum number of unsigned long values corresponding to +// not-yet-completed SRCU grace periods. +#define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2 + #ifdef CONFIG_NEED_SRCU_NMI_SAFE int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); -- cgit v1.2.3 From e206f33e2c0774276a0497fe538472e12016a362 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 14 Jun 2024 13:26:44 -0700 Subject: srcu: Fill out polled grace-period APIs This commit adds the get_completed_synchronize_srcu() and the same_state_synchronize_srcu() functions. The first returns a cookie that is always interpreted as corresponding to an expired grace period. The second does an equality comparison of a pair of cookies. Signed-off-by: Paul E. McKenney Cc: Kent Overstreet --- include/linux/srcu.h | 31 +++++++++++++++++++++++++++++++ kernel/rcu/srcutiny.c | 3 ++- kernel/rcu/srcutree.c | 3 ++- 3 files changed, 35 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f664cba7a80c..6f6cb5fc1242 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -57,6 +57,20 @@ void cleanup_srcu_struct(struct srcu_struct *ssp); int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); void synchronize_srcu(struct srcu_struct *ssp); + +#define SRCU_GET_STATE_COMPLETED 0x1 + +/** + * get_completed_synchronize_srcu - Return a pre-completed polled state cookie + * + * Returns a value that poll_state_synchronize_srcu() will always treat + * as a cookie whose grace period has already completed. + */ +static inline unsigned long get_completed_synchronize_srcu(void) +{ + return SRCU_GET_STATE_COMPLETED; +} + unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp); unsigned long start_poll_synchronize_srcu(struct srcu_struct *ssp); bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); @@ -65,6 +79,23 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie); // not-yet-completed SRCU grace periods. #define NUM_ACTIVE_SRCU_POLL_OLDSTATE 2 +/** + * same_state_synchronize_srcu - Are two old-state values identical? + * @oldstate1: First old-state value. + * @oldstate2: Second old-state value. + * + * The two old-state values must have been obtained from either + * get_state_synchronize_srcu(), start_poll_synchronize_srcu(), or + * get_completed_synchronize_srcu(). Returns @true if the two values are + * identical and @false otherwise. This allows structures whose lifetimes + * are tracked by old-state values to push these values to a list header, + * allowing those structures to be slightly smaller. + */ +static inline bool same_state_synchronize_srcu(unsigned long oldstate1, unsigned long oldstate2) +{ + return oldstate1 == oldstate2; +} + #ifdef CONFIG_NEED_SRCU_NMI_SAFE int __srcu_read_lock_nmisafe(struct srcu_struct *ssp) __acquires(ssp); void __srcu_read_unlock_nmisafe(struct srcu_struct *ssp, int idx) __releases(ssp); diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 5afd5cf494db..549c03336ee9 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -277,7 +277,8 @@ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) unsigned long cur_s = READ_ONCE(ssp->srcu_idx); barrier(); - return ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3); + return cookie == SRCU_GET_STATE_COMPLETED || + ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3); } EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 15dc22a8ff5a..d6a4047719cb 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -1543,7 +1543,8 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); */ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) { - if (!rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie)) + if (cookie != SRCU_GET_STATE_COMPLETED && + !rcu_seq_done(&ssp->srcu_sup->srcu_gp_seq, cookie)) return false; // Ensure that the end of the SRCU grace period happens before // any subsequent code that the caller might execute. -- cgit v1.2.3 From 5c563ee90a22d3295bcd6217e3ecd7bf9f4d9d48 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 24 May 2024 11:58:28 -0700 Subject: cpumask: introduce assign_cpu() macro Now that assign_bit() is a thin macro wrapper around set_bit() and clear_bit(), we can use it in cpumask API and drop duplicating implementations of set_cpu_xxx() helpers with no additional overhead. Bloat-o-meter reports almost 2k less of generated code for allyesconfig, mostly in kernel/cpu.c: add/remove: 2/4 grow/shrink: 3/4 up/down: 498/-2228 (-1730) Reviewed-by: Alexander Lobakin Signed-off-by: Yury Norov --- include/linux/cpumask.h | 40 ++++++---------------------------------- 1 file changed, 6 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 23686bed441d..18410acdbc9e 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1083,44 +1083,16 @@ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); -static inline void -set_cpu_possible(unsigned int cpu, bool possible) -{ - if (possible) - cpumask_set_cpu(cpu, &__cpu_possible_mask); - else - cpumask_clear_cpu(cpu, &__cpu_possible_mask); -} +#define assign_cpu(cpu, mask, val) \ + assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) -static inline void -set_cpu_present(unsigned int cpu, bool present) -{ - if (present) - cpumask_set_cpu(cpu, &__cpu_present_mask); - else - cpumask_clear_cpu(cpu, &__cpu_present_mask); -} +#define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) +#define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) +#define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active)) +#define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying)) void set_cpu_online(unsigned int cpu, bool online); -static inline void -set_cpu_active(unsigned int cpu, bool active) -{ - if (active) - cpumask_set_cpu(cpu, &__cpu_active_mask); - else - cpumask_clear_cpu(cpu, &__cpu_active_mask); -} - -static inline void -set_cpu_dying(unsigned int cpu, bool dying) -{ - if (dying) - cpumask_set_cpu(cpu, &__cpu_dying_mask); - else - cpumask_clear_cpu(cpu, &__cpu_dying_mask); -} - /** * to_cpumask - convert a NR_CPUS bitmap to a struct cpumask * * @bitmap: the bitmap -- cgit v1.2.3 From e0eeb938adb0367de4b0946125a06142d8de7d37 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 11 Jun 2024 15:38:12 +0300 Subject: bitops: Add a comment explaining the double underscore macros Linus Walleij pointed out that a new comer might be confused about the difference between set_bit() and __set_bit(). Add a comment explaining the difference. Link: https://lore.kernel.org/all/CACRpkdZFPG_YLici-BmYfk9HZ36f4WavCN3JNotkk8cPgCODCg@mail.gmail.com/ Signed-off-by: Dan Carpenter Reviewed-by: Linus Walleij Signed-off-by: Yury Norov --- include/linux/bitops.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 46d4bdc634c0..ba35bbf07798 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -47,12 +47,17 @@ extern unsigned long __sw_hweight64(__u64 w); __builtin_constant_p(*(const unsigned long *)(addr))) ? \ const##op(nr, addr) : op(nr, addr)) +/* + * The following macros are non-atomic versions of their non-underscored + * counterparts. + */ #define __set_bit(nr, addr) bitop(___set_bit, nr, addr) #define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) #define __change_bit(nr, addr) bitop(___change_bit, nr, addr) #define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) #define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) #define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) + #define test_bit(nr, addr) bitop(_test_bit, nr, addr) #define test_bit_acquire(nr, addr) bitop(_test_bit_acquire, nr, addr) -- cgit v1.2.3 From 52c2e956dcebecc8901911217a9647203ebcaf3c Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:53 -0700 Subject: drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the plan is to support multiple types of perf counter streams (OA is only one type of these streams). Rather than introduce NxM ioctls for these (N perf streams with M ioctl's per perf stream), we decide to multiplex these (N different stream types and the M ops for each of these stream types) through a single PERF ioctl. This multiplexing is the purpose of the PERF layer. In addition to PERF DRM ioctl's, another set of ioctl's on the PERF fd are defined. These are expected to be common to different PERF stream types and therefore defined at the PERF layer itself. v2: Add param_size to 'struct drm_xe_perf_param' (Umesh) v3: Rename 'enum drm_xe_perf_ops' to 'enum drm_xe_perf_ioctls' (Guy Zadicario) Add DRM_ prefix to ioctl names to indicate uapi names v4: Add 'enum drm_xe_perf_op' previously missed out (Guy Zadicario) v5: Squash the ops and PERF layer patches into a single patch (Umesh) Remove param_size from struct 'drm_xe_perf_param' (Umesh) v6: Add DRM_XE_PERF_IOCTL_STATUS v7: Add DRM_XE_PERF_IOCTL_INFO v8: Fix Copyright years, fix DRM_XE_PERF_TYPE_MAX, move '#include "xe_perf.h"' to xe_perf.c, add kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: Guy Zadicario Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-2-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 2 ++ drivers/gpu/drm/xe/xe_perf.c | 34 ++++++++++++++++++++++ drivers/gpu/drm/xe/xe_perf.h | 14 +++++++++ include/uapi/drm/xe_drm.h | 66 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 117 insertions(+) create mode 100644 drivers/gpu/drm/xe/xe_perf.c create mode 100644 drivers/gpu/drm/xe/xe_perf.h (limited to 'include') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index cbf961b90237..f99492449e5d 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -95,6 +95,7 @@ xe-y += xe_bb.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ + xe_perf.o \ xe_pm.o \ xe_preempt_fence.o \ xe_pt.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 64691a56d59c..a44093cbbb71 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -44,6 +44,7 @@ #include "xe_module.h" #include "xe_pat.h" #include "xe_pcode.h" +#include "xe_perf.h" #include "xe_pm.h" #include "xe_query.h" #include "xe_sriov.h" @@ -141,6 +142,7 @@ static const struct drm_ioctl_desc xe_ioctls[] = { DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(XE_PERF, xe_perf_ioctl, DRM_RENDER_ALLOW), }; static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c new file mode 100644 index 000000000000..2963174ecd0e --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include + +#include "xe_perf.h" + +/** + * xe_perf_ioctl - The top level perf layer ioctl + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + * + * The function is called for different perf streams types and allows execution + * of different operations supported by those perf stream types. + * + * Return: 0 on success or a negative error code on failure. + */ +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) +{ + struct drm_xe_perf_param *arg = data; + + if (arg->extensions) + return -EINVAL; + + switch (arg->perf_type) { + default: + return -EINVAL; + } +} diff --git a/drivers/gpu/drm/xe/xe_perf.h b/drivers/gpu/drm/xe/xe_perf.h new file mode 100644 index 000000000000..e7e258eaf0a9 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_perf.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_PERF_H_ +#define _XE_PERF_H_ + +struct drm_device; +struct drm_file; + +int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file); + +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index d7b0903c22b2..c1626027dc69 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,6 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE + * - &DRM_IOCTL_XE_PERF */ /* @@ -100,6 +101,8 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a +#define DRM_XE_PERF 0x0b + /* Must be kept compact -- no holes */ #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query) @@ -113,6 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) +#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) /** * DOC: Xe IOCTL Extensions @@ -1370,6 +1374,68 @@ struct drm_xe_wait_user_fence { __u64 reserved[2]; }; +/** + * enum drm_xe_perf_type - Perf stream types + */ +enum drm_xe_perf_type { + __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ +}; + +/** + * enum drm_xe_perf_op - Perf stream ops + */ +enum drm_xe_perf_op { + /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ + DRM_XE_PERF_OP_STREAM_OPEN, + + /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ + DRM_XE_PERF_OP_ADD_CONFIG, + + /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ + DRM_XE_PERF_OP_REMOVE_CONFIG, +}; + +/** + * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * + * The perf layer enables multiplexing perf counter streams of multiple + * types. The actual params for a particular stream operation are supplied + * via the @param pointer (use __copy_from_user to get these params). + */ +struct drm_xe_perf_param { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ + __u64 perf_type; + /** @perf_op: Perf op, of enum @drm_xe_perf_op */ + __u64 perf_op; + /** @param: Pointer to actual stream params */ + __u64 param; +}; + +/** + * enum drm_xe_perf_ioctls - Perf fd ioctl's + * + * Information exchanged between userspace and kernel for perf fd ioctl's + * is stream type specific + */ +enum drm_xe_perf_ioctls { + /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ + DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), + + /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ + DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + + /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ + DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + + /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ + DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + + /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ + DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 67977882a2f1339f0a7d32576ad61967828b2ca5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:55 -0700 Subject: drm/xe/oa/uapi: Add OA data formats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add and initialize supported OA data formats for various platforms (including Xe2). User can request OA data in any supported format. Bspec: 52198, 60942, 61101 v2: Start 'xe_oa_format_name' enum from 0 (Umesh) Fix error rewind with OA (Umesh) v3: Use graphics versions rather than absolute platform names v4: Add missing kernel doc for struct memebers and enum and other minor changes (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-4-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/Makefile | 1 + drivers/gpu/drm/xe/xe_device.c | 11 +++- drivers/gpu/drm/xe/xe_device_types.h | 4 ++ drivers/gpu/drm/xe/xe_oa.c | 111 +++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 16 +++++ drivers/gpu/drm/xe/xe_oa_types.h | 83 ++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 19 ++++++ 7 files changed, 244 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/xe/xe_oa.c create mode 100644 drivers/gpu/drm/xe/xe_oa.h create mode 100644 drivers/gpu/drm/xe/xe_oa_types.h (limited to 'include') diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index f99492449e5d..7039008be234 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -92,6 +92,7 @@ xe-y += xe_bb.o \ xe_mmio.o \ xe_mocs.o \ xe_module.o \ + xe_oa.o \ xe_pat.o \ xe_pci.o \ xe_pcode.o \ diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index a44093cbbb71..1195c64a715a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -656,10 +656,14 @@ int xe_device_probe(struct xe_device *xe) xe_heci_gsc_init(xe); - err = xe_display_init(xe); + err = xe_oa_init(xe); if (err) goto err_fini_gt; + err = xe_display_init(xe); + if (err) + goto err_fini_oa; + err = drm_dev_register(&xe->drm, 0); if (err) goto err_fini_display; @@ -675,6 +679,9 @@ int xe_device_probe(struct xe_device *xe) err_fini_display: xe_display_driver_remove(xe); +err_fini_oa: + xe_oa_fini(xe); + err_fini_gt: for_each_gt(gt, xe, id) { if (id < last_gt) @@ -707,6 +714,8 @@ void xe_device_remove(struct xe_device *xe) xe_display_fini(xe); + xe_oa_fini(xe); + xe_heci_gsc_fini(xe); for_each_gt(gt, xe, id) diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 52bc461171d5..185986e1d586 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -17,6 +17,7 @@ #include "xe_gt_types.h" #include "xe_lmtt_types.h" #include "xe_memirq_types.h" +#include "xe_oa.h" #include "xe_platform_types.h" #include "xe_pt_types.h" #include "xe_sriov_types.h" @@ -462,6 +463,9 @@ struct xe_device { /** @heci_gsc: graphics security controller */ struct xe_heci_gsc heci_gsc; + /** @oa: oa perf counter subsystem */ + struct xe_oa oa; + /** @needs_flr_on_fini: requests function-reset on fini */ bool needs_flr_on_fini; diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c new file mode 100644 index 000000000000..5c0179ff4f60 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#include + +#include "xe_assert.h" +#include "xe_device.h" +#include "xe_macros.h" +#include "xe_oa.h" + +#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x + +static const struct xe_oa_format oa_formats[] = { + [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, + [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, + [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, + [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, + [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, + [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, + [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, +}; + +static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) +{ + __set_bit(format, oa->format_mask); +} + +static void xe_oa_init_supported_formats(struct xe_oa *oa) +{ + if (GRAPHICS_VER(oa->xe) >= 20) { + /* Xe2+ */ + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64); + oa_format_add(oa, XE_OA_FORMAT_PEC64u64_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_PEC64u32); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G1); + oa_format_add(oa, XE_OA_FORMAT_PEC32u64_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC32u32_G2); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_32_G2_4); + oa_format_add(oa, XE_OA_FORMAT_PEC36u64_G1_4_G2_32); + } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { + /* XE_METEORLAKE */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u64_B8_C8); + oa_format_add(oa, XE_OAM_FORMAT_MPEC8u32_B8_C8); + } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { + /* XE_DG2, XE_PVC */ + oa_format_add(oa, XE_OAR_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A24u40_A14u32_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A24u64_B8_C8); + oa_format_add(oa, XE_OAC_FORMAT_A22u32_R2u32_B8_C8); + } else { + /* Gen12+ */ + xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); + oa_format_add(oa, XE_OA_FORMAT_A12); + oa_format_add(oa, XE_OA_FORMAT_A12_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_A32u40_A4u32_B8_C8); + oa_format_add(oa, XE_OA_FORMAT_C4_B8); + } +} + +/** + * xe_oa_init - OA initialization during device probe + * @xe: @xe_device + * + * Return: 0 on success or a negative error code on failure + */ +int xe_oa_init(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + /* Support OA only with GuC submission and Gen12+ */ + if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) + return 0; + + oa->xe = xe; + oa->oa_formats = oa_formats; + + xe_oa_init_supported_formats(oa); + return 0; +} + +/** + * xe_oa_fini - OA de-initialization during device remove + * @xe: @xe_device + */ +void xe_oa_fini(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + oa->xe = NULL; +} diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h new file mode 100644 index 000000000000..2647c1947746 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_H_ +#define _XE_OA_H_ + +#include "xe_oa_types.h" + +struct xe_device; + +int xe_oa_init(struct xe_device *xe); +void xe_oa_fini(struct xe_device *xe); + +#endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h new file mode 100644 index 000000000000..99940e25b1c6 --- /dev/null +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023-2024 Intel Corporation + */ + +#ifndef _XE_OA_TYPES_H_ +#define _XE_OA_TYPES_H_ + +#include +#include + +enum xe_oa_report_header { + HDR_32_BIT = 0, + HDR_64_BIT, +}; + +enum xe_oa_format_name { + XE_OA_FORMAT_C4_B8, + + /* Gen8+ */ + XE_OA_FORMAT_A12, + XE_OA_FORMAT_A12_B8_C8, + XE_OA_FORMAT_A32u40_A4u32_B8_C8, + + /* DG2 */ + XE_OAR_FORMAT_A32u40_A4u32_B8_C8, + XE_OA_FORMAT_A24u40_A14u32_B8_C8, + + /* DG2/MTL OAC */ + XE_OAC_FORMAT_A24u64_B8_C8, + XE_OAC_FORMAT_A22u32_R2u32_B8_C8, + + /* MTL OAM */ + XE_OAM_FORMAT_MPEC8u64_B8_C8, + XE_OAM_FORMAT_MPEC8u32_B8_C8, + + /* Xe2+ */ + XE_OA_FORMAT_PEC64u64, + XE_OA_FORMAT_PEC64u64_B8_C8, + XE_OA_FORMAT_PEC64u32, + XE_OA_FORMAT_PEC32u64_G1, + XE_OA_FORMAT_PEC32u32_G1, + XE_OA_FORMAT_PEC32u64_G2, + XE_OA_FORMAT_PEC32u32_G2, + XE_OA_FORMAT_PEC36u64_G1_32_G2_4, + XE_OA_FORMAT_PEC36u64_G1_4_G2_32, + + __XE_OA_FORMAT_MAX, +}; + +/** + * struct xe_oa_format - Format fields for supported OA formats. OA format + * properties are specified in PRM/Bspec 52198 and 60942 + */ +struct xe_oa_format { + /** @counter_select: counter select value (see Bspec 52198/60942) */ + u32 counter_select; + /** @size: record size as written by HW (multiple of 64 byte cachelines) */ + int size; + /** @type: of enum @drm_xe_oa_format_type */ + int type; + /** @header: 32 or 64 bit report headers */ + enum xe_oa_report_header header; + /** @counter_size: counter size value (see Bspec 60942) */ + u16 counter_size; + /** @bc_report: BC report value (see Bspec 60942) */ + u16 bc_report; +}; + +/** + * struct xe_oa - OA device level information + */ +struct xe_oa { + /** @xe: back pointer to xe device */ + struct xe_device *xe; + + /** @oa_formats: tracks all OA formats across platforms */ + const struct xe_oa_format *oa_formats; + + /** @format_mask: tracks valid OA formats for a platform */ + unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; +}; +#endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index c1626027dc69..7e10874bfb33 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,25 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec + * 52198/60942 + */ +enum drm_xe_oa_format_type { + /** @DRM_XE_OA_FMT_TYPE_OAG: OAG report format */ + DRM_XE_OA_FMT_TYPE_OAG, + /** @DRM_XE_OA_FMT_TYPE_OAR: OAR report format */ + DRM_XE_OA_FMT_TYPE_OAR, + /** @DRM_XE_OA_FMT_TYPE_OAM: OAM report format */ + DRM_XE_OA_FMT_TYPE_OAM, + /** @DRM_XE_OA_FMT_TYPE_OAC: OAC report format */ + DRM_XE_OA_FMT_TYPE_OAC, + /** @DRM_XE_OA_FMT_TYPE_OAM_MPEC: OAM SAMEDIA or OAM MPEC report format */ + DRM_XE_OA_FMT_TYPE_OAM_MPEC, + /** @DRM_XE_OA_FMT_TYPE_PEC: PEC report format */ + DRM_XE_OA_FMT_TYPE_PEC, +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From a9f905ae7b6f29a337dda2ad773c08b92dafe9a5 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:56 -0700 Subject: drm/xe/oa/uapi: Initialize OA units MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize OA unit data struct's for each gt during device probe. Also assign OA units for hardware engines. v2: Remove XE_OA_UNIT_OAG/XE_OA_UNIT_OAM_SAMEDIA_0 enum (Umesh) Change mtl_oa_base to 0x13000 (Umesh) v3: Switch to drmm_ functions and other cleanups (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-5-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/regs/xe_oa_regs.h | 92 +++++++++++++++++++ drivers/gpu/drm/xe/xe_gt_types.h | 4 + drivers/gpu/drm/xe/xe_hw_engine_types.h | 2 + drivers/gpu/drm/xe/xe_oa.c | 156 ++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 54 +++++++++++ include/uapi/drm/xe_drm.h | 14 +++ 6 files changed, 322 insertions(+) create mode 100644 drivers/gpu/drm/xe/regs/xe_oa_regs.h (limited to 'include') diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h new file mode 100644 index 000000000000..99bad563d51d --- /dev/null +++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2023 Intel Corporation + */ + +#ifndef __XE_OA_REGS__ +#define __XE_OA_REGS__ + +#define RPM_CONFIG1 XE_REG(0xd04) +#define GT_NOA_ENABLE REG_BIT(9) + +#define EU_PERF_CNTL0 XE_REG(0xe458) +#define EU_PERF_CNTL4 XE_REG(0xe45c) +#define EU_PERF_CNTL1 XE_REG(0xe558) +#define EU_PERF_CNTL5 XE_REG(0xe55c) +#define EU_PERF_CNTL2 XE_REG(0xe658) +#define EU_PERF_CNTL6 XE_REG(0xe65c) +#define EU_PERF_CNTL3 XE_REG(0xe758) + +#define OA_TLB_INV_CR XE_REG(0xceec) + +/* OAR unit */ +#define OAR_OACONTROL XE_REG(0x2960) +#define OAR_OACONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAR_OACONTROL_COUNTER_ENABLE REG_BIT(0) + +#define OACTXCONTROL(base) XE_REG((base) + 0x360) +#define OAR_OASTATUS XE_REG(0x2968) +#define OA_COUNTER_RESUME REG_BIT(0) + +/* OAG unit */ +#define OAG_OAGLBCTXCTRL XE_REG(0x2b28) +#define OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK REG_GENMASK(7, 2) +#define OAG_OAGLBCTXCTRL_TIMER_ENABLE REG_BIT(1) +#define OAG_OAGLBCTXCTRL_COUNTER_RESUME REG_BIT(0) + +#define OAG_OAHEADPTR XE_REG(0xdb00) +#define OAG_OAHEADPTR_MASK REG_GENMASK(31, 6) +#define OAG_OATAILPTR XE_REG(0xdb04) +#define OAG_OATAILPTR_MASK REG_GENMASK(31, 6) + +#define OAG_OABUFFER XE_REG(0xdb08) +#define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) +#define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) +#define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) +#define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) +#define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) +#define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) +#define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) +#define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) +#define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) +#define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ + +#define OAG_OACONTROL XE_REG(0xdaf4) +#define OAG_OACONTROL_OA_CCS_SELECT_MASK REG_GENMASK(18, 16) +#define OAG_OACONTROL_OA_COUNTER_SEL_MASK REG_GENMASK(4, 2) +#define OAG_OACONTROL_OA_COUNTER_ENABLE REG_BIT(0) +/* Common to all OA units */ +#define OA_OACONTROL_REPORT_BC_MASK REG_GENMASK(9, 9) +#define OA_OACONTROL_COUNTER_SIZE_MASK REG_GENMASK(8, 8) + +#define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) +#define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6) +#define OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS REG_BIT(5) +#define OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS REG_BIT(1) + +#define OAG_OASTATUS XE_REG(0xdafc) +#define OASTATUS_MMIO_TRG_Q_FULL REG_BIT(6) +#define OASTATUS_COUNTER_OVERFLOW REG_BIT(2) +#define OASTATUS_BUFFER_OVERFLOW REG_BIT(1) +#define OASTATUS_REPORT_LOST REG_BIT(0) +/* OAM unit */ +#define OAM_HEAD_POINTER_OFFSET (0x1a0) +#define OAM_TAIL_POINTER_OFFSET (0x1a4) +#define OAM_BUFFER_OFFSET (0x1a8) +#define OAM_CONTEXT_CONTROL_OFFSET (0x1bc) +#define OAM_CONTROL_OFFSET (0x194) +#define OAM_CONTROL_COUNTER_SEL_MASK REG_GENMASK(3, 1) +#define OAM_DEBUG_OFFSET (0x198) +#define OAM_STATUS_OFFSET (0x19c) +#define OAM_MMIO_TRG_OFFSET (0x1d0) + +#define OAM_HEAD_POINTER(base) XE_REG((base) + OAM_HEAD_POINTER_OFFSET) +#define OAM_TAIL_POINTER(base) XE_REG((base) + OAM_TAIL_POINTER_OFFSET) +#define OAM_BUFFER(base) XE_REG((base) + OAM_BUFFER_OFFSET) +#define OAM_CONTEXT_CONTROL(base) XE_REG((base) + OAM_CONTEXT_CONTROL_OFFSET) +#define OAM_CONTROL(base) XE_REG((base) + OAM_CONTROL_OFFSET) +#define OAM_DEBUG(base) XE_REG((base) + OAM_DEBUG_OFFSET) +#define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET) +#define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET) + +#endif diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 10a9a9529377..24bb95de920f 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -12,6 +12,7 @@ #include "xe_gt_sriov_vf_types.h" #include "xe_hw_engine_types.h" #include "xe_hw_fence_types.h" +#include "xe_oa.h" #include "xe_reg_sr_types.h" #include "xe_sa_types.h" #include "xe_uc_types.h" @@ -387,6 +388,9 @@ struct xe_gt { */ u8 instances_per_class[XE_ENGINE_CLASS_MAX]; } user_engines; + + /** @oa: oa perf counter subsystem per gt info */ + struct xe_oa_gt oa; }; #endif diff --git a/drivers/gpu/drm/xe/xe_hw_engine_types.h b/drivers/gpu/drm/xe/xe_hw_engine_types.h index 580bbd7e83b2..70e6434f150d 100644 --- a/drivers/gpu/drm/xe/xe_hw_engine_types.h +++ b/drivers/gpu/drm/xe/xe_hw_engine_types.h @@ -148,6 +148,8 @@ struct xe_hw_engine { enum xe_hw_engine_id engine_id; /** @eclass: pointer to per hw engine class interface */ struct xe_hw_engine_class_intf *eclass; + /** @oa_unit: oa unit for this hw engine */ + struct xe_oa_unit *oa_unit; }; /** diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 5c0179ff4f60..e836fafa9fb3 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,13 +3,20 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include +#include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_gt.h" +#include "xe_gt_printk.h" #include "xe_macros.h" +#include "xe_mmio.h" #include "xe_oa.h" +#define XE_OA_UNIT_INVALID U32_MAX + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -34,6 +41,142 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static u32 num_oa_units_per_gt(struct xe_gt *gt) +{ + return 1; +} + +static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) +{ + if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) { + /* + * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices + * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA + */ + xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA); + + return 0; + } + + return XE_OA_UNIT_INVALID; +} + +static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) +{ + switch (hwe->class) { + case XE_ENGINE_CLASS_RENDER: + case XE_ENGINE_CLASS_COMPUTE: + return 0; + + case XE_ENGINE_CLASS_VIDEO_DECODE: + case XE_ENGINE_CLASS_VIDEO_ENHANCE: + return __hwe_oam_unit(hwe); + + default: + return XE_OA_UNIT_INVALID; + } +} + +static struct xe_oa_regs __oam_regs(u32 base) +{ + return (struct xe_oa_regs) { + base, + OAM_HEAD_POINTER(base), + OAM_TAIL_POINTER(base), + OAM_BUFFER(base), + OAM_CONTEXT_CONTROL(base), + OAM_CONTROL(base), + OAM_DEBUG(base), + OAM_STATUS(base), + OAM_CONTROL_COUNTER_SEL_MASK, + }; +} + +static struct xe_oa_regs __oag_regs(void) +{ + return (struct xe_oa_regs) { + 0, + OAG_OAHEADPTR, + OAG_OATAILPTR, + OAG_OABUFFER, + OAG_OAGLBCTXCTRL, + OAG_OACONTROL, + OAG_OA_DEBUG, + OAG_OASTATUS, + OAG_OACONTROL_OA_COUNTER_SEL_MASK, + }; +} + +static void __xe_oa_init_oa_units(struct xe_gt *gt) +{ + const u32 mtl_oa_base[] = { 0x13000 }; + int i, num_units = gt->oa.num_oa_units; + + for (i = 0; i < num_units; i++) { + struct xe_oa_unit *u = >->oa.oa_unit[i]; + + if (gt->info.type != XE_GT_TYPE_MEDIA) { + u->regs = __oag_regs(); + u->type = DRM_XE_OA_UNIT_TYPE_OAG; + } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) { + u->regs = __oam_regs(mtl_oa_base[i]); + u->type = DRM_XE_OA_UNIT_TYPE_OAM; + } + + /* Set oa_unit_ids now to ensure ids remain contiguous */ + u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; + } +} + +static int xe_oa_init_gt(struct xe_gt *gt) +{ + u32 num_oa_units = num_oa_units_per_gt(gt); + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + struct xe_oa_unit *u; + + u = drmm_kcalloc(>_to_xe(gt)->drm, num_oa_units, sizeof(*u), GFP_KERNEL); + if (!u) + return -ENOMEM; + + for_each_hw_engine(hwe, gt, id) { + u32 index = __hwe_oa_unit(hwe); + + hwe->oa_unit = NULL; + if (index < num_oa_units) { + u[index].num_engines++; + hwe->oa_unit = &u[index]; + } + } + + /* + * Fused off engines can result in oa_unit's with num_engines == 0. These units + * will appear in OA unit query, but no perf streams can be opened on them. + */ + gt->oa.num_oa_units = num_oa_units; + gt->oa.oa_unit = u; + + __xe_oa_init_oa_units(gt); + + drmm_mutex_init(>_to_xe(gt)->drm, >->oa.gt_lock); + + return 0; +} + +static int xe_oa_init_oa_units(struct xe_oa *oa) +{ + struct xe_gt *gt; + int i, ret; + + for_each_gt(gt, oa->xe, i) { + ret = xe_oa_init_gt(gt); + if (ret) + return ret; + } + + return 0; +} + static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) { __set_bit(format, oa->format_mask); @@ -87,6 +230,7 @@ static void xe_oa_init_supported_formats(struct xe_oa *oa) int xe_oa_init(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; + int ret; /* Support OA only with GuC submission and Gen12+ */ if (XE_WARN_ON(!xe_device_uc_enabled(xe)) || XE_WARN_ON(GRAPHICS_VER(xe) < 12)) @@ -95,8 +239,17 @@ int xe_oa_init(struct xe_device *xe) oa->xe = xe; oa->oa_formats = oa_formats; + ret = xe_oa_init_oa_units(oa); + if (ret) { + drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); + goto exit; + } + xe_oa_init_supported_formats(oa); return 0; +exit: + oa->xe = NULL; + return ret; } /** @@ -107,5 +260,8 @@ void xe_oa_fini(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; + if (!oa->xe) + return; + oa->xe = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 99940e25b1c6..e7b91e31f0e8 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -7,8 +7,12 @@ #define _XE_OA_TYPES_H_ #include +#include #include +#include +#include "regs/xe_reg_defs.h" + enum xe_oa_report_header { HDR_32_BIT = 0, HDR_64_BIT, @@ -67,6 +71,53 @@ struct xe_oa_format { u16 bc_report; }; +/** struct xe_oa_regs - Registers for each OA unit */ +struct xe_oa_regs { + u32 base; + struct xe_reg oa_head_ptr; + struct xe_reg oa_tail_ptr; + struct xe_reg oa_buffer; + struct xe_reg oa_ctx_ctrl; + struct xe_reg oa_ctrl; + struct xe_reg oa_debug; + struct xe_reg oa_status; + u32 oa_ctrl_counter_select_mask; +}; + +/** + * struct xe_oa_unit - Hardware OA unit + */ +struct xe_oa_unit { + /** @oa_unit_id: identifier for the OA unit */ + u16 oa_unit_id; + + /** @type: Type of OA unit - OAM, OAG etc. */ + enum drm_xe_oa_unit_type type; + + /** @regs: OA registers for programming the OA unit */ + struct xe_oa_regs regs; + + /** @num_engines: number of engines attached to this OA unit */ + u32 num_engines; + + /** @exclusive_stream: The stream currently using the OA unit */ + struct xe_oa_stream *exclusive_stream; +}; + +/** + * struct xe_oa_gt - OA per-gt information + */ +struct xe_oa_gt { + /** @gt_lock: lock protecting create/destroy OA streams */ + struct mutex gt_lock; + + /** @num_oa_units: number of oa units for each gt */ + u32 num_oa_units; + + /** @oa_unit: array of oa_units */ + struct xe_oa_unit *oa_unit; +}; + /** * struct xe_oa - OA device level information */ @@ -79,5 +130,8 @@ struct xe_oa { /** @format_mask: tracks valid OA formats for a platform */ unsigned long format_mask[BITS_TO_LONGS(__XE_OA_FORMAT_MAX)]; + + /** @oa_unit_ids: tracks oa unit ids assigned across gt's */ + u16 oa_unit_ids; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 7e10874bfb33..323d899a276b 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1436,6 +1436,20 @@ enum drm_xe_perf_ioctls { DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), }; +/** + * enum drm_xe_oa_unit_type - OA unit types + */ +enum drm_xe_oa_unit_type { + /** + * @DRM_XE_OA_UNIT_TYPE_OAG: OAG OA unit. OAR/OAC are considered + * sub-types of OAG. For OAR/OAC, use OAG. + */ + DRM_XE_OA_UNIT_TYPE_OAG, + + /** @DRM_XE_OA_UNIT_TYPE_OAM: OAM OA unit */ + DRM_XE_OA_UNIT_TYPE_OAM, +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 -- cgit v1.2.3 From cdf02fe1a94a768cbcd20f5c4e1a1d805f4a06c0 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:57 -0700 Subject: drm/xe/oa/uapi: Add/remove OA config perf ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce add/remove config perf ops for OA. OA configurations consist of a set of event/counter select register address/value pairs. The add_config perf op validates and stores such configurations and also exposes them in the metrics sysfs. These configurations will be programmed to OA unit HW when an OA stream using a configuration is opened. The OA stream can also switch to other stored configurations. v2: Start config id's from 1 and other minor review comments (Umesh) v3: Add 32 bit build v4: Add kernel doc for non-static functions (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-6-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_device.c | 4 + drivers/gpu/drm/xe/xe_oa.c | 434 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 6 + drivers/gpu/drm/xe/xe_oa_types.h | 10 + drivers/gpu/drm/xe/xe_perf.c | 16 ++ include/uapi/drm/xe_drm.h | 25 +++ 6 files changed, 495 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 1195c64a715a..31b549f5f03a 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -670,6 +670,8 @@ int xe_device_probe(struct xe_device *xe) xe_display_register(xe); + xe_oa_register(xe); + xe_debugfs_register(xe); xe_hwmon_register(xe); @@ -710,6 +712,8 @@ void xe_device_remove(struct xe_device *xe) struct xe_gt *gt; u8 id; + xe_oa_unregister(xe); + xe_device_remove_display(xe); xe_display_fini(xe); diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index e836fafa9fb3..4122785735d4 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -14,9 +14,32 @@ #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" +#include "xe_perf.h" #define XE_OA_UNIT_INVALID U32_MAX +struct xe_oa_reg { + struct xe_reg addr; + u32 value; +}; + +struct xe_oa_config { + struct xe_oa *oa; + + char uuid[UUID_STRING_LEN + 1]; + int id; + + const struct xe_oa_reg *regs; + u32 regs_len; + + struct attribute_group sysfs_metric; + struct attribute *attrs[2]; + struct kobj_attribute sysfs_metric_id; + + struct kref ref; + struct rcu_head rcu; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -41,6 +64,405 @@ static const struct xe_oa_format oa_formats[] = { [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, }; +static void xe_oa_config_release(struct kref *ref) +{ + struct xe_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + + kfree(oa_config->regs); + + kfree_rcu(oa_config, rcu); +} + +static void xe_oa_config_put(struct xe_oa_config *oa_config) +{ + if (!oa_config) + return; + + kref_put(&oa_config->ref, xe_oa_config_release); +} + +static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) +{ + static const struct xe_reg flex_eu_regs[] = { + EU_PERF_CNTL0, + EU_PERF_CNTL1, + EU_PERF_CNTL2, + EU_PERF_CNTL3, + EU_PERF_CNTL4, + EU_PERF_CNTL5, + EU_PERF_CNTL6, + }; + int i; + + for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { + if (flex_eu_regs[i].addr == addr) + return true; + } + return false; +} + +static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table) +{ + while (table->start && table->end) { + if (addr >= table->start && addr <= table->end) + return true; + + table++; + } + + return false; +} + +static const struct xe_mmio_range xehp_oa_b_counters[] = { + { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ + { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ + {} +}; + +static const struct xe_mmio_range gen12_oa_b_counters[] = { + { .start = 0x2b2c, .end = 0x2b2c }, /* OAG_OA_PESS */ + { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */ + { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */ + { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */ + { .start = 0xdc00, .end = 0xdc3c }, /* OAG_SCEC[0-7][0-1] */ + { .start = 0xdc40, .end = 0xdc40 }, /* OAG_SPCTR_CNF */ + { .start = 0xdc44, .end = 0xdc44 }, /* OAA_DBG_REG */ + {} +}; + +static const struct xe_mmio_range mtl_oam_b_counters[] = { + { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */ + { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */ + { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */ + { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ + {} +}; + +static const struct xe_mmio_range xe2_oa_b_counters[] = { + { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */ + { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */ + { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */ + {}, +}; + +static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_reg_in_range_table(addr, xehp_oa_b_counters) || + xe_oa_reg_in_range_table(addr, gen12_oa_b_counters) || + xe_oa_reg_in_range_table(addr, mtl_oam_b_counters) || + (GRAPHICS_VER(oa->xe) >= 20 && + xe_oa_reg_in_range_table(addr, xe2_oa_b_counters)); +} + +static const struct xe_mmio_range mtl_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ + {} +}; + +static const struct xe_mmio_range gen12_oa_mux_regs[] = { + { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ + { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ + { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ + { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ + { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ + {} +}; + +static const struct xe_mmio_range xe2_oa_mux_regs[] = { + { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ + { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ + { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ + { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ + { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ + {}, +}; + +static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) +{ + if (GRAPHICS_VER(oa->xe) >= 20) + return xe_oa_reg_in_range_table(addr, xe2_oa_mux_regs); + else if (GRAPHICS_VERx100(oa->xe) >= 1270) + return xe_oa_reg_in_range_table(addr, mtl_oa_mux_regs); + else + return xe_oa_reg_in_range_table(addr, gen12_oa_mux_regs); +} + +static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr) +{ + return xe_oa_is_valid_flex_addr(oa, addr) || + xe_oa_is_valid_b_counter_addr(oa, addr) || + xe_oa_is_valid_mux_addr(oa, addr); +} + +static struct xe_oa_reg * +xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), + u32 __user *regs, u32 n_regs) +{ + struct xe_oa_reg *oa_regs; + int err; + u32 i; + + oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); + if (!oa_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < n_regs; i++) { + u32 addr, value; + + err = get_user(addr, regs); + if (err) + goto addr_err; + + if (!is_valid(oa, addr)) { + drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n", addr); + err = -EINVAL; + goto addr_err; + } + + err = get_user(value, regs + 1); + if (err) + goto addr_err; + + oa_regs[i].addr = XE_REG(addr); + oa_regs[i].value = value; + + regs += 2; + } + + return oa_regs; + +addr_err: + kfree(oa_regs); + return ERR_PTR(err); +} + +static ssize_t show_dynamic_id(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct xe_oa_config *oa_config = + container_of(attr, typeof(*oa_config), sysfs_metric_id); + + return sysfs_emit(buf, "%d\n", oa_config->id); +} + +static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa, + struct xe_oa_config *oa_config) +{ + sysfs_attr_init(&oa_config->sysfs_metric_id.attr); + oa_config->sysfs_metric_id.attr.name = "id"; + oa_config->sysfs_metric_id.attr.mode = 0444; + oa_config->sysfs_metric_id.show = show_dynamic_id; + oa_config->sysfs_metric_id.store = NULL; + + oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; + oa_config->attrs[1] = NULL; + + oa_config->sysfs_metric.name = oa_config->uuid; + oa_config->sysfs_metric.attrs = oa_config->attrs; + + return sysfs_create_group(oa->metrics_kobj, &oa_config->sysfs_metric); +} + +/** + * xe_oa_add_config_ioctl - Adds one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The functions adds an OA config to the set of OA configs maintained in + * the kernel. The config determines which OA metrics are collected for an + * OA stream. + */ +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct drm_xe_oa_config param; + struct drm_xe_oa_config *arg = ¶m; + struct xe_oa_config *oa_config, *tmp; + struct xe_oa_reg *regs; + int err, id; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n"); + return -EACCES; + } + + err = __copy_from_user(¶m, u64_to_user_ptr(data), sizeof(param)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, arg->extensions) || + XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || + XE_IOCTL_DBG(oa->xe, !arg->n_regs)) + return -EINVAL; + + oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); + if (!oa_config) + return -ENOMEM; + + oa_config->oa = oa; + kref_init(&oa_config->ref); + + if (!uuid_is_valid(arg->uuid)) { + drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n"); + err = -EINVAL; + goto reg_err; + } + + /* Last character in oa_config->uuid will be 0 because oa_config is kzalloc */ + memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); + + oa_config->regs_len = arg->n_regs; + regs = xe_oa_alloc_regs(oa, xe_oa_is_valid_config_reg_addr, + u64_to_user_ptr(arg->regs_ptr), + arg->n_regs); + if (IS_ERR(regs)) { + drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n"); + err = PTR_ERR(regs); + goto reg_err; + } + oa_config->regs = regs; + + err = mutex_lock_interruptible(&oa->metrics_lock); + if (err) + goto reg_err; + + /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ + idr_for_each_entry(&oa->metrics_idr, tmp, id) { + if (!strcmp(tmp->uuid, oa_config->uuid)) { + drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n"); + err = -EADDRINUSE; + goto sysfs_err; + } + } + + err = create_dynamic_oa_sysfs_entry(oa, oa_config); + if (err) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + goto sysfs_err; + } + + oa_config->id = idr_alloc(&oa->metrics_idr, oa_config, 1, 0, GFP_KERNEL); + if (oa_config->id < 0) { + drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n"); + err = oa_config->id; + goto sysfs_err; + } + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id); + + return oa_config->id; + +sysfs_err: + mutex_unlock(&oa->metrics_lock); +reg_err: + xe_oa_config_put(oa_config); + drm_dbg(&oa->xe->drm, "Failed to add new OA config\n"); + return err; +} + +/** + * xe_oa_remove_config_ioctl - Removes one OA config + * @dev: @drm_device + * @data: pointer to struct @drm_xe_perf_param + * @file: @drm_file + */ +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_oa_config *oa_config; + u64 arg, *ptr = u64_to_user_ptr(data); + int ret; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + if (xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n"); + return -EACCES; + } + + ret = get_user(arg, ptr); + if (XE_IOCTL_DBG(oa->xe, ret)) + return ret; + + ret = mutex_lock_interruptible(&oa->metrics_lock); + if (ret) + return ret; + + oa_config = idr_find(&oa->metrics_idr, arg); + if (!oa_config) { + drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n"); + ret = -ENOENT; + goto err_unlock; + } + + WARN_ON(arg != oa_config->id); + + sysfs_remove_group(oa->metrics_kobj, &oa_config->sysfs_metric); + idr_remove(&oa->metrics_idr, arg); + + mutex_unlock(&oa->metrics_lock); + + drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n", oa_config->uuid, oa_config->id); + + xe_oa_config_put(oa_config); + + return 0; + +err_unlock: + mutex_unlock(&oa->metrics_lock); + return ret; +} + +/** + * xe_oa_register - Xe OA registration + * @xe: @xe_device + * + * Exposes the metrics sysfs directory upon completion of module initialization + */ +void xe_oa_register(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + oa->metrics_kobj = kobject_create_and_add("metrics", + &xe->drm.primary->kdev->kobj); +} + +/** + * xe_oa_unregister - Xe OA de-registration + * @xe: @xe_device + */ +void xe_oa_unregister(struct xe_device *xe) +{ + struct xe_oa *oa = &xe->oa; + + if (!oa->metrics_kobj) + return; + + kobject_put(oa->metrics_kobj); + oa->metrics_kobj = NULL; +} + static u32 num_oa_units_per_gt(struct xe_gt *gt) { return 1; @@ -239,6 +661,9 @@ int xe_oa_init(struct xe_device *xe) oa->xe = xe; oa->oa_formats = oa_formats; + drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); + idr_init_base(&oa->metrics_idr, 1); + ret = xe_oa_init_oa_units(oa); if (ret) { drm_err(&xe->drm, "OA initialization failed (%pe)\n", ERR_PTR(ret)); @@ -252,6 +677,12 @@ exit: return ret; } +static int destroy_config(int id, void *p, void *data) +{ + xe_oa_config_put(p); + return 0; +} + /** * xe_oa_fini - OA de-initialization during device remove * @xe: @xe_device @@ -263,5 +694,8 @@ void xe_oa_fini(struct xe_device *xe) if (!oa->xe) return; + idr_for_each(&oa->metrics_idr, destroy_config, oa); + idr_destroy(&oa->metrics_idr); + oa->xe = NULL; } diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 2647c1947746..5ccc772e047a 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -8,9 +8,15 @@ #include "xe_oa_types.h" +struct drm_device; +struct drm_file; struct xe_device; int xe_oa_init(struct xe_device *xe); void xe_oa_fini(struct xe_device *xe); +void xe_oa_register(struct xe_device *xe); +void xe_oa_unregister(struct xe_device *xe); +int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); #endif diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index e7b91e31f0e8..f8a45015cf49 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -7,6 +7,7 @@ #define _XE_OA_TYPES_H_ #include +#include #include #include @@ -125,6 +126,15 @@ struct xe_oa { /** @xe: back pointer to xe device */ struct xe_device *xe; + /** @metrics_kobj: kobj for metrics sysfs */ + struct kobject *metrics_kobj; + + /** @metrics_lock: lock protecting add/remove configs */ + struct mutex metrics_lock; + + /** @metrics_idr: List of dynamic configurations (struct xe_oa_config) */ + struct idr metrics_idr; + /** @oa_formats: tracks all OA formats across platforms */ const struct xe_oa_format *oa_formats; diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index f619cf50b453..ca01042d75b1 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -8,11 +8,25 @@ #include +#include "xe_oa.h" #include "xe_perf.h" u32 xe_perf_stream_paranoid = true; static struct ctl_table_header *sysctl_header; +static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, + struct drm_file *file) +{ + switch (arg->perf_op) { + case DRM_XE_PERF_OP_ADD_CONFIG: + return xe_oa_add_config_ioctl(dev, arg->param, file); + case DRM_XE_PERF_OP_REMOVE_CONFIG: + return xe_oa_remove_config_ioctl(dev, arg->param, file); + default: + return -EINVAL; + } +} + /** * xe_perf_ioctl - The top level perf layer ioctl * @dev: @drm_device @@ -32,6 +46,8 @@ int xe_perf_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return -EINVAL; switch (arg->perf_type) { + case DRM_XE_PERF_TYPE_OA: + return xe_oa_ioctl(dev, arg, file); default: return -EINVAL; } diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 323d899a276b..fd9a4bd9e3d4 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1378,6 +1378,7 @@ struct drm_xe_wait_user_fence { * enum drm_xe_perf_type - Perf stream types */ enum drm_xe_perf_type { + DRM_XE_PERF_TYPE_OA, __DRM_XE_PERF_TYPE_MAX, /* non-ABI */ }; @@ -1469,6 +1470,30 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * struct drm_xe_oa_config - OA metric configuration + * + * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * particular config can be specified when opening an OA stream using + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. + */ +struct drm_xe_oa_config { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @uuid: String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x" */ + char uuid[36]; + + /** @n_regs: Number of regs in @regs_ptr */ + __u32 n_regs; + + /** + * @regs_ptr: Pointer to (register address, value) pairs for OA config + * registers. Expected length of buffer is: (2 * sizeof(u32) * @n_regs). + */ + __u64 regs_ptr; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From b6fd51c6211910b1db072a3fa2a17ba85cb3dd51 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:45:58 -0700 Subject: drm/xe/oa/uapi: Define and parse OA stream properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properties for OA streams are specified by user space, when the stream is opened, as a chain of drm_xe_ext_set_property struct's. Parse and validate these stream properties. v2: Remove struct drm_xe_oa_open_param (Harish Chegondi) Drop DRM_XE_OA_PROPERTY_POLL_OA_PERIOD_US (Umesh) Eliminate comparison with xe_oa_max_sample_rate (Umesh) Drop 'struct drm_xe_oa_record_header' (Umesh) v3: s/DRM_XE_OA_PROPERTY_OA_EXPONENT/ \ DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT/ (Jose) v4: Fix 32 bit build v5: Add non-static function kernel doc (Michal) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-7-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 364 +++++++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa.h | 5 + drivers/gpu/drm/xe/xe_perf.c | 2 + include/uapi/drm/xe_drm.h | 72 +++++++++ 4 files changed, 443 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 4122785735d4..9b23eadf56cd 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,18 +3,23 @@ * Copyright © 2023-2024 Intel Corporation */ +#include + #include #include +#include "regs/xe_gt_regs.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_device.h" +#include "xe_exec_queue.h" #include "xe_gt.h" #include "xe_gt_printk.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" #include "xe_perf.h" +#include "xe_pm.h" #define XE_OA_UNIT_INVALID U32_MAX @@ -40,6 +45,19 @@ struct xe_oa_config { struct rcu_head rcu; }; +struct xe_oa_open_param { + u32 oa_unit_id; + bool sample; + u32 metric_set; + enum xe_oa_format_name oa_format; + int period_exponent; + bool disabled; + int exec_queue_id; + int engine_instance; + struct xe_exec_queue *exec_q; + struct xe_hw_engine *hwe; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -82,6 +100,352 @@ static void xe_oa_config_put(struct xe_oa_config *oa_config) kref_put(&oa_config->ref, xe_oa_config_release); } +/** + * xe_oa_timestamp_frequency - Return OA timestamp frequency + * @gt: @xe_gt + * + * OA timestamp frequency = CS timestamp frequency in most platforms. On some + * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such + * cases, return the adjusted CS timestamp frequency to the user. + */ +u32 xe_oa_timestamp_frequency(struct xe_gt *gt) +{ + u32 reg, shift; + + /* + * Wa_18013179988:dg2 + * Wa_14015568240:pvc + * Wa_14015846243:mtl + */ + switch (gt_to_xe(gt)->info.platform) { + case XE_DG2: + case XE_PVC: + case XE_METEORLAKE: + xe_pm_runtime_get(gt_to_xe(gt)); + reg = xe_mmio_read32(gt, RPM_CONFIG0); + xe_pm_runtime_put(gt_to_xe(gt)); + + shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); + return gt->info.reference_clock << (3 - shift); + + default: + return gt->info.reference_clock; + } +} + +static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) +{ + u64 nom = (2ULL << exponent) * NSEC_PER_SEC; + u32 den = xe_oa_timestamp_frequency(gt); + + return div_u64(nom + den - 1, den); +} + +static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) +{ + switch (hwe->oa_unit->type) { + case DRM_XE_OA_UNIT_TYPE_OAG: + return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || + type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; + case DRM_XE_OA_UNIT_TYPE_OAM: + return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; + default: + return false; + } +} + +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) +{ + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); + int idx; + + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { + const struct xe_oa_format *f = &oa->oa_formats[idx]; + + if (counter_size == f->counter_size && bc_report == f->bc_report && + type == f->type && counter_sel == f->counter_select) { + *name = idx; + return 0; + } + } + + return -EINVAL; +} + +/** + * xe_oa_unit_id - Return OA unit ID for a hardware engine + * @hwe: @xe_hw_engine + * + * Return OA unit ID for a hardware engine when available + */ +u16 xe_oa_unit_id(struct xe_hw_engine *hwe) +{ + return hwe->oa_unit && hwe->oa_unit->num_engines ? + hwe->oa_unit->oa_unit_id : U16_MAX; +} + +static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) +{ + struct xe_gt *gt; + int i, ret = 0; + + if (param->exec_q) { + /* When we have an exec_q, get hwe from the exec_q */ + param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class, + param->engine_instance, true); + } else { + struct xe_hw_engine *hwe; + enum xe_hw_engine_id id; + + /* Else just get the first hwe attached to the oa unit */ + for_each_gt(gt, oa->xe, i) { + for_each_hw_engine(hwe, gt, id) { + if (xe_oa_unit_id(hwe) == param->oa_unit_id) { + param->hwe = hwe; + goto out; + } + } + } + } +out: + if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) { + drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n", + param->exec_q ? param->exec_q->class : -1, + param->engine_instance, param->oa_unit_id); + ret = -EINVAL; + } + + return ret; +} + +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (value >= oa->oa_unit_ids) { + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); + return -EINVAL; + } + param->oa_unit_id = value; + return 0; +} + +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->sample = value; + return 0; +} + +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->metric_set = value; + return 0; +} + +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + int ret = decode_oa_format(oa, value, ¶m->oa_format); + + if (ret) { + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); + return ret; + } + return 0; +} + +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ +#define OA_EXPONENT_MAX 31 + + if (value > OA_EXPONENT_MAX) { + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); + return -EINVAL; + } + param->period_exponent = value; + return 0; +} + +static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->disabled = value; + return 0; +} + +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->exec_queue_id = value; + return 0; +} + +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->engine_instance = value; + return 0; +} + +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); +static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, +}; + +static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property ext; + int err; + u32 idx; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || + XE_IOCTL_DBG(oa->xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); + return xe_oa_set_property_funcs[idx](oa, ext.value, param); +} + +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, + struct xe_oa_open_param *param); +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, +}; + +static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, + struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(oa->xe, ext_number >= DRM_XE_OA_PROPERTY_MAX)) + return -E2BIG; + + err = __copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.pad) || + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); + err = xe_oa_user_extension_funcs[idx](oa, extension, param); + if (XE_IOCTL_DBG(oa->xe, err)) + return err; + + if (ext.next_extension) + return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); + + return 0; +} + +/** + * xe_oa_stream_open_ioctl - Opens an OA stream + * @dev: @drm_device + * @data: pointer to struct @drm_xe_oa_config + * @file: @drm_file + * + * The functions opens an OA stream. An OA stream, opened with specified + * properties, enables perf counter samples to be collected, either + * periodically (time based sampling), or on request (using perf queries) + */ +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) +{ + struct xe_oa *oa = &to_xe_device(dev)->oa; + struct xe_file *xef = to_xe_file(file); + struct xe_oa_open_param param = {}; + const struct xe_oa_format *f; + bool privileged_op = true; + int ret; + + if (!oa->xe) { + drm_dbg(&oa->xe->drm, "xe oa interface not available for this system\n"); + return -ENODEV; + } + + ret = xe_oa_user_extensions(oa, data, 0, ¶m); + if (ret) + return ret; + + if (param.exec_queue_id > 0) { + param.exec_q = xe_exec_queue_lookup(xef, param.exec_queue_id); + if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) + return -ENOENT; + } + + /* + * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC, + * without global stream access, can be an unprivileged operation + */ + if (param.exec_q && !param.sample) + privileged_op = false; + + if (privileged_op && xe_perf_stream_paranoid && !perfmon_capable()) { + drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe perf stream\n"); + ret = -EACCES; + goto err_exec_q; + } + + if (!param.exec_q && !param.sample) { + drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n"); + ret = -EINVAL; + goto err_exec_q; + } + + ret = xe_oa_assign_hwe(oa, ¶m); + if (ret) + goto err_exec_q; + + f = &oa->oa_formats[param.oa_format]; + if (!param.oa_format || !f->size || + !engine_supports_oa_format(param.hwe, f->type)) { + drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n", + param.oa_format, f->type, f->size, param.hwe->class); + ret = -EINVAL; + goto err_exec_q; + } + + if (param.period_exponent > 0) { + u64 oa_period, oa_freq_hz; + + /* Requesting samples from OAG buffer is a privileged operation */ + if (!param.sample) { + drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n"); + ret = -EINVAL; + goto err_exec_q; + } + oa_period = oa_exponent_to_ns(param.hwe->gt, param.period_exponent); + oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); + drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); + } +err_exec_q: + if (ret < 0 && param.exec_q) + xe_exec_queue_put(param.exec_q); + return ret; +} + static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) { static const struct xe_reg flex_eu_regs[] = { diff --git a/drivers/gpu/drm/xe/xe_oa.h b/drivers/gpu/drm/xe/xe_oa.h index 5ccc772e047a..87a38820c317 100644 --- a/drivers/gpu/drm/xe/xe_oa.h +++ b/drivers/gpu/drm/xe/xe_oa.h @@ -11,12 +11,17 @@ struct drm_device; struct drm_file; struct xe_device; +struct xe_gt; +struct xe_hw_engine; int xe_oa_init(struct xe_device *xe); void xe_oa_fini(struct xe_device *xe); void xe_oa_register(struct xe_device *xe); void xe_oa_unregister(struct xe_device *xe); +int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file); +u32 xe_oa_timestamp_frequency(struct xe_gt *gt); +u16 xe_oa_unit_id(struct xe_hw_engine *hwe); #endif diff --git a/drivers/gpu/drm/xe/xe_perf.c b/drivers/gpu/drm/xe/xe_perf.c index ca01042d75b1..d6cd74cadf34 100644 --- a/drivers/gpu/drm/xe/xe_perf.c +++ b/drivers/gpu/drm/xe/xe_perf.c @@ -18,6 +18,8 @@ static int xe_oa_ioctl(struct drm_device *dev, struct drm_xe_perf_param *arg, struct drm_file *file) { switch (arg->perf_op) { + case DRM_XE_PERF_OP_STREAM_OPEN: + return xe_oa_stream_open_ioctl(dev, arg->param, file); case DRM_XE_PERF_OP_ADD_CONFIG: return xe_oa_add_config_ioctl(dev, arg->param, file); case DRM_XE_PERF_OP_REMOVE_CONFIG: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index fd9a4bd9e3d4..307409f968e2 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1470,6 +1470,78 @@ enum drm_xe_oa_format_type { DRM_XE_OA_FMT_TYPE_PEC, }; +/** + * enum drm_xe_oa_property_id - OA stream property id's + * + * Stream params are specified as a chain of @drm_xe_ext_set_property + * struct's, with @property values from enum @drm_xe_oa_property_id and + * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. + * @param field in struct @drm_xe_perf_param points to the first + * @drm_xe_ext_set_property struct. + */ +enum drm_xe_oa_property_id { +#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 + /** + * @DRM_XE_OA_PROPERTY_OA_UNIT_ID: ID of the OA unit on which to open + * the OA stream, see @oa_unit_id in 'struct + * drm_xe_query_oa_units'. Defaults to 0 if not provided. + */ + DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1, + + /** + * @DRM_XE_OA_PROPERTY_SAMPLE_OA: A value of 1 requests inclusion of raw + * OA unit reports or stream samples in a global buffer attached to an + * OA unit. + */ + DRM_XE_OA_PROPERTY_SAMPLE_OA, + + /** + * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA + * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + */ + DRM_XE_OA_PROPERTY_OA_METRIC_SET, + + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + DRM_XE_OA_PROPERTY_OA_FORMAT, + /* + * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, + * in terms of the following quantities: a. enum @drm_xe_oa_format_type + * b. Counter select c. Counter size and d. BC report. Also refer to the + * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. + */ +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) + + /** + * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit + * sampling with sampling frequency proportional to 2^(period_exponent + 1) + */ + DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT, + + /** + * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA + * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + */ + DRM_XE_OA_PROPERTY_OA_DISABLED, + + /** + * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific + * @exec_queue_id. Perf queries can be executed on this exec queue. + */ + DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, + + /** + * @DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: Optional engine instance to + * pass along with @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID or will default to 0. + */ + DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE, + + /** @DRM_XE_OA_PROPERTY_MAX: non-ABI */ + DRM_XE_OA_PROPERTY_MAX +}; + /** * struct drm_xe_oa_config - OA metric configuration * -- cgit v1.2.3 From e936f885f1e96f59d9d05fb6cb5a02b9b9b88a05 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:00 -0700 Subject: drm/xe/oa/uapi: Expose OA stream fd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OA stream open perf op returns an fd with its own file_operations for the newly initialized OA stream. These file_operations allow userspace to enable or disable the stream, as well as apply a different metric configuration for the OA stream. Userspace can also poll for data availability. OA stream initialization is completed in this commit by enabling the OA stream. When sampling is enabled this starts a hrtimer which periodically checks for data availablility. v2: Use stream properties for stream reconfiguration with DRM_XE_PERF_IOCTL_CONFIG v3: Hold runtime_pm reference across oa buffer alloc/free v4: Fix 32 bit build Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-9-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 380 +++++++++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 4 + 2 files changed, 384 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index c2fd2d22677f..a71111859190 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -3,8 +3,10 @@ * Copyright © 2023-2024 Intel Corporation */ +#include #include #include +#include #include #include @@ -29,6 +31,7 @@ #include "xe_pm.h" #include "xe_sched_job.h" +#define OA_TAKEN(tail, head) (((tail) - (head)) & (XE_OA_BUFFER_SIZE - 1)) #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX @@ -147,6 +150,205 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) return &stream->hwe->oa_unit->regs; } +static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) +{ + return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) & + OAG_OATAILPTR_MASK; +} + +#define oa_report_header_64bit(__s) \ + ((__s)->oa_buffer.format->header == HDR_64_BIT) + +static u64 oa_report_id(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; +} + +static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) +{ + return oa_report_header_64bit(stream) ? + *((u64 *)report + 1) : + *((u32 *)report + 1); +} + +static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + int report_size = stream->oa_buffer.format->size; + u32 tail, hw_tail; + unsigned long flags; + bool pollin; + u32 partial_report_size; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + hw_tail = xe_oa_hw_tail_read(stream); + hw_tail -= gtt_offset; + + /* + * The tail pointer increases in 64 byte (cacheline size), not in report_size + * increments. Also report size may not be a power of 2. Compute potential + * partially landed report in OA buffer. + */ + partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail); + partial_report_size %= report_size; + + /* Subtract partial amount off the tail */ + hw_tail = OA_TAKEN(hw_tail, partial_report_size); + + tail = hw_tail; + + /* + * Walk the stream backward until we find a report with report id and timestamp + * not 0. We can't tell whether a report has fully landed in memory before the + * report id and timestamp of the following report have landed. + * + * This is assuming that the writes of the OA unit land in memory in the order + * they were written. If not : (╯°□°)╯︵ ┻━┻ + */ + while (OA_TAKEN(tail, stream->oa_buffer.tail) >= report_size) { + void *report = stream->oa_buffer.vaddr + tail; + + if (oa_report_id(stream, report) || oa_timestamp(stream, report)) + break; + + tail = OA_TAKEN(tail, report_size); + } + + if (OA_TAKEN(hw_tail, tail) > report_size) + drm_dbg(&stream->oa->xe->drm, + "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n", + stream->oa_buffer.head, tail, hw_tail); + + stream->oa_buffer.tail = tail; + + pollin = OA_TAKEN(stream->oa_buffer.tail, + stream->oa_buffer.head) >= report_size; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + return pollin; +} + +static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) +{ + struct xe_oa_stream *stream = + container_of(hrtimer, typeof(*stream), poll_check_timer); + + if (xe_oa_buffer_check_unlocked(stream)) { + stream->pollin = true; + wake_up(&stream->poll_wq); + } + + hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); + + return HRTIMER_RESTART; +} + +static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) +{ + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + unsigned long flags; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, + gtt_offset & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = 0; + + /* + * PRM says: "This MMIO must be set before the OATAILPTR register and after the + * OAHEADPTR register. This is to enable proper functionality of the overflow bit". + */ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf); + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr, + gtt_offset & OAG_OATAILPTR_MASK); + + /* Mark that we need updated tail pointer to read from */ + stream->oa_buffer.tail = 0; + + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ + memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size); +} + +static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) +{ + return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | + REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | + REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); +} + +static void xe_oa_enable(struct xe_oa_stream *stream) +{ + const struct xe_oa_format *format = stream->oa_buffer.format; + const struct xe_oa_regs *regs; + u32 val; + + /* + * BSpec: 46822: Bit 0. Even if stream->sample is 0, for OAR to function, the OA + * buffer must be correctly initialized + */ + xe_oa_init_oa_buffer(stream); + + regs = __oa_regs(stream); + val = __format_to_oactrl(format, regs->oa_ctrl_counter_select_mask) | + OAG_OACONTROL_OA_COUNTER_ENABLE; + + xe_mmio_write32(stream->gt, regs->oa_ctrl, val); +} + +static void xe_oa_disable(struct xe_oa_stream *stream) +{ + xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0); + if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, + OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA to be disabled timed out\n"); + + if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { + /* <= XE_METEORLAKE except XE_PVC */ + xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1); + if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) + drm_err(&stream->oa->xe->drm, + "wait for OA tlb invalidate timed out\n"); + } +} + +static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, + struct file *file, poll_table *wait) +{ + __poll_t events = 0; + + poll_wait(file, &stream->poll_wq, wait); + + /* + * We don't explicitly check whether there's something to read here since this + * path may be hot depending on what else userspace is polling, or on the timeout + * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there + * are samples to read + */ + if (stream->pollin) + events |= EPOLLIN; + + return events; +} + +static __poll_t xe_oa_poll(struct file *file, poll_table *wait) +{ + struct xe_oa_stream *stream = file->private_data; + __poll_t ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_poll_locked(stream, file, wait); + mutex_unlock(&stream->stream_lock); + + return ret; +} + static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) { struct xe_sched_job *job; @@ -246,6 +448,27 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); } +static void xe_oa_stream_destroy(struct xe_oa_stream *stream) +{ + struct xe_oa_unit *u = stream->hwe->oa_unit; + struct xe_gt *gt = stream->hwe->gt; + + if (WARN_ON(stream != u->exclusive_stream)) + return; + + WRITE_ONCE(u->exclusive_stream, NULL); + + xe_oa_disable_metric_set(stream); + xe_exec_queue_put(stream->k_exec_q); + + xe_oa_free_oa_buffer(stream); + + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_pm_runtime_put(stream->oa->xe); + + xe_oa_free_configs(stream); +} + static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) { struct xe_bo *bo; @@ -383,6 +606,148 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) return xe_oa_emit_oa_config(stream); } +static void xe_oa_stream_enable(struct xe_oa_stream *stream) +{ + stream->pollin = false; + + xe_oa_enable(stream); + + if (stream->sample) + hrtimer_start(&stream->poll_check_timer, + ns_to_ktime(stream->poll_period_ns), + HRTIMER_MODE_REL_PINNED); +} + +static void xe_oa_stream_disable(struct xe_oa_stream *stream) +{ + xe_oa_disable(stream); + + if (stream->sample) + hrtimer_cancel(&stream->poll_check_timer); +} + +static void xe_oa_enable_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + return; + + stream->enabled = true; + + xe_oa_stream_enable(stream); +} + +static void xe_oa_disable_locked(struct xe_oa_stream *stream) +{ + if (!stream->enabled) + return; + + stream->enabled = false; + + xe_oa_stream_disable(stream); +} + +static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) +{ + struct drm_xe_ext_set_property ext; + long ret = stream->oa_config->id; + struct xe_oa_config *config; + int err; + + err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); + if (XE_IOCTL_DBG(stream->oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || + XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || + XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) + return -EINVAL; + + config = xe_oa_get_oa_config(stream->oa, ext.value); + if (!config) + return -ENODEV; + + if (config != stream->oa_config) { + err = xe_oa_emit_oa_config(stream); + if (!err) + config = xchg(&stream->oa_config, config); + else + ret = err; + } + + xe_oa_config_put(config); + + return ret; +} + +static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, + unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case DRM_XE_PERF_IOCTL_ENABLE: + xe_oa_enable_locked(stream); + return 0; + case DRM_XE_PERF_IOCTL_DISABLE: + xe_oa_disable_locked(stream); + return 0; + case DRM_XE_PERF_IOCTL_CONFIG: + return xe_oa_config_locked(stream, arg); + } + + return -EINVAL; +} + +static long xe_oa_ioctl(struct file *file, + unsigned int cmd, + unsigned long arg) +{ + struct xe_oa_stream *stream = file->private_data; + long ret; + + mutex_lock(&stream->stream_lock); + ret = xe_oa_ioctl_locked(stream, cmd, arg); + mutex_unlock(&stream->stream_lock); + + return ret; +} + +static void xe_oa_destroy_locked(struct xe_oa_stream *stream) +{ + if (stream->enabled) + xe_oa_disable_locked(stream); + + xe_oa_stream_destroy(stream); + + if (stream->exec_q) + xe_exec_queue_put(stream->exec_q); + + kfree(stream); +} + +static int xe_oa_release(struct inode *inode, struct file *file) +{ + struct xe_oa_stream *stream = file->private_data; + struct xe_gt *gt = stream->gt; + + mutex_lock(>->oa.gt_lock); + xe_oa_destroy_locked(stream); + mutex_unlock(>->oa.gt_lock); + + /* Release the reference the perf stream kept on the driver */ + drm_dev_put(>_to_xe(gt)->drm); + + return 0; +} + +static const struct file_operations xe_oa_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .release = xe_oa_release, + .poll = xe_oa_poll, + .unlocked_ioctl = xe_oa_ioctl, +}; + static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { @@ -436,6 +801,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, WRITE_ONCE(u->exclusive_stream, stream); + hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + stream->poll_check_timer.function = xe_oa_poll_check_timer_cb; + init_waitqueue_head(&stream->poll_wq); + spin_lock_init(&stream->oa_buffer.ptr_lock); mutex_init(&stream->stream_lock); @@ -479,10 +848,21 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, if (ret) goto err_free; + stream_fd = anon_inode_getfd("[xe_oa]", &xe_oa_fops, stream, 0); + if (stream_fd < 0) { + ret = stream_fd; + goto err_destroy; + } + + if (!param->disabled) + xe_oa_enable_locked(stream); + /* Hold a reference on the drm device till stream_fd is released */ drm_dev_get(&stream->oa->xe->drm); return stream_fd; +err_destroy: + xe_oa_stream_destroy(stream); err_free: kfree(stream); exit: diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 307409f968e2..1e09f786b3e6 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1478,6 +1478,10 @@ enum drm_xe_oa_format_type { * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. * @param field in struct @drm_xe_perf_param points to the first * @drm_xe_ext_set_property struct. + * + * Exactly the same mechanism is also used for stream reconfiguration using + * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of + * properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 -- cgit v1.2.3 From efb315d0a013cdc8b1e49f5c07b1a2972bc624d4 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:01 -0700 Subject: drm/xe/oa/uapi: Read file_operation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the OA stream read file_operation. Both blocking and non-blocking reads are supported. As part of read system call, the read copies OA perf data from the OA buffer to the user buffer, after appending packet headers for status and data packets. v2: Drop OA report headers, implement DRM_XE_PERF_IOCTL_STATUS (Umesh) v3: Introduce 'struct drm_xe_oa_stream_status' v4: Define oa_status register bitfields (Umesh) v5: Add extensions to 'struct drm_xe_oa_stream_status' v6: Minor cleanup, eliminate report32 variable v7: Use -EIO to signal to userspace to read OASTATUS using DRM_XE_PERF_IOCTL_STATUS, change previous sites returning -EIO to return -EINVAL Make drm_xe_oa_stream_status bits contiguous (Jose, Umesh) rmw oa_status bits (Umesh) Acked-by: Rodrigo Vivi Acked-by: José Roberto de Souza Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-10-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 201 +++++++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_oa_types.h | 3 + include/uapi/drm/xe_drm.h | 20 ++++ 3 files changed, 224 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index a71111859190..86d56b080eff 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -164,6 +164,14 @@ static u64 oa_report_id(struct xe_oa_stream *stream, void *report) return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; } +static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)report = 0; + else + *report = 0; +} + static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) { return oa_report_header_64bit(stream) ? @@ -171,6 +179,14 @@ static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) *((u32 *)report + 1); } +static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) +{ + if (oa_report_header_64bit(stream)) + *(u64 *)&report[2] = 0; + else + report[1] = 0; +} + static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -245,6 +261,95 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) return HRTIMER_RESTART; } +static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset, const u8 *report) +{ + int report_size = stream->oa_buffer.format->size; + int report_size_partial; + u8 *oa_buf_end; + + if ((count - *offset) < report_size) + return -ENOSPC; + + buf += *offset; + + oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + report_size_partial = oa_buf_end - report; + + if (report_size_partial < report_size) { + if (copy_to_user(buf, report, report_size_partial)) + return -EFAULT; + buf += report_size_partial; + + if (copy_to_user(buf, stream->oa_buffer.vaddr, + report_size - report_size_partial)) + return -EFAULT; + } else if (copy_to_user(buf, report, report_size)) { + return -EFAULT; + } + + *offset += report_size; + + return 0; +} + +static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + int report_size = stream->oa_buffer.format->size; + u8 *oa_buf_base = stream->oa_buffer.vaddr; + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 mask = (XE_OA_BUFFER_SIZE - 1); + size_t start_offset = *offset; + unsigned long flags; + u32 head, tail; + int ret = 0; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + head = stream->oa_buffer.head; + tail = stream->oa_buffer.tail; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + + xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); + + for (; OA_TAKEN(tail, head); head = (head + report_size) & mask) { + u8 *report = oa_buf_base + head; + + ret = xe_oa_append_report(stream, buf, count, offset, report); + if (ret) + break; + + if (is_power_of_2(report_size)) { + /* Clear out report id and timestamp to detect unlanded reports */ + oa_report_id_clear(stream, (void *)report); + oa_timestamp_clear(stream, (void *)report); + } else { + u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + u32 part = oa_buf_end - report; + + /* Zero out the entire report */ + if (report_size <= part) { + memset(report, 0, report_size); + } else { + memset(report, 0, part); + memset(oa_buf_base, 0, report_size - part); + } + } + } + + if (start_offset != *offset) { + struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; + + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); + xe_mmio_write32(stream->gt, oaheadptr, + (head + gtt_offset) & OAG_OAHEADPTR_MASK); + stream->oa_buffer.head = head; + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); + } + + return ret; +} + static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); @@ -318,6 +423,78 @@ static void xe_oa_disable(struct xe_oa_stream *stream) } } +static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) +{ + /* We might wait indefinitely if periodic sampling is not enabled */ + if (!stream->periodic) + return -EINVAL; + + return wait_event_interruptible(stream->poll_wq, + xe_oa_buffer_check_unlocked(stream)); +} + +#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \ + OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST) + +static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, + size_t count, size_t *offset) +{ + /* Only clear our bits to avoid side-effects */ + stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status, + OASTATUS_RELEVANT_BITS, 0); + /* + * Signal to userspace that there is non-zero OA status to read via + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl + */ + if (stream->oa_status & OASTATUS_RELEVANT_BITS) + return -EIO; + + return xe_oa_append_reports(stream, buf, count, offset); +} + +static ssize_t xe_oa_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct xe_oa_stream *stream = file->private_data; + size_t offset = 0; + int ret; + + /* Can't read from disabled streams */ + if (!stream->enabled || !stream->sample) + return -EINVAL; + + if (!(file->f_flags & O_NONBLOCK)) { + do { + ret = xe_oa_wait_unlocked(stream); + if (ret) + return ret; + + mutex_lock(&stream->stream_lock); + ret = __xe_oa_read(stream, buf, count, &offset); + mutex_unlock(&stream->stream_lock); + } while (!offset && !ret); + } else { + mutex_lock(&stream->stream_lock); + ret = __xe_oa_read(stream, buf, count, &offset); + mutex_unlock(&stream->stream_lock); + } + + /* + * Typically we clear pollin here in order to wait for the new hrtimer callback + * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, + * which means that more OA data is available than could fit in the user provided + * buffer. In this case we want the next poll() call to not block. + * + * Also in case of -EIO, we have already waited for data before returning + * -EIO, so need to wait again + */ + if (ret != -ENOSPC && ret != -EIO) + stream->pollin = false; + + /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */ + return offset ?: (ret ?: -EAGAIN); +} + static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, struct file *file, poll_table *wait) { @@ -680,6 +857,27 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) return ret; } +static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_status status = {}; + void __user *uaddr = (void __user *)arg; + + /* Map from register to uapi bits */ + if (stream->oa_status & OASTATUS_REPORT_LOST) + status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST; + if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW) + status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW; + if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW) + status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW; + if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL) + status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL; + + if (copy_to_user(uaddr, &status, sizeof(status))) + return -EFAULT; + + return 0; +} + static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, unsigned int cmd, unsigned long arg) @@ -693,6 +891,8 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, return 0; case DRM_XE_PERF_IOCTL_CONFIG: return xe_oa_config_locked(stream, arg); + case DRM_XE_PERF_IOCTL_STATUS: + return xe_oa_status_locked(stream, arg); } return -EINVAL; @@ -745,6 +945,7 @@ static const struct file_operations xe_oa_fops = { .llseek = no_llseek, .release = xe_oa_release, .poll = xe_oa_poll, + .read = xe_oa_read, .unlocked_ioctl = xe_oa_ioctl, }; diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h index 6700383b1a52..5bb8ce0d71c9 100644 --- a/drivers/gpu/drm/xe/xe_oa_types.h +++ b/drivers/gpu/drm/xe/xe_oa_types.h @@ -222,5 +222,8 @@ struct xe_oa_stream { /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ u64 poll_period_ns; + + /** @oa_status: temporary storage for oa_status register value */ + u32 oa_status; }; #endif diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 1e09f786b3e6..03a6e479227a 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1570,6 +1570,26 @@ struct drm_xe_oa_config { __u64 regs_ptr; }; +/** + * struct drm_xe_oa_stream_status - OA stream status returned from + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to + * query stream status in response to EIO errno from perf fd read(). + */ +struct drm_xe_oa_stream_status { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_status: OA stream status (see Bspec 46717/61226) */ + __u64 oa_status; +#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) +#define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) +#define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) +#define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From dd6b4718c3bab611588922ae8a7736c58eafcc93 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Mon, 17 Jun 2024 18:46:04 -0700 Subject: drm/xe/oa/uapi: Query OA unit properties Implement query for properties of OA units present on a device. v2: Clean up reserved/pad fields (Umesh) Follow the same scheme as other query structs v3: Skip reporting reserved engines attached to OA units v4: Expose oa_buf_size via DRM_XE_PERF_IOCTL_INFO (Umesh) v5: Don't expose capabilities as OR of properties (Umesh) v6: Add extensions to query output structs: drm_xe_oa_unit, drm_xe_query_oa_units and drm_xe_oa_stream_info v7: Change oa_units[] array to __u64 type Acked-by: Rodrigo Vivi Reviewed-by: Umesh Nerlige Ramappa Signed-off-by: Ashutosh Dixit Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-13-ashutosh.dixit@intel.com --- drivers/gpu/drm/xe/xe_oa.c | 13 +++++++ drivers/gpu/drm/xe/xe_query.c | 77 +++++++++++++++++++++++++++++++++++++++ include/uapi/drm/xe_drm.h | 85 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 42b0ba014e35..038caeb7c9e7 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -1050,6 +1050,17 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) return 0; } +static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) +{ + struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + void __user *uaddr = (void __user *)arg; + + if (copy_to_user(uaddr, &info, sizeof(info))) + return -EFAULT; + + return 0; +} + static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, unsigned int cmd, unsigned long arg) @@ -1065,6 +1076,8 @@ static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, return xe_oa_config_locked(stream, arg); case DRM_XE_PERF_IOCTL_STATUS: return xe_oa_status_locked(stream, arg); + case DRM_XE_PERF_IOCTL_INFO: + return xe_oa_info_locked(stream, arg); } return -EINVAL; diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 995effcb904b..4e01df6b1b7a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -602,6 +602,82 @@ query_uc_fw_version(struct xe_device *xe, struct drm_xe_device_query *query) return 0; } +static size_t calc_oa_unit_query_size(struct xe_device *xe) +{ + size_t size = sizeof(struct drm_xe_query_oa_units); + struct xe_gt *gt; + int i, id; + + for_each_gt(gt, xe, id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + size += sizeof(struct drm_xe_oa_unit); + size += gt->oa.oa_unit[i].num_engines * + sizeof(struct drm_xe_engine_class_instance); + } + } + + return size; +} + +static int query_oa_units(struct xe_device *xe, + struct drm_xe_device_query *query) +{ + void __user *query_ptr = u64_to_user_ptr(query->data); + size_t size = calc_oa_unit_query_size(xe); + struct drm_xe_query_oa_units *qoa; + enum xe_hw_engine_id hwe_id; + struct drm_xe_oa_unit *du; + struct xe_hw_engine *hwe; + struct xe_oa_unit *u; + int gt_id, i, j, ret; + struct xe_gt *gt; + u8 *pdu; + + if (query->size == 0) { + query->size = size; + return 0; + } else if (XE_IOCTL_DBG(xe, query->size != size)) { + return -EINVAL; + } + + qoa = kzalloc(size, GFP_KERNEL); + if (!qoa) + return -ENOMEM; + + pdu = (u8 *)&qoa->oa_units[0]; + for_each_gt(gt, xe, gt_id) { + for (i = 0; i < gt->oa.num_oa_units; i++) { + u = >->oa.oa_unit[i]; + du = (struct drm_xe_oa_unit *)pdu; + + du->oa_unit_id = u->oa_unit_id; + du->oa_unit_type = u->type; + du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); + du->capabilities = DRM_XE_OA_CAPS_BASE; + + j = 0; + for_each_hw_engine(hwe, gt, hwe_id) { + if (!xe_hw_engine_is_reserved(hwe) && + xe_oa_unit_id(hwe) == u->oa_unit_id) { + du->eci[j].engine_class = + xe_to_user_engine_class[hwe->class]; + du->eci[j].engine_instance = hwe->logical_instance; + du->eci[j].gt_id = gt->info.id; + j++; + } + } + du->num_engines = j; + pdu += sizeof(*du) + j * sizeof(du->eci[0]); + qoa->num_oa_units++; + } + } + + ret = copy_to_user(query_ptr, qoa, size); + kfree(qoa); + + return ret ? -EFAULT : 0; +} + static int (* const xe_query_funcs[])(struct xe_device *xe, struct drm_xe_device_query *query) = { query_engines, @@ -612,6 +688,7 @@ static int (* const xe_query_funcs[])(struct xe_device *xe, query_gt_topology, query_engine_cycles, query_uc_fw_version, + query_oa_units, }; int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file) diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 03a6e479227a..93e00be44b2d 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -689,6 +689,7 @@ struct drm_xe_device_query { #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7 +#define DRM_XE_DEVICE_QUERY_OA_UNITS 8 /** @query: The type of data to query */ __u32 query; @@ -1451,6 +1452,75 @@ enum drm_xe_oa_unit_type { DRM_XE_OA_UNIT_TYPE_OAM, }; +/** + * struct drm_xe_oa_unit - describe OA unit + */ +struct drm_xe_oa_unit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_unit_id: OA unit ID */ + __u32 oa_unit_id; + + /** @oa_unit_type: OA unit type of @drm_xe_oa_unit_type */ + __u32 oa_unit_type; + + /** @capabilities: OA capabilities bit-mask */ + __u64 capabilities; +#define DRM_XE_OA_CAPS_BASE (1 << 0) + + /** @oa_timestamp_freq: OA timestamp freq */ + __u64 oa_timestamp_freq; + + /** @reserved: MBZ */ + __u64 reserved[4]; + + /** @num_engines: number of engines in @eci array */ + __u64 num_engines; + + /** @eci: engines attached to this OA unit */ + struct drm_xe_engine_class_instance eci[]; +}; + +/** + * struct drm_xe_query_oa_units - describe OA units + * + * If a query is made with a struct drm_xe_device_query where .query + * is equal to DRM_XE_DEVICE_QUERY_OA_UNITS, then the reply uses struct + * drm_xe_query_oa_units in .data. + * + * OA unit properties for all OA units can be accessed using a code block + * such as the one below: + * + * .. code-block:: C + * + * struct drm_xe_query_oa_units *qoa; + * struct drm_xe_oa_unit *oau; + * u8 *poau; + * + * // malloc qoa and issue DRM_XE_DEVICE_QUERY_OA_UNITS. Then: + * poau = (u8 *)&qoa->oa_units[0]; + * for (int i = 0; i < qoa->num_oa_units; i++) { + * oau = (struct drm_xe_oa_unit *)poau; + * // Access 'struct drm_xe_oa_unit' fields here + * poau += sizeof(*oau) + oau->num_engines * sizeof(oau->eci[0]); + * } + */ +struct drm_xe_query_oa_units { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + /** @num_oa_units: number of OA units returned in oau[] */ + __u32 num_oa_units; + /** @pad: MBZ */ + __u32 pad; + /** + * @oa_units: struct @drm_xe_oa_unit array returned for this device. + * Written below as a u64 array to avoid problems with nested flexible + * arrays with some compilers + */ + __u64 oa_units[]; +}; + /** * enum drm_xe_oa_format_type - OA format types as specified in PRM/Bspec * 52198/60942 @@ -1590,6 +1660,21 @@ struct drm_xe_oa_stream_status { __u64 reserved[3]; }; +/** + * struct drm_xe_oa_stream_info - OA stream info returned from + * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + */ +struct drm_xe_oa_stream_info { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @oa_buf_size: OA buffer size */ + __u64 oa_buf_size; + + /** @reserved: reserved for future use */ + __u64 reserved[3]; +}; + #if defined(__cplusplus) } #endif -- cgit v1.2.3 From 382d1741b5b2feffef7942dd074206372afe1a96 Mon Sep 17 00:00:00 2001 From: Haiyang Zhang Date: Mon, 17 Jun 2024 13:17:26 -0700 Subject: net: mana: Add support for page sizes other than 4KB on ARM64 As defined by the MANA Hardware spec, the queue size for DMA is 4KB minimal, and power of 2. And, the HWC queue size has to be exactly 4KB. To support page sizes other than 4KB on ARM64, define the minimal queue size as a macro separately from the PAGE_SIZE, which we always assumed it to be 4KB before supporting ARM64. Also, add MANA specific macros and update code related to size alignment, DMA region calculations, etc. Signed-off-by: Haiyang Zhang Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/1718655446-6576-1-git-send-email-haiyangz@microsoft.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/microsoft/Kconfig | 2 +- drivers/net/ethernet/microsoft/mana/gdma_main.c | 10 +++++----- drivers/net/ethernet/microsoft/mana/hw_channel.c | 14 +++++++------- drivers/net/ethernet/microsoft/mana/mana_en.c | 8 ++++---- drivers/net/ethernet/microsoft/mana/shm_channel.c | 13 +++++++------ include/net/mana/gdma.h | 10 +++++++++- include/net/mana/mana.h | 3 ++- 7 files changed, 35 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/microsoft/Kconfig b/drivers/net/ethernet/microsoft/Kconfig index 286f0d5697a1..901fbffbf718 100644 --- a/drivers/net/ethernet/microsoft/Kconfig +++ b/drivers/net/ethernet/microsoft/Kconfig @@ -18,7 +18,7 @@ if NET_VENDOR_MICROSOFT config MICROSOFT_MANA tristate "Microsoft Azure Network Adapter (MANA) support" depends on PCI_MSI - depends on X86_64 || (ARM64 && !CPU_BIG_ENDIAN && ARM64_4K_PAGES) + depends on X86_64 || (ARM64 && !CPU_BIG_ENDIAN) depends on PCI_HYPERV select AUXILIARY_BUS select PAGE_POOL diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 1332db9a08eb..e1d70d21e207 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -182,7 +182,7 @@ int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, dma_addr_t dma_handle; void *buf; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; gmi->dev = gc->dev; @@ -717,7 +717,7 @@ EXPORT_SYMBOL_NS(mana_gd_destroy_dma_region, NET_MANA); static int mana_gd_create_dma_region(struct gdma_dev *gd, struct gdma_mem_info *gmi) { - unsigned int num_page = gmi->length / PAGE_SIZE; + unsigned int num_page = gmi->length / MANA_PAGE_SIZE; struct gdma_create_dma_region_req *req = NULL; struct gdma_create_dma_region_resp resp = {}; struct gdma_context *gc = gd->gdma_context; @@ -727,10 +727,10 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, int err; int i; - if (length < PAGE_SIZE || !is_power_of_2(length)) + if (length < MANA_PAGE_SIZE || !is_power_of_2(length)) return -EINVAL; - if (offset_in_page(gmi->virt_addr) != 0) + if (!MANA_PAGE_ALIGNED(gmi->virt_addr)) return -EINVAL; hwc = gc->hwc.driver_data; @@ -751,7 +751,7 @@ static int mana_gd_create_dma_region(struct gdma_dev *gd, req->page_addr_list_len = num_page; for (i = 0; i < num_page; i++) - req->page_addr_list[i] = gmi->dma_handle + i * PAGE_SIZE; + req->page_addr_list[i] = gmi->dma_handle + i * MANA_PAGE_SIZE; err = mana_gd_send_request(gc, req_msg_size, req, sizeof(resp), &resp); if (err) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index bbc4f9e16c98..cafded2f9382 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -362,12 +362,12 @@ static int mana_hwc_create_cq(struct hw_channel_context *hwc, u16 q_depth, int err; eq_size = roundup_pow_of_two(GDMA_EQE_SIZE * q_depth); - if (eq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - eq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (eq_size < MANA_MIN_QSIZE) + eq_size = MANA_MIN_QSIZE; cq_size = roundup_pow_of_two(GDMA_CQE_SIZE * q_depth); - if (cq_size < MINIMUM_SUPPORTED_PAGE_SIZE) - cq_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (cq_size < MANA_MIN_QSIZE) + cq_size = MANA_MIN_QSIZE; hwc_cq = kzalloc(sizeof(*hwc_cq), GFP_KERNEL); if (!hwc_cq) @@ -429,7 +429,7 @@ static int mana_hwc_alloc_dma_buf(struct hw_channel_context *hwc, u16 q_depth, dma_buf->num_reqs = q_depth; - buf_size = PAGE_ALIGN(q_depth * max_msg_size); + buf_size = MANA_PAGE_ALIGN(q_depth * max_msg_size); gmi = &dma_buf->mem_info; err = mana_gd_alloc_memory(gc, buf_size, gmi); @@ -497,8 +497,8 @@ static int mana_hwc_create_wq(struct hw_channel_context *hwc, else queue_size = roundup_pow_of_two(GDMA_MAX_SQE_SIZE * q_depth); - if (queue_size < MINIMUM_SUPPORTED_PAGE_SIZE) - queue_size = MINIMUM_SUPPORTED_PAGE_SIZE; + if (queue_size < MANA_MIN_QSIZE) + queue_size = MANA_MIN_QSIZE; hwc_wq = kzalloc(sizeof(*hwc_wq), GFP_KERNEL); if (!hwc_wq) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 93e526e5dd16..3f094a8a4e97 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1904,10 +1904,10 @@ static int mana_create_txq(struct mana_port_context *apc, * to prevent overflow. */ txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32; - BUILD_BUG_ON(!PAGE_ALIGNED(txq_size)); + BUILD_BUG_ON(!MANA_PAGE_ALIGNED(txq_size)); cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE; - cq_size = PAGE_ALIGN(cq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); gc = gd->gdma_context; @@ -2204,8 +2204,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, if (err) goto out; - rq_size = PAGE_ALIGN(rq_size); - cq_size = PAGE_ALIGN(cq_size); + rq_size = MANA_PAGE_ALIGN(rq_size); + cq_size = MANA_PAGE_ALIGN(cq_size); /* Create RQ */ memset(&spec, 0, sizeof(spec)); diff --git a/drivers/net/ethernet/microsoft/mana/shm_channel.c b/drivers/net/ethernet/microsoft/mana/shm_channel.c index 5553af9c8085..0f1679ebad96 100644 --- a/drivers/net/ethernet/microsoft/mana/shm_channel.c +++ b/drivers/net/ethernet/microsoft/mana/shm_channel.c @@ -6,6 +6,7 @@ #include #include +#include #include #define PAGE_FRAME_L48_WIDTH_BYTES 6 @@ -155,8 +156,8 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, return err; } - if (!PAGE_ALIGNED(eq_addr) || !PAGE_ALIGNED(cq_addr) || - !PAGE_ALIGNED(rq_addr) || !PAGE_ALIGNED(sq_addr)) + if (!MANA_PAGE_ALIGNED(eq_addr) || !MANA_PAGE_ALIGNED(cq_addr) || + !MANA_PAGE_ALIGNED(rq_addr) || !MANA_PAGE_ALIGNED(sq_addr)) return -EINVAL; if ((eq_msix_index & VECTOR_MASK) != eq_msix_index) @@ -183,7 +184,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* EQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(eq_addr); + frame_addr = MANA_PFN(eq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -191,7 +192,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* CQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(cq_addr); + frame_addr = MANA_PFN(cq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -199,7 +200,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* RQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(rq_addr); + frame_addr = MANA_PFN(rq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); @@ -207,7 +208,7 @@ int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, /* SQ addr: low 48 bits of frame address */ shmem = (u64 *)ptr; - frame_addr = PHYS_PFN(sq_addr); + frame_addr = MANA_PFN(sq_addr); *shmem = frame_addr & PAGE_FRAME_L48_MASK; all_addr_h4bits |= (frame_addr >> PAGE_FRAME_L48_WIDTH_BITS) << (frame_addr_seq++ * PAGE_FRAME_H4_WIDTH_BITS); diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index c547756c4284..83963d9e804d 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -224,7 +224,15 @@ struct gdma_dev { struct auxiliary_device *adev; }; -#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE +/* MANA_PAGE_SIZE is the DMA unit */ +#define MANA_PAGE_SHIFT 12 +#define MANA_PAGE_SIZE BIT(MANA_PAGE_SHIFT) +#define MANA_PAGE_ALIGN(x) ALIGN((x), MANA_PAGE_SIZE) +#define MANA_PAGE_ALIGNED(addr) IS_ALIGNED((unsigned long)(addr), MANA_PAGE_SIZE) +#define MANA_PFN(a) ((a) >> MANA_PAGE_SHIFT) + +/* Required by HW */ +#define MANA_MIN_QSIZE MANA_PAGE_SIZE #define GDMA_CQE_SIZE 64 #define GDMA_EQE_SIZE 16 diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 59823901b74f..e39b8676fe54 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -42,7 +42,8 @@ enum TRI_STATE { #define MAX_SEND_BUFFERS_PER_QUEUE 256 -#define EQ_SIZE (8 * PAGE_SIZE) +#define EQ_SIZE (8 * MANA_PAGE_SIZE) + #define LOG2_EQ_THROTTLE 3 #define MAX_PORTS_IN_MANA_DEV 256 -- cgit v1.2.3 From 1e4c64b71c9bf230b25fde12cbcceacfdc8b3332 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Thu, 13 Jun 2024 11:55:07 -0400 Subject: mm/memblock: Add "reserve_mem" to reserved named memory at boot up In order to allow for requesting a memory region that can be used for things like pstore on multiple machines where the memory layout is not the same, add a new option to the kernel command line called "reserve_mem". The format is: reserve_mem=nn:align:name Where it will find nn amount of memory at the given alignment of align. The name field is to allow another subsystem to retrieve where the memory was found. For example: reserve_mem=12M:4096:oops ramoops.mem_name=oops Where ramoops.mem_name will tell ramoops that memory was reserved for it via the reserve_mem option and it can find it by calling: if (reserve_mem_find_by_name("oops", &start, &size)) { // start holds the start address and size holds the size given This is typically used for systems that do not wipe the RAM, and this command line will try to reserve the same physical memory on soft reboots. Note, it is not guaranteed to be the same location. For example, if KASLR places the kernel at the location of where the RAM reservation was from a previous boot, the new reservation will be at a different location. Any subsystem using this feature must add a way to verify that the contents of the physical memory is from a previous boot, as there may be cases where the memory will not be located at the same location. Not all systems may work either. There could be bit flips if the reboot goes through the BIOS. Using kexec to reboot the machine is likely to have better results in such cases. Link: https://lore.kernel.org/all/ZjJVnZUX3NZiGW6q@kernel.org/ Suggested-by: Mike Rapoport Tested-by: Guilherme G. Piccoli Signed-off-by: Steven Rostedt (Google) Link: https://lore.kernel.org/r/20240613155527.437020271@goodmis.org Signed-off-by: Mike Rapoport (IBM) --- Documentation/admin-guide/kernel-parameters.txt | 22 +++++ include/linux/mm.h | 2 + mm/memblock.c | 117 ++++++++++++++++++++++++ 3 files changed, 141 insertions(+) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b600df82669d..56e18b1a520d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5710,6 +5710,28 @@ them. If is less than 0x10000, the region is assumed to be I/O ports; otherwise it is memory. + reserve_mem= [RAM] + Format: nn[KNG]::