From e940e0895a82c6fbaa259f2615eb52b57ee91a7e Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Fri, 26 Feb 2021 10:24:56 +0100 Subject: can: skb: can_skb_set_owner(): fix ref counting if socket was closed before setting skb ownership There are two ref count variables controlling the free()ing of a socket: - struct sock::sk_refcnt - which is changed by sock_hold()/sock_put() - struct sock::sk_wmem_alloc - which accounts the memory allocated by the skbs in the send path. In case there are still TX skbs on the fly and the socket() is closed, the struct sock::sk_refcnt reaches 0. In the TX-path the CAN stack clones an "echo" skb, calls sock_hold() on the original socket and references it. This produces the following back trace: | WARNING: CPU: 0 PID: 280 at lib/refcount.c:25 refcount_warn_saturate+0x114/0x134 | refcount_t: addition on 0; use-after-free. | Modules linked in: coda_vpu(E) v4l2_jpeg(E) videobuf2_vmalloc(E) imx_vdoa(E) | CPU: 0 PID: 280 Comm: test_can.sh Tainted: G E 5.11.0-04577-gf8ff6603c617 #203 | Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) | Backtrace: | [<80bafea4>] (dump_backtrace) from [<80bb0280>] (show_stack+0x20/0x24) r7:00000000 r6:600f0113 r5:00000000 r4:81441220 | [<80bb0260>] (show_stack) from [<80bb593c>] (dump_stack+0xa0/0xc8) | [<80bb589c>] (dump_stack) from [<8012b268>] (__warn+0xd4/0x114) r9:00000019 r8:80f4a8c2 r7:83e4150c r6:00000000 r5:00000009 r4:80528f90 | [<8012b194>] (__warn) from [<80bb09c4>] (warn_slowpath_fmt+0x88/0xc8) r9:83f26400 r8:80f4a8d1 r7:00000009 r6:80528f90 r5:00000019 r4:80f4a8c2 | [<80bb0940>] (warn_slowpath_fmt) from [<80528f90>] (refcount_warn_saturate+0x114/0x134) r8:00000000 r7:00000000 r6:82b44000 r5:834e5600 r4:83f4d540 | [<80528e7c>] (refcount_warn_saturate) from [<8079a4c8>] (__refcount_add.constprop.0+0x4c/0x50) | [<8079a47c>] (__refcount_add.constprop.0) from [<8079a57c>] (can_put_echo_skb+0xb0/0x13c) | [<8079a4cc>] (can_put_echo_skb) from [<8079ba98>] (flexcan_start_xmit+0x1c4/0x230) r9:00000010 r8:83f48610 r7:0fdc0000 r6:0c080000 r5:82b44000 r4:834e5600 | [<8079b8d4>] (flexcan_start_xmit) from [<80969078>] (netdev_start_xmit+0x44/0x70) r9:814c0ba0 r8:80c8790c r7:00000000 r6:834e5600 r5:82b44000 r4:82ab1f00 | [<80969034>] (netdev_start_xmit) from [<809725a4>] (dev_hard_start_xmit+0x19c/0x318) r9:814c0ba0 r8:00000000 r7:82ab1f00 r6:82b44000 r5:00000000 r4:834e5600 | [<80972408>] (dev_hard_start_xmit) from [<809c6584>] (sch_direct_xmit+0xcc/0x264) r10:834e5600 r9:00000000 r8:00000000 r7:82b44000 r6:82ab1f00 r5:834e5600 r4:83f27400 | [<809c64b8>] (sch_direct_xmit) from [<809c6c0c>] (__qdisc_run+0x4f0/0x534) To fix this problem, only set skb ownership to sockets which have still a ref count > 0. Fixes: 0ae89beb283a ("can: add destructor for self generated skbs") Cc: Oliver Hartkopp Cc: Andre Naujoks Link: https://lore.kernel.org/r/20210226092456.27126-1-o.rempel@pengutronix.de Suggested-by: Eric Dumazet Signed-off-by: Oleksij Rempel Reviewed-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/skb.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 685f34cfba20..d438eb058069 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -65,8 +65,12 @@ static inline void can_skb_reserve(struct sk_buff *skb) static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { - if (sk) { - sock_hold(sk); + /* If the socket has already been closed by user space, the + * refcount may already be 0 (and the socket will be freed + * after the last TX skb has been freed). So only increase + * socket refcount if the refcount is > 0. + */ + if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { skb->destructor = sock_efree; skb->sk = sk; } -- cgit v1.2.3 From b228c9b058760500fda5edb3134527f629fc2dc3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 1 Mar 2021 15:09:44 +0000 Subject: net: expand textsearch ts_state to fit skb_seq_state The referenced commit expands the skb_seq_state used by skb_find_text with a 4B frag_off field, growing it to 48B. This exceeds container ts_state->cb, causing a stack corruption: [ 73.238353] Kernel panic - not syncing: stack-protector: Kernel stack is corrupted in: skb_find_text+0xc5/0xd0 [ 73.247384] CPU: 1 PID: 376 Comm: nping Not tainted 5.11.0+ #4 [ 73.252613] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 [ 73.260078] Call Trace: [ 73.264677] dump_stack+0x57/0x6a [ 73.267866] panic+0xf6/0x2b7 [ 73.270578] ? skb_find_text+0xc5/0xd0 [ 73.273964] __stack_chk_fail+0x10/0x10 [ 73.277491] skb_find_text+0xc5/0xd0 [ 73.280727] string_mt+0x1f/0x30 [ 73.283639] ipt_do_table+0x214/0x410 The struct is passed between skb_find_text and its callbacks skb_prepare_seq_read, skb_seq_read and skb_abort_seq read through the textsearch interface using TS_SKB_CB. I assumed that this mapped to skb->cb like other .._SKB_CB wrappers. skb->cb is 48B. But it maps to ts_state->cb, which is only 40B. skb->cb was increased from 40B to 48B after ts_state was introduced, in commit 3e3850e989c5 ("[NETFILTER]: Fix xfrm lookup in ip_route_me_harder/ip6_route_me_harder"). Increase ts_state.cb[] to 48 to fit the struct. Also add a BUILD_BUG_ON to avoid a repeat. The alternative is to directly add a dependency from textsearch onto linux/skbuff.h, but I think the intent is textsearch to have no such dependencies on its callers. Link: https://bugzilla.kernel.org/show_bug.cgi?id=211911 Fixes: 97550f6fa592 ("net: compound page support in skb_seq_read") Reported-by: Kris Karas Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/textsearch.h | 2 +- net/core/skbuff.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 13770cfe33ad..6673e4d4ac2e 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -23,7 +23,7 @@ struct ts_config; struct ts_state { unsigned int offset; - char cb[40]; + char cb[48]; }; /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 545a472273a5..c421c8f80925 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3659,6 +3659,8 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, struct ts_state state; unsigned int ret; + BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb)); + config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; -- cgit v1.2.3 From c95c34f01bbda4421c25fdc9b04a4a4aab10d36c Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Wed, 3 Mar 2021 19:56:34 +0100 Subject: xsk: Remove dangling function declaration from header file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit xdp_umem_query() is dead for a long time, drop the declaration from include/linux/netdevice.h Fixes: c9b47cc1fabc ("xsk: fix bug when trying to use both copy and zero-copy on one queue id") Signed-off-by: Maciej Fijalkowski Signed-off-by: Daniel Borkmann Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20210303185636.18070-2-maciej.fijalkowski@intel.com --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f06fbee8638e..5b67ea89d5f2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3959,8 +3959,6 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); -int xdp_umem_query(struct net_device *dev, u16 queue_id); - int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From a5398bffc01fe044848c5024e5e867e407f239b8 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 30 Nov 2020 11:38:40 -0800 Subject: perf/core: Flush PMU internal buffers for per-CPU events Sometimes the PMU internal buffers have to be flushed for per-CPU events during a context switch, e.g., large PEBS. Otherwise, the perf tool may report samples in locations that do not belong to the process where the samples are processed in, because PEBS does not tag samples with PID/TID. The current code only flush the buffers for a per-task event. It doesn't check a per-CPU event. Add a new event state flag, PERF_ATTACH_SCHED_CB, to indicate that the PMU internal buffers have to be flushed for this event during a context switch. Add sched_cb_entry and perf_sched_cb_usages back to track the PMU/cpuctx which is required to be flushed. Only need to invoke the sched_task() for per-CPU events in this patch. The per-task events have been handled in perf_event_context_sched_in/out already. Fixes: 9c964efa4330 ("perf/x86/intel: Drain the PEBS buffer during context switches") Reported-by: Gabriel Marin Originally-by: Namhyung Kim Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20201130193842.10569-1-kan.liang@linux.intel.com --- include/linux/perf_event.h | 2 ++ kernel/events/core.c | 42 ++++++++++++++++++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index fab42cfbd350..3f7f89ea5e51 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -606,6 +606,7 @@ struct swevent_hlist { #define PERF_ATTACH_TASK 0x04 #define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_ITRACE 0x10 +#define PERF_ATTACH_SCHED_CB 0x20 struct perf_cgroup; struct perf_buffer; @@ -872,6 +873,7 @@ struct perf_cpu_context { struct list_head cgrp_cpuctx_entry; #endif + struct list_head sched_cb_entry; int sched_cb_usage; int online; diff --git a/kernel/events/core.c b/kernel/events/core.c index 0aeca5f3c0ac..03db40f6cba9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex); static atomic_t perf_sched_count; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); +static DEFINE_PER_CPU(int, perf_sched_cb_usages); static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; @@ -3461,11 +3462,16 @@ unlock: } } +static DEFINE_PER_CPU(struct list_head, sched_cb_list); + void perf_sched_cb_dec(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - --cpuctx->sched_cb_usage; + this_cpu_dec(perf_sched_cb_usages); + + if (!--cpuctx->sched_cb_usage) + list_del(&cpuctx->sched_cb_entry); } @@ -3473,7 +3479,10 @@ void perf_sched_cb_inc(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - cpuctx->sched_cb_usage++; + if (!cpuctx->sched_cb_usage++) + list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list)); + + this_cpu_inc(perf_sched_cb_usages); } /* @@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } +static void perf_pmu_sched_task(struct task_struct *prev, + struct task_struct *next, + bool sched_in) +{ + struct perf_cpu_context *cpuctx; + + if (prev == next) + return; + + list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) { + /* will be handled in perf_event_context_sched_in/out */ + if (cpuctx->task_ctx) + continue; + + __perf_pmu_sched_task(cpuctx, sched_in); + } +} + static void perf_event_switch(struct task_struct *task, struct task_struct *next_prev, bool sched_in); @@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task, { int ctxn; + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(task, next, false); + if (atomic_read(&nr_switch_events)) perf_event_switch(task, next, false); @@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev, if (atomic_read(&nr_switch_events)) perf_event_switch(task, prev, true); + + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(prev, task, true); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -4656,7 +4689,7 @@ static void unaccount_event(struct perf_event *event) if (event->parent) return; - if (event->attach_state & PERF_ATTACH_TASK) + if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) dec = true; if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); @@ -11175,7 +11208,7 @@ static void account_event(struct perf_event *event) if (event->parent) return; - if (event->attach_state & PERF_ATTACH_TASK) + if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) inc = true; if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); @@ -12972,6 +13005,7 @@ static void __init perf_event_init_all_cpus(void) #ifdef CONFIG_CGROUP_PERF INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu)); #endif + INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu)); } } -- cgit v1.2.3 From baf186c4d345f5a105e63df01100936ad622f369 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 6 Mar 2021 11:02:15 +0000 Subject: io_uring: index io_uring->xa by ctx not file We don't use task file notes anymore, and no need left in indexing task->io_uring->xa by file, and replace it with ctx. It's better design-wise, especially since we keep a dangling file, and so have to keep an eye on not dereferencing it. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- fs/io_uring.c | 24 +++++++++++------------- include/linux/io_uring.h | 2 +- 2 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/io_uring.c b/fs/io_uring.c index f448213267c8..01a7fa4a4889 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -809,7 +809,6 @@ struct io_kiocb { struct io_tctx_node { struct list_head ctx_node; struct task_struct *task; - struct file *file; struct io_ring_ctx *ctx; }; @@ -8540,7 +8539,7 @@ static bool io_run_ctx_fallback(struct io_ring_ctx *ctx) struct io_tctx_exit { struct callback_head task_work; struct completion completion; - unsigned long index; + struct io_ring_ctx *ctx; }; static void io_tctx_exit_cb(struct callback_head *cb) @@ -8554,7 +8553,7 @@ static void io_tctx_exit_cb(struct callback_head *cb) * node. It'll be removed by the end of cancellation, just ignore it. */ if (!atomic_read(&tctx->in_idle)) - io_uring_del_task_file(work->index); + io_uring_del_task_file((unsigned long)work->ctx); complete(&work->completion); } @@ -8579,7 +8578,7 @@ static void io_ring_exit_work(struct work_struct *work) while (!list_empty(&ctx->tctx_list)) { node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, ctx_node); - exit.index = (unsigned long)node->file; + exit.ctx = ctx; init_completion(&exit.completion); init_task_work(&exit.task_work, io_tctx_exit_cb); ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); @@ -8798,7 +8797,7 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, /* * Note that this task has used io_uring. We use it for cancelation purposes. */ -static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) +static int io_uring_add_task_file(struct io_ring_ctx *ctx) { struct io_uring_task *tctx = current->io_uring; struct io_tctx_node *node; @@ -8810,18 +8809,17 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) return ret; tctx = current->io_uring; } - if (tctx->last != file) { - void *old = xa_load(&tctx->xa, (unsigned long)file); + if (tctx->last != ctx) { + void *old = xa_load(&tctx->xa, (unsigned long)ctx); if (!old) { node = kmalloc(sizeof(*node), GFP_KERNEL); if (!node) return -ENOMEM; node->ctx = ctx; - node->file = file; node->task = current; - ret = xa_err(xa_store(&tctx->xa, (unsigned long)file, + ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, node, GFP_KERNEL)); if (ret) { kfree(node); @@ -8832,7 +8830,7 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file) list_add(&node->ctx_node, &ctx->tctx_list); mutex_unlock(&ctx->uring_lock); } - tctx->last = file; + tctx->last = ctx; } /* @@ -8867,7 +8865,7 @@ static void io_uring_del_task_file(unsigned long index) list_del(&node->ctx_node); mutex_unlock(&node->ctx->uring_lock); - if (tctx->last == node->file) + if (tctx->last == node->ctx) tctx->last = NULL; kfree(node); } @@ -9166,7 +9164,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, } submitted = to_submit; } else if (to_submit) { - ret = io_uring_add_task_file(ctx, f.file); + ret = io_uring_add_task_file(ctx); if (unlikely(ret)) goto out; mutex_lock(&ctx->uring_lock); @@ -9375,7 +9373,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) if (fd < 0) return fd; - ret = io_uring_add_task_file(ctx, file); + ret = io_uring_add_task_file(ctx); if (ret) { put_unused_fd(fd); return ret; diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h index 7cb7bd0e334c..9761a0ec9f95 100644 --- a/include/linux/io_uring.h +++ b/include/linux/io_uring.h @@ -18,7 +18,7 @@ struct io_uring_task { /* submission side */ struct xarray xa; struct wait_queue_head wait; - struct file *last; + void *last; void *io_wq; struct percpu_counter inflight; atomic_t in_idle; -- cgit v1.2.3 From 62d5247d239d4b48762192a251c647d7c997616a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Feb 2021 18:33:18 +0200 Subject: gpiolib: acpi: Add ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER quirk On some systems the ACPI tables has wrong pin number and instead of having a relative one it provides an absolute one in the global GPIO number space. Add ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER quirk to cope with such cases. Fixes: ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") Depends-on: 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 7 ++++++- include/linux/gpio/consumer.h | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 86efa2d9bf7f..0fa0127d50ec 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -677,6 +677,7 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data) if (!lookup->desc) { const struct acpi_resource_gpio *agpio = &ares->data.gpio; bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT; + struct gpio_desc *desc; u16 pin_index; if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint) @@ -689,8 +690,12 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data) if (pin_index >= agpio->pin_table_length) return 1; - lookup->desc = acpi_get_gpiod(agpio->resource_source.string_ptr, + if (lookup->info.quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER) + desc = gpio_to_desc(agpio->pin_table[pin_index]); + else + desc = acpi_get_gpiod(agpio->resource_source.string_ptr, agpio->pin_table[pin_index]); + lookup->desc = desc; lookup->info.pin_config = agpio->pin_config; lookup->info.debounce = agpio->debounce_timeout; lookup->info.gpioint = gpioint; diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index ef49307611d2..c73b25bc9213 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -674,6 +674,8 @@ struct acpi_gpio_mapping { * get GpioIo type explicitly, this quirk may be used. */ #define ACPI_GPIO_QUIRK_ONLY_GPIOIO BIT(1) +/* Use given pin as an absolute GPIO number in the system */ +#define ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER BIT(2) unsigned int quirks; }; -- cgit v1.2.3 From 809390219fb9c2421239afe5c9eb862d73978ba0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 25 Feb 2021 18:33:19 +0200 Subject: gpiolib: acpi: Allow to find GpioInt() resource by name and index Currently only search by index is supported. However, in some cases we might need to pass the quirks to the acpi_dev_gpio_irq_get(). For this, split out acpi_dev_gpio_irq_get_by() and replace acpi_dev_gpio_irq_get() by calling above with NULL for name parameter. Fixes: ba8c90c61847 ("gpio: pca953x: Override IRQ for one of the expanders on Galileo Gen 2") Depends-on: 0ea683931adb ("gpio: dwapb: Convert driver to using the GPIO-lib-based IRQ-chip") Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Acked-by: Linus Walleij --- drivers/gpio/gpiolib-acpi.c | 12 ++++++++---- include/linux/acpi.h | 10 ++++++++-- 2 files changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 0fa0127d50ec..1aacd2a5a1fd 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -945,8 +945,9 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, } /** - * acpi_dev_gpio_irq_get() - Find GpioInt and translate it to Linux IRQ number + * acpi_dev_gpio_irq_get_by() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from + * @name: optional name of GpioInt resource * @index: index of GpioInt resource (starting from %0) * * If the device has one or more GpioInt resources, this function can be @@ -956,9 +957,12 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, * The function is idempotent, though each time it runs it will configure GPIO * pin direction according to the flags in GpioInt resource. * + * The function takes optional @name parameter. If the resource has a property + * name, then only those will be taken into account. + * * Return: Linux IRQ number (> %0) on success, negative errno on failure. */ -int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index) { int idx, i; unsigned int irq_flags; @@ -968,7 +972,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) struct acpi_gpio_info info; struct gpio_desc *desc; - desc = acpi_get_gpiod_by_index(adev, NULL, i, &info); + desc = acpi_get_gpiod_by_index(adev, name, i, &info); /* Ignore -EPROBE_DEFER, it only matters if idx matches */ if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) @@ -1013,7 +1017,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) } return -ENOENT; } -EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get); +EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get_by); static acpi_status acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9f432411e988..fcdaab723916 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1079,19 +1079,25 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB) bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); -int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index); +int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } -static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, + const char *name, int index) { return -ENXIO; } #endif +static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +{ + return acpi_dev_gpio_irq_get_by(adev, NULL, index); +} + /* Device properties */ #ifdef CONFIG_ACPI -- cgit v1.2.3 From 69dd4503a7e6bae3389b8e028e5768008be8f2d7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 16 Feb 2021 15:36:07 +0100 Subject: irqdomain: Remove debugfs_file from struct irq_domain There's no need to keep around a dentry pointer to a simple file that debugfs itself can look up when we need to remove it from the system. So simplify the code by deleting the variable and cleaning up the logic around the debugfs file. Cc: Marc Zyngier Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/YCvYV53ZdzQSWY6w@kroah.com --- include/linux/irqdomain.h | 4 ---- kernel/irq/irqdomain.c | 9 ++++----- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 42d196805f58..33cacc8af26d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -150,7 +150,6 @@ struct irq_domain_chip_generic; * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains - * @debugfs_file: dentry for the domain debugfs file * * Revmap data, used internally by irq_domain * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that @@ -174,9 +173,6 @@ struct irq_domain { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif -#ifdef CONFIG_GENERIC_IRQ_DEBUGFS - struct dentry *debugfs_file; -#endif /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index 288151393a06..d10ab1d689d5 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -1898,16 +1898,15 @@ DEFINE_SHOW_ATTRIBUTE(irq_domain_debug); static void debugfs_add_domain_dir(struct irq_domain *d) { - if (!d->name || !domain_dir || d->debugfs_file) + if (!d->name || !domain_dir) return; - d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d, - &irq_domain_debug_fops); + debugfs_create_file(d->name, 0444, domain_dir, d, + &irq_domain_debug_fops); } static void debugfs_remove_domain_dir(struct irq_domain *d) { - debugfs_remove(d->debugfs_file); - d->debugfs_file = NULL; + debugfs_remove(debugfs_lookup(d->name, domain_dir)); } void __init irq_domain_debugfs_init(struct dentry *root) -- cgit v1.2.3 From 1019d7923d9d4cc878a1a85d4fc2d6619cfe1a6a Mon Sep 17 00:00:00 2001 From: Tong Zhang Date: Sun, 7 Mar 2021 22:25:28 -0500 Subject: atm: fix a typo in the struct description phy_data means private PHY data not date Signed-off-by: Tong Zhang Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 60cd25c0461b..9b02961d65ee 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -151,7 +151,7 @@ struct atm_dev { const char *type; /* device type name */ int number; /* device index */ void *dev_data; /* per-device data */ - void *phy_data; /* private PHY date */ + void *phy_data; /* private PHY data */ unsigned long flags; /* device flags (ATM_DF_*) */ struct list_head local; /* local ATM addresses */ struct list_head lecs; /* LECS ATM addresses learned via ILMI */ -- cgit v1.2.3 From 924a9bc362a5223cd448ca08c3dde21235adc310 Mon Sep 17 00:00:00 2001 From: Balazs Nemeth Date: Tue, 9 Mar 2021 12:31:00 +0100 Subject: net: check if protocol extracted by virtio_net_hdr_set_proto is correct For gso packets, virtio_net_hdr_set_proto sets the protocol (if it isn't set) based on the type in the virtio net hdr, but the skb could contain anything since it could come from packet_snd through a raw socket. If there is a mismatch between what virtio_net_hdr_set_proto sets and the actual protocol, then the skb could be handled incorrectly later on. An example where this poses an issue is with the subsequent call to skb_flow_dissect_flow_keys_basic which relies on skb->protocol being set correctly. A specially crafted packet could fool skb_flow_dissect_flow_keys_basic preventing EINVAL to be returned. Avoid blindly trusting the information provided by the virtio net header by checking that the protocol in the packet actually matches the protocol set by virtio_net_hdr_set_proto. Note that since the protocol is only checked if skb->dev implements header_ops->parse_protocol, packets from devices without the implementation are not checked at this stage. Fixes: 9274124f023b ("net: stricter validation of untrusted gso packets") Signed-off-by: Balazs Nemeth Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/virtio_net.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index e8a924eeea3d..6b5fcfa1e555 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -79,8 +79,13 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (gso_type && skb->network_header) { struct flow_keys_basic keys; - if (!skb->protocol) + if (!skb->protocol) { + __be16 protocol = dev_parse_header_protocol(skb); + virtio_net_hdr_set_proto(skb, hdr); + if (protocol && protocol != skb->protocol) + return -EINVAL; + } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, -- cgit v1.2.3 From 51f24030358bdeeb9e75a38618dd029c5a53beeb Mon Sep 17 00:00:00 2001 From: Shuo Liu Date: Sun, 21 Feb 2021 21:43:38 +0800 Subject: cpu/hotplug: Fix build error of using {add,remove}_cpu() with !CONFIG_SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 279dcf693ac7 ("virt: acrn: Introduce an interface for Service VM to control vCPU") introduced {add,remove}_cpu() usage and it hit below error with !CONFIG_SMP: ../drivers/virt/acrn/hsm.c: In function ‘remove_cpu_store’: ../drivers/virt/acrn/hsm.c:389:3: error: implicit declaration of function ‘remove_cpu’; [-Werror=implicit-function-declaration] remove_cpu(cpu); ../drivers/virt/acrn/hsm.c:402:2: error: implicit declaration of function ‘add_cpu’; [-Werror=implicit-function-declaration] add_cpu(cpu); Add add_cpu() function prototypes with !CONFIG_SMP and remove_cpu() with !CONFIG_HOTPLUG_CPU for such usage. Fixes: 279dcf693ac7 ("virt: acrn: Introduce an interface for Service VM to control vCPU") Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Qais Yousef Reported-by: Randy Dunlap Reviewed-by: Qais Yousef Acked-by: Randy Dunlap # build-tested Signed-off-by: Shuo Liu Link: https://lore.kernel.org/r/20210221134339.57851-1-shuo.a.liu@intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/cpu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 3aaa0687e8df..94a578a96202 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -108,6 +108,8 @@ static inline void cpu_maps_update_done(void) { } +static inline int add_cpu(unsigned int cpu) { return 0;} + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -137,6 +139,7 @@ static inline int cpus_read_trylock(void) { return true; } static inline void lockdep_assert_cpus_held(void) { } static inline void cpu_hotplug_disable(void) { } static inline void cpu_hotplug_enable(void) { } +static inline int remove_cpu(unsigned int cpu) { return -EPERM; } static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { } #endif /* !CONFIG_HOTPLUG_CPU */ -- cgit v1.2.3 From d5b0e0677bfd5efd17c5bbb00156931f0d41cb85 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2021 09:38:12 +0100 Subject: u64_stats,lockdep: Fix u64_stats_init() vs lockdep Jakub reported that: static struct net_device *rtl8139_init_board(struct pci_dev *pdev) { ... u64_stats_init(&tp->rx_stats.syncp); u64_stats_init(&tp->tx_stats.syncp); ... } results in lockdep getting confused between the RX and TX stats lock. This is because u64_stats_init() is an inline calling seqcount_init(), which is a macro using a static variable to generate a lockdep class. By wrapping that in an inline, we negate the effect of the macro and fold the static key variable, hence the confusion. Fix by also making u64_stats_init() a macro for the case where it matters, leaving the other case an inline for argument validation etc. Reported-by: Jakub Kicinski Debugged-by: "Ahmed S. Darwish" Signed-off-by: Peter Zijlstra (Intel) Tested-by: "Erhard F." Link: https://lkml.kernel.org/r/YEXicy6+9MksdLZh@hirez.programming.kicks-ass.net --- include/linux/u64_stats_sync.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index c6abb79501b3..e81856c0ba13 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -115,12 +115,13 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) +#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - seqcount_init(&syncp->seq); -#endif } +#endif static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { -- cgit v1.2.3 From 4817a52b306136c8b2b2271d8770401441e4cf79 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Mar 2021 15:21:18 +0100 Subject: seqlock,lockdep: Fix seqcount_latch_init() seqcount_init() must be a macro in order to preserve the static variable that is used for the lockdep key. Don't then wrap it in an inline function, which destroys that. Luckily there aren't many users of this function, but fix it before it becomes a problem. Fixes: 80793c3471d9 ("seqlock: Introduce seqcount_latch_t") Reported-by: Eric Dumazet Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YEeFEbNUVkZaXDp4@hirez.programming.kicks-ass.net --- include/linux/seqlock.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 2f7bb92b4c9e..f61e34fbaaea 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -664,10 +664,7 @@ typedef struct { * seqcount_latch_init() - runtime initializer for seqcount_latch_t * @s: Pointer to the seqcount_latch_t instance */ -static inline void seqcount_latch_init(seqcount_latch_t *s) -{ - seqcount_init(&s->seqcount); -} +#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount) /** * raw_read_seqcount_latch() - pick even/odd latch data copy -- cgit v1.2.3 From d15dfd31384ba3cb93150e5f87661a76fa419f74 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 9 Mar 2021 12:26:01 +0000 Subject: arm64: mte: Map hotplugged memory as Normal Tagged In a system supporting MTE, the linear map must allow reading/writing allocation tags by setting the memory type as Normal Tagged. Currently, this is only handled for memory present at boot. Hotplugged memory uses Normal non-Tagged memory. Introduce pgprot_mhp() for hotplugged memory and use it in add_memory_resource(). The arm64 code maps pgprot_mhp() to pgprot_tagged(). Note that ZONE_DEVICE memory should not be mapped as Tagged and therefore setting the memory type in arch_add_memory() is not feasible. Signed-off-by: Catalin Marinas Fixes: 0178dc761368 ("arm64: mte: Use Normal Tagged attributes for the linear map") Reported-by: Patrick Daly Tested-by: Patrick Daly Link: https://lore.kernel.org/r/1614745263-27827-1-git-send-email-pdaly@codeaurora.org Cc: # 5.10.x Cc: Will Deacon Cc: Andrew Morton Cc: Vincenzo Frascino Cc: David Hildenbrand Reviewed-by: David Hildenbrand Reviewed-by: Vincenzo Frascino Reviewed-by: Anshuman Khandual Link: https://lore.kernel.org/r/20210309122601.5543-1-catalin.marinas@arm.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable-prot.h | 1 - arch/arm64/include/asm/pgtable.h | 3 +++ arch/arm64/mm/mmu.c | 3 ++- include/linux/pgtable.h | 4 ++++ mm/memory_hotplug.c | 2 +- 5 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 046be789fbb4..9a65fb528110 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -66,7 +66,6 @@ extern bool arm64_use_ng_mappings; #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) #define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e17b96d0e4b5..47027796c2f9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -486,6 +486,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_device(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN) +#define pgprot_tagged(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED)) +#define pgprot_mhp pgprot_tagged /* * DMA allocations for non-coherent devices use what the Arm architecture calls * "Normal non-cacheable" memory, which permits speculation, unaligned accesses diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 3802cfbdd20d..9c8aa1b44cd5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -512,7 +512,8 @@ static void __init map_mem(pgd_t *pgdp) * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ - __map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags); + __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), + flags); } /* diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index cdfc4e9f253e..5e772392a379 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -904,6 +904,10 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, #define pgprot_device pgprot_noncached #endif +#ifndef pgprot_mhp +#define pgprot_mhp(prot) (prot) +#endif + #ifdef CONFIG_MMU #ifndef pgprot_modify #define pgprot_modify pgprot_modify diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 5ba51a8bdaeb..0cdbbfbc5757 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1072,7 +1072,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { - struct mhp_params params = { .pgprot = PAGE_KERNEL }; + struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; u64 start, size; bool new_node = false; int ret; -- cgit v1.2.3 From 98b94b6e38ca0c4eeb29949c656f6a315000c23e Mon Sep 17 00:00:00 2001 From: Frieder Schrempf Date: Mon, 22 Feb 2021 12:52:20 +0100 Subject: regulator: pca9450: Clear PRESET_EN bit to fix BUCK1/2/3 voltage setting The driver uses the DVS registers PCA9450_REG_BUCKxOUT_DVS0 to set the voltage for the buck regulators 1, 2 and 3. This has no effect as the PRESET_EN bit is set by default and therefore the preset values are used instead, which are set to 850 mV. To fix this we clear the PRESET_EN bit at time of initialization. Fixes: 0935ff5f1f0a ("regulator: pca9450: add pca9450 pmic driver") Cc: Signed-off-by: Frieder Schrempf Link: https://lore.kernel.org/r/20210222115229.166620-1-frieder.schrempf@kontron.de Signed-off-by: Mark Brown --- drivers/regulator/pca9450-regulator.c | 8 ++++++++ include/linux/regulator/pca9450.h | 3 +++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c index 89b806be399f..2f7ee212cb8c 100644 --- a/drivers/regulator/pca9450-regulator.c +++ b/drivers/regulator/pca9450-regulator.c @@ -797,6 +797,14 @@ static int pca9450_i2c_probe(struct i2c_client *i2c, return ret; } + /* Clear PRESET_EN bit in BUCK123_DVS to use DVS registers */ + ret = regmap_clear_bits(pca9450->regmap, PCA9450_REG_BUCK123_DVS, + BUCK123_PRESET_EN); + if (ret) { + dev_err(&i2c->dev, "Failed to clear PRESET_EN bit: %d\n", ret); + return ret; + } + /* Set reset behavior on assertion of WDOG_B signal */ ret = regmap_update_bits(pca9450->regmap, PCA9450_REG_RESET_CTRL, WDOG_B_CFG_MASK, WDOG_B_CFG_COLD_LDO12); diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index ccdb5320a240..71902f41c919 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -147,6 +147,9 @@ enum { #define BUCK6_FPWM 0x04 #define BUCK6_ENMODE_MASK 0x03 +/* PCA9450_REG_BUCK123_PRESET_EN bit */ +#define BUCK123_PRESET_EN 0x80 + /* PCA9450_BUCK1OUT_DVS0 bits */ #define BUCK1OUT_DVS0_MASK 0x7F #define BUCK1OUT_DVS0_DEFAULT 0x14 -- cgit v1.2.3 From 2a92c90f2ecca4475d6050f2f938a1755a8954cc Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 1 Mar 2021 17:30:12 +0300 Subject: software node: Fix device_add_software_node() The function device_add_software_node() was meant to register the node supplied to it, but only if that node wasn't already registered. Right now the function attempts to always register the node. That will cause a failure with nodes that are already registered. Fixing that by incrementing the reference count of the nodes that have already been registered, and only registering the new nodes. Also, clarifying the behaviour in the function documentation. Fixes: e68d0119e328 ("software node: Introduce device_add_software_node()") Signed-off-by: Heikki Krogerus Reviewed-by: Andy Shevchenko Tested-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/base/swnode.c | 26 +++++++++++++++++--------- include/linux/property.h | 2 +- 2 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 74db8c971db7..fa3719ef80e4 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -1005,25 +1005,33 @@ EXPORT_SYMBOL_GPL(fwnode_remove_software_node); /** * device_add_software_node - Assign software node to a device * @dev: The device the software node is meant for. - * @swnode: The software node. + * @node: The software node. * - * This function will register @swnode and make it the secondary firmware node - * pointer of @dev. If @dev has no primary node, then @swnode will become the primary - * node. + * This function will make @node the secondary firmware node pointer of @dev. If + * @dev has no primary node, then @node will become the primary node. The + * function will register @node automatically if it wasn't already registered. */ -int device_add_software_node(struct device *dev, const struct software_node *swnode) +int device_add_software_node(struct device *dev, const struct software_node *node) { + struct swnode *swnode; int ret; /* Only one software node per device. */ if (dev_to_swnode(dev)) return -EBUSY; - ret = software_node_register(swnode); - if (ret) - return ret; + swnode = software_node_to_swnode(node); + if (swnode) { + kobject_get(&swnode->kobj); + } else { + ret = software_node_register(node); + if (ret) + return ret; + + swnode = software_node_to_swnode(node); + } - set_secondary_fwnode(dev, software_node_fwnode(swnode)); + set_secondary_fwnode(dev, &swnode->fwnode); return 0; } diff --git a/include/linux/property.h b/include/linux/property.h index dafccfce0262..dd4687b56239 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -488,7 +488,7 @@ fwnode_create_software_node(const struct property_entry *properties, const struct fwnode_handle *parent); void fwnode_remove_software_node(struct fwnode_handle *fwnode); -int device_add_software_node(struct device *dev, const struct software_node *swnode); +int device_add_software_node(struct device *dev, const struct software_node *node); void device_remove_software_node(struct device *dev); int device_create_managed_software_node(struct device *dev, -- cgit v1.2.3 From ce6ed1c4c9876c2880f52f18c41ef2a30d070bc5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 4 Mar 2021 20:37:08 +0900 Subject: kbuild: rebuild GCC plugins when the compiler is upgraded Linus reported a build error due to the GCC plugin incompatibility when the compiler is upgraded. [1] GCC plugins are tied to a particular GCC version. So, they must be rebuilt when the compiler is upgraded. This seems to be a long-standing flaw since the initial support of GCC plugins. Extend commit 8b59cd81dc5e ("kbuild: ensure full rebuild when the compiler is updated"), so that GCC plugins are covered by the compiler upgrade detection. [1]: https://lore.kernel.org/lkml/CAHk-=wieoN5ttOy7SnsGwZv+Fni3R6m-Ut=oxih6bbZ28G+4dw@mail.gmail.com/ Reported-by: Linus Torvalds Signed-off-by: Masahiro Yamada Reviewed-by: Kees Cook --- Makefile | 1 + include/linux/compiler-version.h | 14 ++++++++++++++ include/linux/kconfig.h | 2 -- init/Kconfig | 8 ++++---- scripts/gcc-plugins/Makefile | 1 + 5 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 include/linux/compiler-version.h (limited to 'include/linux') diff --git a/Makefile b/Makefile index 3f9f44eac27f..10bb0a62cc7d 100644 --- a/Makefile +++ b/Makefile @@ -479,6 +479,7 @@ USERINCLUDE := \ -I$(objtree)/arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/include/uapi \ -I$(objtree)/include/generated/uapi \ + -include $(srctree)/include/linux/compiler-version.h \ -include $(srctree)/include/linux/kconfig.h # Use LINUXINCLUDE when you must reference the include/ directory. diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h new file mode 100644 index 000000000000..2b2972c77c62 --- /dev/null +++ b/include/linux/compiler-version.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifdef __LINUX_COMPILER_VERSION_H +#error "Please do not include . This is done by the build system." +#endif +#define __LINUX_COMPILER_VERSION_H + +/* + * This header exists to force full rebuild when the compiler is upgraded. + * + * When fixdep scans this, it will find this string "CONFIG_CC_VERSION_TEXT" + * and add dependency on include/config/cc/version/text.h, which is touched + * by Kconfig when the version string from the compiler changes. + */ diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index e78e17a76dc9..24a59cb06963 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -2,8 +2,6 @@ #ifndef __LINUX_KCONFIG_H #define __LINUX_KCONFIG_H -/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */ - #include #ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/init/Kconfig b/init/Kconfig index 22946fe5ded9..30c849094c28 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -20,10 +20,10 @@ config CC_VERSION_TEXT When the compiler is updated, Kconfig will be invoked. - Ensure full rebuild when the compiler is updated - include/linux/kconfig.h contains this option in the comment line so - fixdep adds include/config/cc/version/text.h into the auto-generated - dependency. When the compiler is updated, syncconfig will touch it - and then every file will be rebuilt. + include/linux/compiler-version.h contains this option in the comment + line so fixdep adds include/config/cc/version/text.h into the + auto-generated dependency. When the compiler is updated, syncconfig + will touch it and then every file will be rebuilt. config CC_IS_GCC def_bool $(success,test "$(cc-name)" = GCC) diff --git a/scripts/gcc-plugins/Makefile b/scripts/gcc-plugins/Makefile index b5487cce69e8..1952d3bb80c6 100644 --- a/scripts/gcc-plugins/Makefile +++ b/scripts/gcc-plugins/Makefile @@ -22,6 +22,7 @@ always-y += $(GCC_PLUGIN) GCC_PLUGINS_DIR = $(shell $(CC) -print-file-name=plugin) plugin_cxxflags = -Wp,-MMD,$(depfile) $(KBUILD_HOSTCXXFLAGS) -fPIC \ + -include $(srctree)/include/linux/compiler-version.h \ -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \ -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \ -ggdb -Wno-narrowing -Wno-unused-variable \ -- cgit v1.2.3 From 659ab7a49cbebe0deffcbe1f9560e82006b21817 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Wed, 3 Mar 2021 14:32:29 +0100 Subject: drm: Use USB controller's DMA mask when importing dmabufs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit USB devices cannot perform DMA and hence have no dma_mask set in their device structure. Therefore importing dmabuf into a USB-based driver fails, which breaks joining and mirroring of display in X11. For USB devices, pick the associated USB controller as attachment device. This allows the DRM import helpers to perform the DMA setup. If the DMA controller does not support DMA transfers, we're out of luck and cannot import. Our current USB-based DRM drivers don't use DMA, so the actual DMA device is not important. Tested by joining/mirroring displays of udl and radeon under Gnome/X11. v8: * release dmadev if device initialization fails (Noralf) * fix commit description (Noralf) v7: * fix use-before-init bug in gm12u320 (Dan) v6: * implement workaround in DRM drivers and hold reference to DMA device while USB device is in use * remove dev_is_usb() (Greg) * collapse USB helper into usb_intf_get_dma_device() (Alan) * integrate Daniel's TODO statement (Daniel) * fix typos (Greg) v5: * provide a helper for USB interfaces (Alan) * add FIXME item to documentation and TODO list (Daniel) v4: * implement workaround with USB helper functions (Greg) * use struct usb_device->bus->sysdev as DMA device (Takashi) v3: * drop gem_create_object * use DMA mask of USB controller, if any (Daniel, Christian, Noralf) v2: * move fix to importer side (Christian, Daniel) * update SHMEM and CMA helpers for new PRIME callbacks Signed-off-by: Thomas Zimmermann Fixes: 6eb0233ec2d0 ("usb: don't inherity DMA properties for USB devices") Tested-by: Pavel Machek Reviewed-by: Greg Kroah-Hartman Acked-by: Christian König Acked-by: Daniel Vetter Acked-by: Noralf Trønnes Cc: Christoph Hellwig Cc: Greg Kroah-Hartman Cc: # v5.10+ Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20210303133229.3288-1-tzimmermann@suse.de Signed-off-by: Maarten Lankhorst --- Documentation/gpu/todo.rst | 21 ++++++++++++++++++++ drivers/gpu/drm/tiny/gm12u320.c | 44 +++++++++++++++++++++++++++++++++-------- drivers/gpu/drm/udl/udl_drv.c | 17 ++++++++++++++++ drivers/gpu/drm/udl/udl_drv.h | 1 + drivers/gpu/drm/udl/udl_main.c | 10 ++++++++++ drivers/usb/core/usb.c | 32 ++++++++++++++++++++++++++++++ include/linux/usb.h | 2 ++ 7 files changed, 119 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 40ccac61137e..22ce801e3a8d 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -613,6 +613,27 @@ Some of these date from the very introduction of KMS in 2008 ... Level: Intermediate +Remove automatic page mapping from dma-buf importing +---------------------------------------------------- + +When importing dma-bufs, the dma-buf and PRIME frameworks automatically map +imported pages into the importer's DMA area. drm_gem_prime_fd_to_handle() and +drm_gem_prime_handle_to_fd() require that importers call dma_buf_attach() +even if they never do actual device DMA, but only CPU access through +dma_buf_vmap(). This is a problem for USB devices, which do not support DMA +operations. + +To fix the issue, automatic page mappings should be removed from the +buffer-sharing code. Fixing this is a bit more involved, since the import/export +cache is also tied to &drm_gem_object.import_attach. Meanwhile we paper over +this problem for USB devices by fishing out the USB host controller device, as +long as that supports DMA. Otherwise importing can still needlessly fail. + +Contact: Thomas Zimmermann , Daniel Vetter + +Level: Advanced + + Better Testing ============== diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c index 33f65f4626e5..23866a54e3f9 100644 --- a/drivers/gpu/drm/tiny/gm12u320.c +++ b/drivers/gpu/drm/tiny/gm12u320.c @@ -83,6 +83,7 @@ MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)"); struct gm12u320_device { struct drm_device dev; + struct device *dmadev; struct drm_simple_display_pipe pipe; struct drm_connector conn; unsigned char *cmd_buf; @@ -601,6 +602,22 @@ static const uint64_t gm12u320_pipe_modifiers[] = { DRM_FORMAT_MOD_INVALID }; +/* + * FIXME: Dma-buf sharing requires DMA support by the importing device. + * This function is a workaround to make USB devices work as well. + * See todo.rst for how to fix the issue in the dma-buf framework. + */ +static struct drm_gem_object *gm12u320_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct gm12u320_device *gm12u320 = to_gm12u320(dev); + + if (!gm12u320->dmadev) + return ERR_PTR(-ENODEV); + + return drm_gem_prime_import_dev(dev, dma_buf, gm12u320->dmadev); +} + DEFINE_DRM_GEM_FOPS(gm12u320_fops); static const struct drm_driver gm12u320_drm_driver = { @@ -614,6 +631,7 @@ static const struct drm_driver gm12u320_drm_driver = { .fops = &gm12u320_fops, DRM_GEM_SHMEM_DRIVER_OPS, + .gem_prime_import = gm12u320_gem_prime_import, }; static const struct drm_mode_config_funcs gm12u320_mode_config_funcs = { @@ -640,15 +658,18 @@ static int gm12u320_usb_probe(struct usb_interface *interface, struct gm12u320_device, dev); if (IS_ERR(gm12u320)) return PTR_ERR(gm12u320); + dev = &gm12u320->dev; + + gm12u320->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); + if (!gm12u320->dmadev) + drm_warn(dev, "buffer sharing not supported"); /* not an error */ INIT_DELAYED_WORK(&gm12u320->fb_update.work, gm12u320_fb_update_work); mutex_init(&gm12u320->fb_update.lock); - dev = &gm12u320->dev; - ret = drmm_mode_config_init(dev); if (ret) - return ret; + goto err_put_device; dev->mode_config.min_width = GM12U320_USER_WIDTH; dev->mode_config.max_width = GM12U320_USER_WIDTH; @@ -658,15 +679,15 @@ static int gm12u320_usb_probe(struct usb_interface *interface, ret = gm12u320_usb_alloc(gm12u320); if (ret) - return ret; + goto err_put_device; ret = gm12u320_set_ecomode(gm12u320); if (ret) - return ret; + goto err_put_device; ret = gm12u320_conn_init(gm12u320); if (ret) - return ret; + goto err_put_device; ret = drm_simple_display_pipe_init(&gm12u320->dev, &gm12u320->pipe, @@ -676,24 +697,31 @@ static int gm12u320_usb_probe(struct usb_interface *interface, gm12u320_pipe_modifiers, &gm12u320->conn); if (ret) - return ret; + goto err_put_device; drm_mode_config_reset(dev); usb_set_intfdata(interface, dev); ret = drm_dev_register(dev, 0); if (ret) - return ret; + goto err_put_device; drm_fbdev_generic_setup(dev, 0); return 0; + +err_put_device: + put_device(gm12u320->dmadev); + return ret; } static void gm12u320_usb_disconnect(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); + struct gm12u320_device *gm12u320 = to_gm12u320(dev); + put_device(gm12u320->dmadev); + gm12u320->dmadev = NULL; drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); } diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 9269092697d8..5703277c6f52 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -32,6 +32,22 @@ static int udl_usb_resume(struct usb_interface *interface) return drm_mode_config_helper_resume(dev); } +/* + * FIXME: Dma-buf sharing requires DMA support by the importing device. + * This function is a workaround to make USB devices work as well. + * See todo.rst for how to fix the issue in the dma-buf framework. + */ +static struct drm_gem_object *udl_driver_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct udl_device *udl = to_udl(dev); + + if (!udl->dmadev) + return ERR_PTR(-ENODEV); + + return drm_gem_prime_import_dev(dev, dma_buf, udl->dmadev); +} + DEFINE_DRM_GEM_FOPS(udl_driver_fops); static const struct drm_driver driver = { @@ -40,6 +56,7 @@ static const struct drm_driver driver = { /* GEM hooks */ .fops = &udl_driver_fops, DRM_GEM_SHMEM_DRIVER_OPS, + .gem_prime_import = udl_driver_gem_prime_import, .name = DRIVER_NAME, .desc = DRIVER_DESC, diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 875e73551ae9..cc16a13316e4 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -50,6 +50,7 @@ struct urb_list { struct udl_device { struct drm_device drm; struct device *dev; + struct device *dmadev; struct drm_simple_display_pipe display_pipe; diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 0e2a376cb075..853f147036f6 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -315,6 +315,10 @@ int udl_init(struct udl_device *udl) DRM_DEBUG("\n"); + udl->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); + if (!udl->dmadev) + drm_warn(dev, "buffer sharing not supported"); /* not an error */ + mutex_init(&udl->gem_lock); if (!udl_parse_vendor_descriptor(udl)) { @@ -343,12 +347,18 @@ int udl_init(struct udl_device *udl) err: if (udl->urbs.count) udl_free_urb_list(dev); + put_device(udl->dmadev); DRM_ERROR("%d\n", ret); return ret; } int udl_drop_usb(struct drm_device *dev) { + struct udl_device *udl = to_udl(dev); + udl_free_urb_list(dev); + put_device(udl->dmadev); + udl->dmadev = NULL; + return 0; } diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 8f07b0516100..a566bb494e24 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -748,6 +748,38 @@ void usb_put_intf(struct usb_interface *intf) } EXPORT_SYMBOL_GPL(usb_put_intf); +/** + * usb_intf_get_dma_device - acquire a reference on the usb interface's DMA endpoint + * @intf: the usb interface + * + * While a USB device cannot perform DMA operations by itself, many USB + * controllers can. A call to usb_intf_get_dma_device() returns the DMA endpoint + * for the given USB interface, if any. The returned device structure must be + * released with put_device(). + * + * See also usb_get_dma_device(). + * + * Returns: A reference to the usb interface's DMA endpoint; or NULL if none + * exists. + */ +struct device *usb_intf_get_dma_device(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + struct device *dmadev; + + if (!udev->bus) + return NULL; + + dmadev = get_device(udev->bus->sysdev); + if (!dmadev || !dmadev->dma_mask) { + put_device(dmadev); + return NULL; + } + + return dmadev; +} +EXPORT_SYMBOL_GPL(usb_intf_get_dma_device); + /* USB device locking * * USB devices and interfaces are locked using the semaphore in their diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d72c4e0713c..d6a41841b93e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -746,6 +746,8 @@ extern int usb_lock_device_for_reset(struct usb_device *udev, extern int usb_reset_device(struct usb_device *dev); extern void usb_queue_reset_device(struct usb_interface *dev); +extern struct device *usb_intf_get_dma_device(struct usb_interface *intf); + #ifdef CONFIG_ACPI extern int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable); -- cgit v1.2.3 From a8affc03a9b375e19bc81573de0c9108317d78c7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Mar 2021 12:01:37 +0100 Subject: block: rename BIO_MAX_PAGES to BIO_MAX_VECS Ever since the addition of multipage bio_vecs BIO_MAX_PAGES has been horribly confusingly misnamed. Rename it to BIO_MAX_VECS to stop confusing users of the bio API. Signed-off-by: Christoph Hellwig Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20210311110137.1132391-2-hch@lst.de Signed-off-by: Jens Axboe --- block/bio.c | 14 +++++++------- block/blk-crypto-fallback.c | 2 +- block/blk-lib.c | 2 +- block/blk-map.c | 2 +- block/bounce.c | 6 +++--- drivers/block/drbd/drbd_int.h | 2 +- drivers/md/bcache/super.c | 2 +- drivers/md/dm-crypt.c | 8 ++++---- drivers/md/dm-writecache.c | 4 ++-- drivers/md/raid5-cache.c | 4 ++-- drivers/md/raid5-ppl.c | 2 +- drivers/nvme/target/passthru.c | 6 +++--- fs/block_dev.c | 6 +++--- fs/btrfs/extent_io.c | 2 +- fs/btrfs/scrub.c | 2 +- fs/crypto/bio.c | 6 +++--- fs/erofs/zdata.c | 2 +- fs/ext4/page-io.c | 2 +- fs/f2fs/checkpoint.c | 2 +- fs/f2fs/data.c | 4 ++-- fs/f2fs/segment.c | 2 +- fs/f2fs/segment.h | 4 ++-- fs/f2fs/super.c | 4 ++-- fs/gfs2/lops.c | 2 +- fs/iomap/buffered-io.c | 4 ++-- fs/iomap/direct-io.c | 4 ++-- fs/mpage.c | 2 +- fs/nilfs2/segbuf.c | 2 +- fs/squashfs/block.c | 2 +- fs/zonefs/super.c | 2 +- include/linux/bio.h | 4 ++-- 31 files changed, 56 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/block/bio.c b/block/bio.c index a1c4d2900c7a..26b7f721cda8 100644 --- a/block/bio.c +++ b/block/bio.c @@ -33,7 +33,7 @@ static struct biovec_slab { { .nr_vecs = 16, .name = "biovec-16" }, { .nr_vecs = 64, .name = "biovec-64" }, { .nr_vecs = 128, .name = "biovec-128" }, - { .nr_vecs = BIO_MAX_PAGES, .name = "biovec-max" }, + { .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" }, }; static struct biovec_slab *biovec_slab(unsigned short nr_vecs) @@ -46,7 +46,7 @@ static struct biovec_slab *biovec_slab(unsigned short nr_vecs) return &bvec_slabs[1]; case 65 ... 128: return &bvec_slabs[2]; - case 129 ... BIO_MAX_PAGES: + case 129 ... BIO_MAX_VECS: return &bvec_slabs[3]; default: BUG(); @@ -151,9 +151,9 @@ out: void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs) { - BIO_BUG_ON(nr_vecs > BIO_MAX_PAGES); + BIO_BUG_ON(nr_vecs > BIO_MAX_VECS); - if (nr_vecs == BIO_MAX_PAGES) + if (nr_vecs == BIO_MAX_VECS) mempool_free(bv, pool); else if (nr_vecs > BIO_INLINE_VECS) kmem_cache_free(biovec_slab(nr_vecs)->slab, bv); @@ -186,15 +186,15 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, /* * Try a slab allocation first for all smaller allocations. If that * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool. - * The mempool is sized to handle up to BIO_MAX_PAGES entries. + * The mempool is sized to handle up to BIO_MAX_VECS entries. */ - if (*nr_vecs < BIO_MAX_PAGES) { + if (*nr_vecs < BIO_MAX_VECS) { struct bio_vec *bvl; bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask)); if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM)) return bvl; - *nr_vecs = BIO_MAX_PAGES; + *nr_vecs = BIO_MAX_VECS; } return mempool_alloc(pool, gfp_mask); diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index c176b7af56a7..c322176a1e09 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -219,7 +219,7 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr) bio_for_each_segment(bv, bio, iter) { num_sectors += bv.bv_len >> SECTOR_SHIFT; - if (++i == BIO_MAX_PAGES) + if (++i == BIO_MAX_VECS) break; } if (num_sectors < bio_sectors(bio)) { diff --git a/block/blk-lib.c b/block/blk-lib.c index 752f9c722062..7b256131b20b 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -296,7 +296,7 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects) { sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512); - return min(pages, (sector_t)BIO_MAX_PAGES); + return min(pages, (sector_t)BIO_MAX_VECS); } static int __blkdev_issue_zero_pages(struct block_device *bdev, diff --git a/block/blk-map.c b/block/blk-map.c index 369e204d14d0..1ffef782fcf2 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -249,7 +249,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, if (!iov_iter_count(iter)) return -EINVAL; - bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES)); + bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS)); if (!bio) return -ENOMEM; bio->bi_opf |= req_op(rq); diff --git a/block/bounce.c b/block/bounce.c index 87983a35079c..6c441f4f1cd4 100644 --- a/block/bounce.c +++ b/block/bounce.c @@ -229,10 +229,10 @@ static struct bio *bounce_clone_bio(struct bio *bio_src) * - The point of cloning the biovec is to produce a bio with a biovec * the caller can modify: bi_idx and bi_bvec_done should be 0. * - * - The original bio could've had more than BIO_MAX_PAGES biovecs; if + * - The original bio could've had more than BIO_MAX_VECS biovecs; if * we tried to clone the whole thing bio_alloc_bioset() would fail. * But the clone should succeed as long as the number of biovecs we - * actually need to allocate is fewer than BIO_MAX_PAGES. + * actually need to allocate is fewer than BIO_MAX_VECS. * * - Lastly, bi_vcnt should not be looked at or relied upon by code * that does not own the bio - reason being drivers don't use it for @@ -299,7 +299,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, int sectors = 0; bio_for_each_segment(from, *bio_orig, iter) { - if (i++ < BIO_MAX_PAGES) + if (i++ < BIO_MAX_VECS) sectors += from.bv_len >> 9; if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn) bounce = true; diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 7d9cc433b758..5d9181382ce1 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1324,7 +1324,7 @@ struct bm_extent { * A followup commit may allow even bigger BIO sizes, * once we thought that through. */ #define DRBD_MAX_BIO_SIZE (1U << 20) -#if DRBD_MAX_BIO_SIZE > (BIO_MAX_PAGES << PAGE_SHIFT) +#if DRBD_MAX_BIO_SIZE > (BIO_MAX_VECS << PAGE_SHIFT) #error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE #endif #define DRBD_MAX_BIO_SIZE_SAFE (1U << 12) /* Works always = 4k */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 71691f32959b..03e1fe4de53d 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -965,7 +965,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, q->limits.max_hw_sectors = UINT_MAX; q->limits.max_sectors = UINT_MAX; q->limits.max_segment_size = UINT_MAX; - q->limits.max_segments = BIO_MAX_PAGES; + q->limits.max_segments = BIO_MAX_VECS; blk_queue_max_discard_sectors(q, UINT_MAX); q->limits.discard_granularity = 512; q->limits.io_min = block_size; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 11c105ecd165..b0ab080f2567 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -229,7 +229,7 @@ static DEFINE_SPINLOCK(dm_crypt_clients_lock); static unsigned dm_crypt_clients_n = 0; static volatile unsigned long dm_crypt_pages_per_client; #define DM_CRYPT_MEMORY_PERCENT 2 -#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_PAGES * 16) +#define DM_CRYPT_MIN_PAGES_PER_CLIENT (BIO_MAX_VECS * 16) static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); @@ -3246,7 +3246,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size, ARCH_KMALLOC_MINALIGN); - ret = mempool_init(&cc->page_pool, BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc); + ret = mempool_init(&cc->page_pool, BIO_MAX_VECS, crypt_page_alloc, crypt_page_free, cc); if (ret) { ti->error = "Cannot allocate page mempool"; goto bad; @@ -3373,9 +3373,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) /* * Check if bio is too large, split as needed. */ - if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) && + if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_VECS << PAGE_SHIFT)) && (bio_data_dir(bio) == WRITE || cc->on_disk_tag_size)) - dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); + dm_accept_partial_bio(bio, ((BIO_MAX_VECS << PAGE_SHIFT) >> SECTOR_SHIFT)); /* * Ensure that bio is a multiple of internal sector encryption size diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 844c4be11768..4f72b6f66c3a 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -1892,10 +1892,10 @@ restart: list_add(&g->lru, &wbl.list); wbl.size++; g->write_in_progress = true; - g->wc_list_contiguous = BIO_MAX_PAGES; + g->wc_list_contiguous = BIO_MAX_VECS; f = g; e->wc_list_contiguous++; - if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) { + if (unlikely(e->wc_list_contiguous == BIO_MAX_VECS)) { if (unlikely(wc->writeback_all)) { next_node = rb_next(&f->rb_node); if (likely(next_node)) diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 4337ae0e6af2..0b5dcaabbc15 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -735,7 +735,7 @@ static void r5l_submit_current_io(struct r5l_log *log) static struct bio *r5l_bio_alloc(struct r5l_log *log) { - struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &log->bs); + struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &log->bs); bio_set_op_attrs(bio, REQ_OP_WRITE, 0); bio_set_dev(bio, log->rdev->bdev); @@ -1634,7 +1634,7 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log, { struct page *page; - ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, &log->bs); + ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &log->bs); if (!ctx->ra_bio) return -ENOMEM; diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index e8c118e05dfd..3ddc2aa0b530 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -496,7 +496,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) { struct bio *prev = bio; - bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, + bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &ppl_conf->bs); bio->bi_opf = prev->bi_opf; bio->bi_write_hint = prev->bi_write_hint; diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 26c587ccd152..2798944899b7 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -50,9 +50,9 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) /* * nvmet_passthru_map_sg is limitted to using a single bio so limit - * the mdts based on BIO_MAX_PAGES as well + * the mdts based on BIO_MAX_VECS as well */ - max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9), + max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9), max_hw_sectors); page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; @@ -191,7 +191,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) struct bio *bio; int i; - if (req->sg_cnt > BIO_MAX_PAGES) + if (req->sg_cnt > BIO_MAX_VECS) return -EINVAL; if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { diff --git a/fs/block_dev.c b/fs/block_dev.c index 03166b3dea4d..92ed7d5df677 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -432,7 +432,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; - nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES); + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); if (!nr_pages) { bool polled = false; @@ -500,8 +500,8 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (!iov_iter_count(iter)) return 0; - nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES + 1); - if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES) + nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); + if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) return __blkdev_direct_IO_simple(iocb, iter, nr_pages); return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 4dfb3ead1175..db8cb98c020c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3048,7 +3048,7 @@ struct bio *btrfs_bio_alloc(u64 first_byte) { struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset); bio->bi_iter.bi_sector = first_byte >> 9; btrfs_io_bio_init(btrfs_io_bio(bio)); return bio; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 582df11d298a..6daa4309c974 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1428,7 +1428,7 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info, if (!first_page->dev->bdev) goto out; - bio = btrfs_io_bio_alloc(BIO_MAX_PAGES); + bio = btrfs_io_bio_alloc(BIO_MAX_VECS); bio_set_dev(bio, first_page->dev->bdev); for (page_num = 0; page_num < sblock->page_count; page_num++) { diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index b048a0e38516..68a2de6b5a9b 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -52,7 +52,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, int num_pages = 0; /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */ - bio = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); + bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS); while (len) { unsigned int blocks_this_page = min(len, blocks_per_page); @@ -74,7 +74,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode, len -= blocks_this_page; lblk += blocks_this_page; pblk += blocks_this_page; - if (num_pages == BIO_MAX_PAGES || !len || + if (num_pages == BIO_MAX_VECS || !len || !fscrypt_mergeable_bio(bio, inode, lblk)) { err = submit_bio_wait(bio); if (err) @@ -126,7 +126,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, return fscrypt_zeroout_range_inline_crypt(inode, lblk, pblk, len); - BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_PAGES); + BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_VECS); nr_pages = min_t(unsigned int, ARRAY_SIZE(pages), (len + blocks_per_page - 1) >> blocks_per_page_bits); diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 6cb356c4217b..3851e1a64f73 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1235,7 +1235,7 @@ submit_bio_retry: } if (!bio) { - bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); bio->bi_end_io = z_erofs_decompressqueue_endio; bio_set_dev(bio, sb->s_bdev); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 03a44a0de86a..f038d578d8d8 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -398,7 +398,7 @@ static void io_submit_init_bio(struct ext4_io_submit *io, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset(). */ - bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); bio_set_dev(bio, bh->b_bdev); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 174a0819ad96..be5415a0dbbc 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -292,7 +292,7 @@ void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) f2fs_put_page(page, 0); if (readahead) - f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); + f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7c95818639a6..4e5257c763d0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -857,7 +857,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL); alloc_new: if (!bio) { - bio = __bio_alloc(fio, BIO_MAX_PAGES); + bio = __bio_alloc(fio, BIO_MAX_VECS); __attach_io_flag(fio); f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host, fio->page->index, fio, GFP_NOIO); @@ -932,7 +932,7 @@ alloc_new: fio->retry = true; goto skip; } - io->bio = __bio_alloc(fio, BIO_MAX_PAGES); + io->bio = __bio_alloc(fio, BIO_MAX_VECS); f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host, bio_page->index, fio, GFP_NOIO); io->fio = *fio; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 993004f06a77..c2866561263e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -4381,7 +4381,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi) block_t total_node_blocks = 0; do { - readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES, + readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS, META_SIT, true); start = start_blk * sit_i->sents_per_block; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 229814b4f4a6..e9a7a637d688 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -851,7 +851,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) else if (type == NODE) return 8 * sbi->blocks_per_seg; else if (type == META) - return 8 * BIO_MAX_PAGES; + return 8 * BIO_MAX_VECS; else return 0; } @@ -868,7 +868,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, return 0; nr_to_write = wbc->nr_to_write; - desired = BIO_MAX_PAGES; + desired = BIO_MAX_VECS; if (type == NODE) desired <<= 1; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7069793752f1..82592b19b4e0 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -753,9 +753,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) case Opt_io_size_bits: if (args->from && match_int(args, &arg)) return -EINVAL; - if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) { + if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) { f2fs_warn(sbi, "Not support %d, larger than %d", - 1 << arg, BIO_MAX_PAGES); + 1 << arg, BIO_MAX_VECS); return -EINVAL; } F2FS_OPTION(sbi).write_io_size_bits = arg; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index dc1b93a877c6..a82f4747aa8d 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -267,7 +267,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, bio_end_io_t *end_io) { struct super_block *sb = sdp->sd_vfs; - struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); + struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS); bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift; bio_set_dev(bio, sb->s_bdev); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 7ffcd7ef33d4..414769a6ad11 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1221,7 +1221,7 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend; struct bio *bio; - bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &iomap_ioend_bioset); + bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset); bio_set_dev(bio, wpc->iomap.bdev); bio->bi_iter.bi_sector = sector; bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); @@ -1252,7 +1252,7 @@ iomap_chain_bio(struct bio *prev) { struct bio *new; - new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); + new = bio_alloc(GFP_NOFS, BIO_MAX_VECS); bio_copy_dev(new, prev);/* also copies over blkcg information */ new->bi_iter.bi_sector = bio_end_sector(prev); new->bi_opf = prev->bi_opf; diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index e2c4991833b8..bdd0d89bbf0a 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -296,7 +296,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, */ bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua); - nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_PAGES); + nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS); do { size_t n; if (dio->error) { @@ -338,7 +338,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, copied += n; nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, - BIO_MAX_PAGES); + BIO_MAX_VECS); iomap_dio_submit_bio(dio, iomap, bio, pos); pos += n; } while (nr_pages); diff --git a/fs/mpage.c b/fs/mpage.c index 961234d68779..334e7d09aa65 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -616,7 +616,7 @@ alloc_new: goto out; } bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), - BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH); + BIO_MAX_VECS, GFP_NOFS|__GFP_HIGH); if (bio == NULL) goto confused; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 1e75417bfe6e..56872e93823d 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -399,7 +399,7 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, { wi->bio = NULL; wi->rest_blocks = segbuf->sb_sum.nblocks; - wi->max_pages = BIO_MAX_PAGES; + wi->max_pages = BIO_MAX_VECS; wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end = 0; wi->blocknr = segbuf->sb_pseg_start; diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 45f44425d856..b9e87ebb1060 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -87,7 +87,7 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length, int error, i; struct bio *bio; - if (page_count <= BIO_MAX_PAGES) + if (page_count <= BIO_MAX_VECS) bio = bio_alloc(GFP_NOIO, page_count); else bio = bio_kmalloc(GFP_NOIO, page_count); diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index b6ff4a21abac..0fe76f376dee 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -684,7 +684,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize); iov_iter_truncate(from, max); - nr_pages = iov_iter_npages(from, BIO_MAX_PAGES); + nr_pages = iov_iter_npages(from, BIO_MAX_VECS); if (!nr_pages) return 0; diff --git a/include/linux/bio.h b/include/linux/bio.h index 983ed2fe7c85..d0246c92a6e8 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -20,11 +20,11 @@ #define BIO_BUG_ON #endif -#define BIO_MAX_PAGES 256U +#define BIO_MAX_VECS 256U static inline unsigned int bio_max_segs(unsigned int nr_segs) { - return min(nr_segs, BIO_MAX_PAGES); + return min(nr_segs, BIO_MAX_VECS); } #define bio_prio(bio) (bio)->bi_ioprio -- cgit v1.2.3 From 34dc2efb39a231280fd6696a59bbe712bf3c5c4a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:01 -0800 Subject: memblock: fix section mismatch warning The inlining logic in clang-13 is rewritten to often not inline some functions that were inlined by all earlier compilers. In case of the memblock interfaces, this exposed a harmless bug of a missing __init annotation: WARNING: modpost: vmlinux.o(.text+0x507c0a): Section mismatch in reference from the function memblock_bottom_up() to the variable .meminit.data:memblock The function memblock_bottom_up() references the variable __meminitdata memblock. This is often because memblock_bottom_up lacks a __meminitdata annotation or the annotation of memblock is wrong. Interestingly, these annotations were present originally, but got removed with the explanation that the __init annotation prevents the function from getting inlined. I checked this again and found that while this is the case with clang, gcc (version 7 through 10, did not test others) does inline the functions regardless. As the previous change was apparently intended to help the clang builds, reverting it to help the newer clang versions seems appropriate as well. gcc builds don't seem to care either way. Link: https://lkml.kernel.org/r/20210225133808.2188581-1-arnd@kernel.org Fixes: 5bdba520c1b3 ("mm: memblock: drop __init from memblock functions to make it inline") Reference: 2cfb3665e864 ("include/linux/memblock.h: add __init to memblock_set_bottom_up()") Signed-off-by: Arnd Bergmann Reviewed-by: David Hildenbrand Reviewed-by: Mike Rapoport Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Faiyaz Mohammed Cc: Baoquan He Cc: Thomas Bogendoerfer Cc: Aslan Bakirov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c88bc24e31aa..d13e3cd938b4 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) /* * Set the allocation direction to bottom-up or top-down. */ -static inline void memblock_set_bottom_up(bool enable) +static inline __init void memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } @@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable) * if this is true, that said, memblock will allocate memory * in bottom-up direction. */ -static inline bool memblock_bottom_up(void) +static inline __init bool memblock_bottom_up(void) { return memblock.bottom_up; } -- cgit v1.2.3 From cbf78d85079cee662c45749ef4f744d41be85d48 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:04 -0800 Subject: stop_machine: mark helpers __always_inline With clang-13, some functions only get partially inlined, with a specialized version referring to a global variable. This triggers a harmless build-time check for the intel-rng driver: WARNING: modpost: drivers/char/hw_random/intel-rng.o(.text+0xe): Section mismatch in reference from the function stop_machine() to the function .init.text:intel_rng_hw_init() The function stop_machine() references the function __init intel_rng_hw_init(). This is often because stop_machine lacks a __init annotation or the annotation of intel_rng_hw_init is wrong. In this instance, an easy workaround is to force the stop_machine() function to be inline, along with related interfaces that did not show the same behavior at the moment, but theoretically could. The combination of the two patches listed below triggers the behavior in clang-13, but individually these commits are correct. Link: https://lkml.kernel.org/r/20210225130153.1956990-1-arnd@kernel.org Fixes: fe5595c07400 ("stop_machine: Provide stop_machine_cpuslocked()") Fixes: ee527cd3a20c ("Use stop_machine_run in the Intel RNG driver") Signed-off-by: Arnd Bergmann Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: "Paul E. McKenney" Cc: Ingo Molnar Cc: Prarit Bhargava Cc: Daniel Bristot de Oliveira Cc: Peter Zijlstra Cc: Valentin Schneider Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/stop_machine.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 30577c3aecf8..46fb3ebdd16e 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); #else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ -static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, +static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { unsigned long flags; @@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, return ret; } -static inline int stop_machine(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static __always_inline int +stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { return stop_machine_cpuslocked(fn, data, cpus); } -static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static __always_inline int +stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) { return stop_machine(fn, data, cpus); } -- cgit v1.2.3 From 82e69a121be4b1597ce758534816a8ee04c8b761 Mon Sep 17 00:00:00 2001 From: Fenghua Yu Date: Fri, 12 Mar 2021 21:07:15 -0800 Subject: mm/fork: clear PASID for new mm When a new mm is created, its PASID should be cleared, i.e. the PASID is initialized to its init state 0 on both ARM and X86. This patch was part of the series introducing mm->pasid, but got lost along the way [1]. It still makes sense to have it, because each address space has a different PASID. And the IOMMU code in iommu_sva_alloc_pasid() expects the pasid field of a new mm struct to be cleared. [1] https://lore.kernel.org/linux-iommu/YDgh53AcQHT+T3L0@otcwcpicx3.sc.intel.com/ Link: https://lkml.kernel.org/r/20210302103837.2562625-1-jean-philippe@linaro.org Signed-off-by: Fenghua Yu Signed-off-by: Jean-Philippe Brucker Reviewed-by: Tony Luck Cc: Jacob Pan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 + kernel/fork.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0974ad501a47..6613b26a8894 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,6 +23,7 @@ #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) +#define INIT_PASID 0 struct address_space; struct mem_cgroup; diff --git a/kernel/fork.c b/kernel/fork.c index d3171e8e88e5..54cc905e5fe0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -994,6 +994,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } +static void mm_init_pasid(struct mm_struct *mm) +{ +#ifdef CONFIG_IOMMU_SUPPORT + mm->pasid = INIT_PASID; +#endif +} + static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1024,6 +1031,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + mm_init_pasid(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); -- cgit v1.2.3 From 97a7e4733b9b221d012ae68fcd3b3251febf6341 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 12 Mar 2021 21:07:26 -0800 Subject: mm: introduce page_needs_cow_for_dma() for deciding whether cow We've got quite a few places (pte, pmd, pud) that explicitly checked against whether we should break the cow right now during fork(). It's easier to provide a helper, especially before we work the same thing on hugetlbfs. Since we'll reference is_cow_mapping() in mm.h, move it there too. Actually it suites mm.h more since internal.h is mm/ only, but mm.h is exported to the whole kernel. With that we should expect another patch to use is_cow_mapping() whenever we can across the kernel since we do use it quite a lot but it's always done with raw code against VM_* flags. Link: https://lkml.kernel.org/r/20210217233547.93892-4-peterx@redhat.com Signed-off-by: Peter Xu Reviewed-by: Jason Gunthorpe Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Christoph Hellwig Cc: Daniel Vetter Cc: David Airlie Cc: David Gibson Cc: Gal Pressman Cc: Jan Kara Cc: Jann Horn Cc: Kirill Shutemov Cc: Kirill Tkhai Cc: Matthew Wilcox Cc: Miaohe Lin Cc: Mike Kravetz Cc: Mike Rapoport Cc: Roland Scheidegger Cc: VMware Graphics Cc: Wei Zhang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++++++++++++++++++ mm/huge_memory.c | 8 ++------ mm/internal.h | 5 ----- mm/memory.c | 8 +------- 4 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 77e64e3eac80..64a71bf20536 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1300,6 +1300,27 @@ static inline bool page_maybe_dma_pinned(struct page *page) GUP_PIN_COUNTING_BIAS; } +static inline bool is_cow_mapping(vm_flags_t flags) +{ + return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; +} + +/* + * This should most likely only be called during fork() to see whether we + * should break the cow immediately for a page on the src mm. + */ +static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma, + struct page *page) +{ + if (!is_cow_mapping(vma->vm_flags)) + return false; + + if (!atomic_read(&vma->vm_mm->has_pinned)) + return false; + + return page_maybe_dma_pinned(page); +} + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 395c75111d33..da1d63a41aec 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1100,9 +1100,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, * best effort that the pinned pages won't be replaced by another * random page during the coming copy-on-write. */ - if (unlikely(is_cow_mapping(vma->vm_flags) && - atomic_read(&src_mm->has_pinned) && - page_maybe_dma_pinned(src_page))) { + if (unlikely(page_needs_cow_for_dma(vma, src_page))) { pte_free(dst_mm, pgtable); spin_unlock(src_ptl); spin_unlock(dst_ptl); @@ -1214,9 +1212,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, } /* Please refer to comments in copy_huge_pmd() */ - if (unlikely(is_cow_mapping(vma->vm_flags) && - atomic_read(&src_mm->has_pinned) && - page_maybe_dma_pinned(pud_page(pud)))) { + if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud)))) { spin_unlock(src_ptl); spin_unlock(dst_ptl); __split_huge_pud(vma, src_pud, addr); diff --git a/mm/internal.h b/mm/internal.h index 9902648f2206..1432feec62df 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -296,11 +296,6 @@ static inline unsigned int buddy_order(struct page *page) */ #define buddy_order_unsafe(page) READ_ONCE(page_private(page)) -static inline bool is_cow_mapping(vm_flags_t flags) -{ - return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; -} - /* * These three helpers classifies VMAs for virtual memory accounting. */ diff --git a/mm/memory.c b/mm/memory.c index c8e357627318..523230005db1 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -809,12 +809,8 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss, struct page **prealloc, pte_t pte, struct page *page) { - struct mm_struct *src_mm = src_vma->vm_mm; struct page *new_page; - if (!is_cow_mapping(src_vma->vm_flags)) - return 1; - /* * What we want to do is to check whether this page may * have been pinned by the parent process. If so, @@ -828,9 +824,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma * the page count. That might give false positives for * for pinning, but it will work correctly. */ - if (likely(!atomic_read(&src_mm->has_pinned))) - return 1; - if (likely(!page_maybe_dma_pinned(page))) + if (likely(!page_needs_cow_for_dma(src_vma, page))) return 1; new_page = *prealloc; -- cgit v1.2.3 From 97e4910232fa1f81e806aa60c25a0450276d99a2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 12 Mar 2021 21:07:47 -0800 Subject: linux/compiler-clang.h: define HAVE_BUILTIN_BSWAP* Separating compiler-clang.h from compiler-gcc.h inadventently dropped the definitions of the three HAVE_BUILTIN_BSWAP macros, which requires falling back to the open-coded version and hoping that the compiler detects it. Since all versions of clang support the __builtin_bswap interfaces, add back the flags and have the headers pick these up automatically. This results in a 4% improvement of compilation speed for arm defconfig. Note: it might also be worth revisiting which architectures set CONFIG_ARCH_USE_BUILTIN_BSWAP for one compiler or the other, today this is set on six architectures (arm32, csky, mips, powerpc, s390, x86), while another ten architectures define custom helpers (alpha, arc, ia64, m68k, mips, nios2, parisc, sh, sparc, xtensa), and the rest (arm64, h8300, hexagon, microblaze, nds32, openrisc, riscv) just get the unoptimized version and rely on the compiler to detect it. A long time ago, the compiler builtins were architecture specific, but nowadays, all compilers that are able to build the kernel have correct implementations of them, though some may not be as optimized as the inline asm versions. The patch that dropped the optimization landed in v4.19, so as discussed it would be fairly safe to backport this revert to stable kernels to the 4.19/5.4/5.10 stable kernels, but there is a remaining risk for regressions, and it has no known side-effects besides compile speed. Link: https://lkml.kernel.org/r/20210226161151.2629097-1-arnd@kernel.org Link: https://lore.kernel.org/lkml/20210225164513.3667778-1-arnd@kernel.org/ Fixes: 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Reviewed-by: Kees Cook Acked-by: Miguel Ojeda Acked-by: Nick Desaulniers Acked-by: Luc Van Oostenryck Cc: Masahiro Yamada Cc: Nick Hu Cc: Greentime Hu Cc: Vincent Chen Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Cc: Guo Ren Cc: Randy Dunlap Cc: Sami Tolvanen Cc: Marco Elver Cc: Arvind Sankar Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 04c0a5a717f7..d217c382b02d 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -31,6 +31,12 @@ #define __no_sanitize_thread #endif +#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#define __HAVE_BUILTIN_BSWAP16__ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + #if __has_feature(undefined_behavior_sanitizer) /* GCC does not have __SANITIZE_UNDEFINED__ */ #define __no_sanitize_undefined \ -- cgit v1.2.3 From 149fc787353f65b7e72e05e7b75d34863266c3e2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 12 Mar 2021 21:08:03 -0800 Subject: include/linux/sched/mm.h: use rcu_dereference in in_vfork() Fix a sparse warning by using rcu_dereference(). Technically this is a bug and a sufficiently aggressive compiler could reload the `real_parent' pointer outside the protection of the rcu lock (and access freed memory), but I think it's pretty unlikely to happen. Link: https://lkml.kernel.org/r/20210221194207.1351703-1-willy@infradead.org Fixes: b18dc5f291c0 ("mm, oom: skip vforked tasks from being selected") Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Miaohe Lin Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1ae08b8462a4..90b2a0bce11c 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk) * another oom-unkillable task does this it should blame itself. */ rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + ret = tsk->vfork_done && + rcu_dereference(tsk->real_parent)->mm == tsk->mm; rcu_read_unlock(); return ret; -- cgit v1.2.3 From be6c8982e4ab9a41907555f601b711a7e2a17d4c Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Fri, 12 Mar 2021 21:08:30 -0800 Subject: mm/memcg: rename mem_cgroup_split_huge_fixup to split_page_memcg and add nr_pages argument Rename mem_cgroup_split_huge_fixup to split_page_memcg and explicitly pass in page number argument. In this way, the interface name is more common and can be used by potential users. In addition, the complete info(memcg and flag) of the memcg needs to be set to the tail pages. Link: https://lkml.kernel.org/r/20210304074053.65527-2-zhouguanghui1@huawei.com Signed-off-by: Zhou Guanghui Acked-by: Johannes Weiner Reviewed-by: Zi Yan Reviewed-by: Shakeel Butt Acked-by: Michal Hocko Cc: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Nicholas Piggin Cc: Kefeng Wang Cc: Hanjun Guo Cc: Tianhong Ding Cc: Weilong Chen Cc: Rui Xiang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++---- mm/huge_memory.c | 2 +- mm/memcontrol.c | 15 ++++++--------- 3 files changed, 9 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e6dc793d587d..0c04d39a7967 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1061,9 +1061,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -void mem_cgroup_split_huge_fixup(struct page *head); -#endif +void split_page_memcg(struct page *head, unsigned int nr); #else /* CONFIG_MEMCG */ @@ -1400,7 +1398,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, return 0; } -static inline void mem_cgroup_split_huge_fixup(struct page *head) +static inline void split_page_memcg(struct page *head, unsigned int nr) { } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index da1d63a41aec..ae907a9c2050 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2467,7 +2467,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, int i; /* complete memcg works before add pages to LRU */ - mem_cgroup_split_huge_fixup(head); + split_page_memcg(head, nr); if (PageAnon(head) && PageSwapCache(head)) { swp_entry_t entry = { .val = page_private(head) }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 845eec01ef9d..e064ac0d850a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3287,24 +3287,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) #endif /* CONFIG_MEMCG_KMEM */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* - * Because page_memcg(head) is not set on compound tails, set it now. + * Because page_memcg(head) is not set on tails, set it now. */ -void mem_cgroup_split_huge_fixup(struct page *head) +void split_page_memcg(struct page *head, unsigned int nr) { struct mem_cgroup *memcg = page_memcg(head); int i; - if (mem_cgroup_disabled()) + if (mem_cgroup_disabled() || !memcg) return; - for (i = 1; i < HPAGE_PMD_NR; i++) { - css_get(&memcg->css); - head[i].memcg_data = (unsigned long)memcg; - } + for (i = 1; i < nr; i++) + head[i].memcg_data = head->memcg_data; + css_get_many(&memcg->css, nr - 1); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_MEMCG_SWAP /** -- cgit v1.2.3