From b724a6418f1f853bcb39c8923bf14a50c7bdbd07 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Sun, 17 Sep 2023 23:38:46 +0800 Subject: bpf: Fix tr dereferencing Fix 'tr' dereferencing bug when CONFIG_BPF_JIT is turned off. When CONFIG_BPF_JIT is turned off, 'bpf_trampoline_get()' returns NULL, which is same as the cases when CONFIG_BPF_JIT is turned on. Closes: https://lore.kernel.org/r/202309131936.5Nc8eUD0-lkp@intel.com/ Fixes: f7b12b6fea00 ("bpf: verifier: refactor check_attach_btf_id()") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Leon Hwang Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20230917153846.88732-1-hffilwlqm@gmail.com --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 024e8b28c34b..49f8b691496c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1307,7 +1307,7 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, static inline struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info) { - return ERR_PTR(-EOPNOTSUPP); + return NULL; } static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {} #define DEFINE_BPF_DISPATCHER(name) -- cgit v1.2.3 From 8e56b063c86569e51eed1c5681ce6361fa97fc7a Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 3 Oct 2023 13:17:53 -0400 Subject: netfilter: handle the connecting collision properly in nf_conntrack_proto_sctp In Scenario A and B below, as the delayed INIT_ACK always changes the peer vtag, SCTP ct with the incorrect vtag may cause packet loss. Scenario A: INIT_ACK is delayed until the peer receives its own INIT_ACK 192.168.1.2 > 192.168.1.1: [INIT] [init tag: 1328086772] 192.168.1.1 > 192.168.1.2: [INIT] [init tag: 1414468151] 192.168.1.2 > 192.168.1.1: [INIT ACK] [init tag: 1328086772] 192.168.1.1 > 192.168.1.2: [INIT ACK] [init tag: 1650211246] * 192.168.1.2 > 192.168.1.1: [COOKIE ECHO] 192.168.1.1 > 192.168.1.2: [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: [COOKIE ACK] Scenario B: INIT_ACK is delayed until the peer completes its own handshake 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] 192.168.1.2 > 192.168.1.1: sctp (1) [INIT ACK] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [COOKIE ECHO] 192.168.1.2 > 192.168.1.1: sctp (1) [COOKIE ACK] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT ACK] [init tag: 3914796021] * This patch fixes it as below: In SCTP_CID_INIT processing: - clear ct->proto.sctp.init[!dir] if ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]. (Scenario E) - set ct->proto.sctp.init[dir]. In SCTP_CID_INIT_ACK processing: - drop it if !ct->proto.sctp.init[!dir] && ct->proto.sctp.vtag[!dir] && ct->proto.sctp.vtag[!dir] != ih->init_tag. (Scenario B, Scenario C) - drop it if ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && ct->proto.sctp.vtag[!dir] != ih->init_tag. (Scenario A) In SCTP_CID_COOKIE_ACK processing: - clear ct->proto.sctp.init[dir] and ct->proto.sctp.init[!dir]. (Scenario D) Also, it's important to allow the ct state to move forward with cookie_echo and cookie_ack from the opposite dir for the collision scenarios. There are also other Scenarios where it should allow the packet through, addressed by the processing above: Scenario C: new CT is created by INIT_ACK. Scenario D: start INIT on the existing ESTABLISHED ct. Scenario E: start INIT after the old collision on the existing ESTABLISHED ct. 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 3922216408] 192.168.1.1 > 192.168.1.2: sctp (1) [INIT] [init tag: 144230885] (both side are stopped, then start new connection again in hours) 192.168.1.2 > 192.168.1.1: sctp (1) [INIT] [init tag: 242308742] Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Xin Long Signed-off-by: Florian Westphal --- include/linux/netfilter/nf_conntrack_sctp.h | 1 + net/netfilter/nf_conntrack_proto_sctp.c | 43 ++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 625f491b95de..fb31312825ae 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,7 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 init[IP_CT_DIR_MAX]; u8 last_dir; u8 flags; }; diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index b6bcc8f2f46b..c6bd533983c1 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -112,7 +112,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, /* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ /* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ +/* cookie_ack */ {sCL, sCL, sCW, sES, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ /* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, /* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, /* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, @@ -126,7 +126,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { /* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, /* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, /* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ +/* cookie_echo */ {sIV, sCL, sCE, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ /* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, /* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, /* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, @@ -412,6 +412,9 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; + } else if (sch->type == SCTP_CID_COOKIE_ACK) { + ct->proto.sctp.init[dir] = 0; + ct->proto.sctp.init[!dir] = 0; } else if (sch->type == SCTP_CID_HEARTBEAT) { if (ct->proto.sctp.vtag[dir] == 0) { pr_debug("Setting %d vtag %x for dir %d\n", sch->type, sh->vtag, dir); @@ -461,16 +464,18 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } /* If it is an INIT or an INIT ACK note down the vtag */ - if (sch->type == SCTP_CID_INIT || - sch->type == SCTP_CID_INIT_ACK) { - struct sctp_inithdr _inithdr, *ih; + if (sch->type == SCTP_CID_INIT) { + struct sctp_inithdr _ih, *ih; - ih = skb_header_pointer(skb, offset + sizeof(_sch), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) goto out_unlock; - pr_debug("Setting vtag %x for dir %d\n", - ih->init_tag, !dir); + + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir]) + ct->proto.sctp.init[!dir] = 0; + ct->proto.sctp.init[dir] = 1; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; /* don't renew timeout on init retransmit so @@ -481,6 +486,24 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, old_state == SCTP_CONNTRACK_CLOSED && nf_ct_is_confirmed(ct)) ignore = true; + } else if (sch->type == SCTP_CID_INIT_ACK) { + struct sctp_inithdr _ih, *ih; + __be32 vtag; + + ih = skb_header_pointer(skb, offset + sizeof(_sch), sizeof(*ih), &_ih); + if (!ih) + goto out_unlock; + + vtag = ct->proto.sctp.vtag[!dir]; + if (!ct->proto.sctp.init[!dir] && vtag && vtag != ih->init_tag) + goto out_unlock; + /* collision */ + if (ct->proto.sctp.init[dir] && ct->proto.sctp.init[!dir] && + vtag != ih->init_tag) + goto out_unlock; + + pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); + ct->proto.sctp.vtag[!dir] = ih->init_tag; } ct->proto.sctp.state = new_state; -- cgit v1.2.3 From 4785aa8028536c2be656d22c74ec1995b97056f3 Mon Sep 17 00:00:00 2001 From: Oza Pawandeep Date: Tue, 3 Oct 2023 10:33:33 -0700 Subject: cpuidle, ACPI: Evaluate LPI arch_flags for broadcast timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arm® Functional Fixed Hardware Specification defines LPI states, which provide an architectural context loss flags field that can be used to describe the context that might be lost when an LPI state is entered. - Core context Lost - General purpose registers. - Floating point and SIMD registers. - System registers, include the System register based - generic timer for the core. - Debug register in the core power domain. - PMU registers in the core power domain. - Trace register in the core power domain. - Trace context loss - GICR - GICD Qualcomm's custom CPUs preserves the architectural state, including keeping the power domain for local timers active. when core is power gated, the local timers are sufficient to wake the core up without needing broadcast timer. The patch fixes the evaluation of cpuidle arch_flags, and moves only to broadcast timer if core context lost is defined in ACPI LPI. Fixes: a36a7fecfe60 ("ACPI / processor_idle: Add support for Low Power Idle(LPI) states") Reviewed-by: Sudeep Holla Acked-by: Rafael J. Wysocki Signed-off-by: Oza Pawandeep Link: https://lore.kernel.org/r/20231003173333.2865323-1-quic_poza@quicinc.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/acpi.h | 19 +++++++++++++++++++ drivers/acpi/processor_idle.c | 3 +-- include/linux/acpi.h | 9 +++++++++ 3 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 4d537d56eb84..6792a1f83f2a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -9,6 +9,7 @@ #ifndef _ASM_ACPI_H #define _ASM_ACPI_H +#include #include #include #include @@ -44,6 +45,24 @@ #define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \ trbe_interrupt) + sizeof(u16)) +/* + * Arm® Functional Fixed Hardware Specification Version 1.2. + * Table 2: Arm Architecture context loss flags + */ +#define CPUIDLE_CORE_CTXT BIT(0) /* Core context Lost */ + +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + if (arch_flags & CPUIDLE_CORE_CTXT) + return CPUIDLE_FLAG_TIMER_STOP; + + return 0; +} +#define arch_get_idle_state_flags arch_get_idle_state_flags + +#define CPUIDLE_TRACE_CTXT BIT(1) /* Trace context loss */ +#define CPUIDLE_GICR_CTXT BIT(2) /* GICR */ +#define CPUIDLE_GICD_CTXT BIT(3) /* GICD */ /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index dc615ef6550a..3a34a8c425fe 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -1217,8 +1217,7 @@ static int acpi_processor_setup_lpi_states(struct acpi_processor *pr) strscpy(state->desc, lpi->desc, CPUIDLE_DESC_LEN); state->exit_latency = lpi->wake_latency; state->target_residency = lpi->min_residency; - if (lpi->arch_flags) - state->flags |= CPUIDLE_FLAG_TIMER_STOP; + state->flags |= arch_get_idle_state_flags(lpi->arch_flags); if (i != 0 && lpi->entry_method == ACPI_CSTATE_FFH) state->flags |= CPUIDLE_FLAG_RCU_IDLE; state->enter = acpi_idle_lpi_enter; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a73246c3c35e..afd94c9b8b8a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1480,6 +1480,15 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +#ifdef CONFIG_ACPI_PROCESSOR_IDLE +#ifndef arch_get_idle_state_flags +static inline unsigned int arch_get_idle_state_flags(u32 arch_flags) +{ + return 0; +} +#endif +#endif /* CONFIG_ACPI_PROCESSOR_IDLE */ + #ifdef CONFIG_ACPI_PPTT int acpi_pptt_cpu_is_thread(unsigned int cpu); int find_acpi_cpu_topology(unsigned int cpu, int level); -- cgit v1.2.3 From 0f28ada1fbf0054557cddcdb93ad17f767105208 Mon Sep 17 00:00:00 2001 From: Jorge Sanjuan Garcia Date: Wed, 6 Sep 2023 11:49:26 +0000 Subject: mcb: remove is_added flag from mcb_device struct When calling mcb_bus_add_devices(), both mcb devices and the mcb bus will attempt to attach a device to a driver because they share the same bus_type. This causes an issue when trying to cast the container of the device to mcb_device struct using to_mcb_device(), leading to a wrong cast when the mcb_bus is added. A crash occurs when freing the ida resources as the bus numbering of mcb_bus gets confused with the is_added flag on the mcb_device struct. The only reason for this cast was to keep an is_added flag on the mcb_device struct that does not seem necessary. The function device_attach() handles already bound devices and the mcb subsystem does nothing special with this is_added flag so remove it completely. Fixes: 18d288198099 ("mcb: Correctly initialize the bus's device") Cc: stable Signed-off-by: Jorge Sanjuan Garcia Co-developed-by: Jose Javier Rodriguez Barbarin Signed-off-by: Jose Javier Rodriguez Barbarin Link: https://lore.kernel.org/r/20230906114901.63174-2-JoseJavier.Rodriguez@duagon.com Signed-off-by: Greg Kroah-Hartman --- drivers/mcb/mcb-core.c | 10 +++------- drivers/mcb/mcb-parse.c | 2 -- include/linux/mcb.h | 1 - 3 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c index 978fdfc19a06..0cac5bead84f 100644 --- a/drivers/mcb/mcb-core.c +++ b/drivers/mcb/mcb-core.c @@ -387,17 +387,13 @@ EXPORT_SYMBOL_NS_GPL(mcb_free_dev, MCB); static int __mcb_bus_add_devices(struct device *dev, void *data) { - struct mcb_device *mdev = to_mcb_device(dev); int retval; - if (mdev->is_added) - return 0; - retval = device_attach(dev); - if (retval < 0) + if (retval < 0) { dev_err(dev, "Error adding device (%d)\n", retval); - - mdev->is_added = true; + return retval; + } return 0; } diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c index 2aef990f379f..656b6b71c768 100644 --- a/drivers/mcb/mcb-parse.c +++ b/drivers/mcb/mcb-parse.c @@ -99,8 +99,6 @@ static int chameleon_parse_gdd(struct mcb_bus *bus, mdev->mem.end = mdev->mem.start + size - 1; mdev->mem.flags = IORESOURCE_MEM; - mdev->is_added = false; - ret = mcb_device_register(bus, mdev); if (ret < 0) goto err; diff --git a/include/linux/mcb.h b/include/linux/mcb.h index 1e5893138afe..0b971b24a804 100644 --- a/include/linux/mcb.h +++ b/include/linux/mcb.h @@ -63,7 +63,6 @@ static inline struct mcb_bus *to_mcb_bus(struct device *dev) struct mcb_device { struct device dev; struct mcb_bus *bus; - bool is_added; struct mcb_driver *driver; u16 id; int inst; -- cgit v1.2.3 From b83ce9cb4a465b8f9a3fa45561b721a9551f60e3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 8 Sep 2023 10:27:23 +0200 Subject: dma-buf: add dma_fence_timestamp helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a fence signals there is a very small race window where the timestamp isn't updated yet. sync_file solves this by busy waiting for the timestamp to appear, but on other ocassions didn't handled this correctly. Provide a dma_fence_timestamp() helper function for this and use it in all appropriate cases. Another alternative would be to grab the spinlock when that happens. v2 by teddy: add a wait parameter to wait for the timestamp to show up, in case the accurate timestamp is needed and/or the timestamp is not based on ktime (e.g. hw timestamp) v3 chk: drop the parameter again for unified handling Signed-off-by: Yunxiang Li Signed-off-by: Christian König Fixes: 1774baa64f93 ("drm/scheduler: Change scheduled fence track v2") Reviewed-by: Alex Deucher CC: stable@vger.kernel.org Link: https://patchwork.freedesktop.org/patch/msgid/20230929104725.2358-1-christian.koenig@amd.com --- drivers/dma-buf/dma-fence-unwrap.c | 13 ++++--------- drivers/dma-buf/sync_file.c | 9 +++------ drivers/gpu/drm/scheduler/sched_main.c | 2 +- include/linux/dma-fence.h | 19 +++++++++++++++++++ 4 files changed, 27 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c index c625bb2b5d56..628af51c81af 100644 --- a/drivers/dma-buf/dma-fence-unwrap.c +++ b/drivers/dma-buf/dma-fence-unwrap.c @@ -76,16 +76,11 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) { if (!dma_fence_is_signaled(tmp)) { ++count; - } else if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, - &tmp->flags)) { - if (ktime_after(tmp->timestamp, timestamp)) - timestamp = tmp->timestamp; } else { - /* - * Use the current time if the fence is - * currently signaling. - */ - timestamp = ktime_get(); + ktime_t t = dma_fence_timestamp(tmp); + + if (ktime_after(t, timestamp)) + timestamp = t; } } } diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c index af57799c86ce..2e9a316c596a 100644 --- a/drivers/dma-buf/sync_file.c +++ b/drivers/dma-buf/sync_file.c @@ -268,13 +268,10 @@ static int sync_fill_fence_info(struct dma_fence *fence, sizeof(info->driver_name)); info->status = dma_fence_get_status(fence); - while (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && - !test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) - cpu_relax(); info->timestamp_ns = - test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags) ? - ktime_to_ns(fence->timestamp) : - ktime_set(0, 0); + dma_fence_is_signaled(fence) ? + ktime_to_ns(dma_fence_timestamp(fence)) : + ktime_set(0, 0); return info->status; } diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 506371c42745..5a3a622fc672 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -929,7 +929,7 @@ drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched) if (next) { next->s_fence->scheduled.timestamp = - job->s_fence->finished.timestamp; + dma_fence_timestamp(&job->s_fence->finished); /* start TO timer for next job */ drm_sched_start_timeout(sched); } diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 0d678e9a7b24..ebe78bd3d121 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -568,6 +568,25 @@ static inline void dma_fence_set_error(struct dma_fence *fence, fence->error = error; } +/** + * dma_fence_timestamp - helper to get the completion timestamp of a fence + * @fence: fence to get the timestamp from. + * + * After a fence is signaled the timestamp is updated with the signaling time, + * but setting the timestamp can race with tasks waiting for the signaling. This + * helper busy waits for the correct timestamp to appear. + */ +static inline ktime_t dma_fence_timestamp(struct dma_fence *fence) +{ + if (WARN_ON(!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) + return ktime_get(); + + while (!test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) + cpu_relax(); + + return fence->timestamp; +} + signed long dma_fence_wait_timeout(struct dma_fence *, bool intr, signed long timeout); signed long dma_fence_wait_any_timeout(struct dma_fence **fences, -- cgit v1.2.3 From 869b6ea1609f655a43251bf41757aa44e5350a8f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Oct 2023 15:32:01 +0200 Subject: quota: Fix slow quotaoff Eric has reported that commit dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") heavily increases runtime of generic/270 xfstest for ext4 in nojournal mode. The reason for this is that ext4 in nojournal mode leaves dquots dirty until the last dqput() and thus the cleanup done in quota_release_workfn() has to write them all. Due to the way quota_release_workfn() is written this results in synchronize_srcu() call for each dirty dquot which makes the dquot cleanup when turning quotas off extremely slow. To be able to avoid synchronize_srcu() for each dirty dquot we need to rework how we track dquots to be cleaned up. Instead of keeping the last dquot reference while it is on releasing_dquots list, we drop it right away and mark the dquot with new DQ_RELEASING_B bit instead. This way we can we can remove dquot from releasing_dquots list when new reference to it is acquired and thus there's no need to call synchronize_srcu() each time we drop dq_list_lock. References: https://lore.kernel.org/all/ZRytn6CxFK2oECUt@debian-BULLSEYE-live-builder-AMD64 Reported-by: Eric Whitney Fixes: dabc8b207566 ("quota: fix dqput() to follow the guarantees dquot_srcu should provide") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- fs/quota/dquot.c | 66 ++++++++++++++++++++++++++++-------------------- include/linux/quota.h | 4 ++- include/linux/quotaops.h | 2 +- 3 files changed, 43 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 9e72bfe8bbad..31e897ad5e6a 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -233,19 +233,18 @@ static void put_quota_format(struct quota_format_type *fmt) * All dquots are placed to the end of inuse_list when first created, and this * list is used for invalidate operation, which must look at every dquot. * - * When the last reference of a dquot will be dropped, the dquot will be - * added to releasing_dquots. We'd then queue work item which would call + * When the last reference of a dquot is dropped, the dquot is added to + * releasing_dquots. We'll then queue work item which will call * synchronize_srcu() and after that perform the final cleanup of all the - * dquots on the list. Both releasing_dquots and free_dquots use the - * dq_free list_head in the dquot struct. When a dquot is removed from - * releasing_dquots, a reference count is always subtracted, and if - * dq_count == 0 at that point, the dquot will be added to the free_dquots. + * dquots on the list. Each cleaned up dquot is moved to free_dquots list. + * Both releasing_dquots and free_dquots use the dq_free list_head in the dquot + * struct. * - * Unused dquots (dq_count == 0) are added to the free_dquots list when freed, - * and this list is searched whenever we need an available dquot. Dquots are - * removed from the list as soon as they are used again, and - * dqstats.free_dquots gives the number of dquots on the list. When - * dquot is invalidated it's completely released from memory. + * Unused and cleaned up dquots are in the free_dquots list and this list is + * searched whenever we need an available dquot. Dquots are removed from the + * list as soon as they are used again and dqstats.free_dquots gives the number + * of dquots on the list. When dquot is invalidated it's completely released + * from memory. * * Dirty dquots are added to the dqi_dirty_list of quota_info when mark * dirtied, and this list is searched when writing dirty dquots back to @@ -321,6 +320,7 @@ static inline void put_dquot_last(struct dquot *dquot) static inline void put_releasing_dquots(struct dquot *dquot) { list_add_tail(&dquot->dq_free, &releasing_dquots); + set_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void remove_free_dquot(struct dquot *dquot) @@ -328,8 +328,10 @@ static inline void remove_free_dquot(struct dquot *dquot) if (list_empty(&dquot->dq_free)) return; list_del_init(&dquot->dq_free); - if (!atomic_read(&dquot->dq_count)) + if (!test_bit(DQ_RELEASING_B, &dquot->dq_flags)) dqstats_dec(DQST_FREE_DQUOTS); + else + clear_bit(DQ_RELEASING_B, &dquot->dq_flags); } static inline void put_inuse(struct dquot *dquot) @@ -581,12 +583,6 @@ restart: continue; /* Wait for dquot users */ if (atomic_read(&dquot->dq_count)) { - /* dquot in releasing_dquots, flush and retry */ - if (!list_empty(&dquot->dq_free)) { - spin_unlock(&dq_list_lock); - goto restart; - } - atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); /* @@ -605,6 +601,15 @@ restart: * restart. */ goto restart; } + /* + * The last user already dropped its reference but dquot didn't + * get fully cleaned up yet. Restart the scan which flushes the + * work cleaning up released dquots. + */ + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + goto restart; + } /* * Quota now has no users and it has been written on last * dqput() @@ -696,6 +701,13 @@ int dquot_writeback_dquots(struct super_block *sb, int type) dq_dirty); WARN_ON(!dquot_active(dquot)); + /* If the dquot is releasing we should not touch it */ + if (test_bit(DQ_RELEASING_B, &dquot->dq_flags)) { + spin_unlock(&dq_list_lock); + flush_delayed_work("a_release_work); + spin_lock(&dq_list_lock); + continue; + } /* Now we have active dquot from which someone is * holding reference so we can safely just increase @@ -809,18 +821,18 @@ static void quota_release_workfn(struct work_struct *work) /* Exchange the list head to avoid livelock. */ list_replace_init(&releasing_dquots, &rls_head); spin_unlock(&dq_list_lock); + synchronize_srcu(&dquot_srcu); restart: - synchronize_srcu(&dquot_srcu); spin_lock(&dq_list_lock); while (!list_empty(&rls_head)) { dquot = list_first_entry(&rls_head, struct dquot, dq_free); - /* Dquot got used again? */ - if (atomic_read(&dquot->dq_count) > 1) { - remove_free_dquot(dquot); - atomic_dec(&dquot->dq_count); - continue; - } + WARN_ON_ONCE(atomic_read(&dquot->dq_count)); + /* + * Note that DQ_RELEASING_B protects us from racing with + * invalidate_dquots() calls so we are safe to work with the + * dquot even after we drop dq_list_lock. + */ if (dquot_dirty(dquot)) { spin_unlock(&dq_list_lock); /* Commit dquot before releasing */ @@ -834,7 +846,6 @@ restart: } /* Dquot is inactive and clean, now move it to free list */ remove_free_dquot(dquot); - atomic_dec(&dquot->dq_count); put_dquot_last(dquot); } spin_unlock(&dq_list_lock); @@ -875,6 +886,7 @@ void dqput(struct dquot *dquot) BUG_ON(!list_empty(&dquot->dq_free)); #endif put_releasing_dquots(dquot); + atomic_dec(&dquot->dq_count); spin_unlock(&dq_list_lock); queue_delayed_work(system_unbound_wq, "a_release_work, 1); } @@ -963,7 +975,7 @@ we_slept: dqstats_inc(DQST_LOOKUPS); } /* Wait for dq_lock - after this we know that either dquot_release() is - * already finished or it will be canceled due to dq_count > 1 test */ + * already finished or it will be canceled due to dq_count > 0 test */ wait_on_dquot(dquot); /* Read the dquot / allocate space in quota file */ if (!dquot_active(dquot)) { diff --git a/include/linux/quota.h b/include/linux/quota.h index fd692b4a41d5..07071e64abf3 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -285,7 +285,9 @@ static inline void dqstats_dec(unsigned int type) #define DQ_FAKE_B 3 /* no limits only usage */ #define DQ_READ_B 4 /* dquot was read into memory */ #define DQ_ACTIVE_B 5 /* dquot is active (dquot_release not called) */ -#define DQ_LASTSET_B 6 /* Following 6 bits (see QIF_) are reserved\ +#define DQ_RELEASING_B 6 /* dquot is in releasing_dquots list waiting + * to be cleaned up */ +#define DQ_LASTSET_B 7 /* Following 6 bits (see QIF_) are reserved\ * for the mask of entries set via SETQUOTA\ * quotactl. They are set under dq_data_lock\ * and the quota format handling dquot can\ diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 11a4becff3a9..4fa4ef0a173a 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -57,7 +57,7 @@ static inline bool dquot_is_busy(struct dquot *dquot) { if (test_bit(DQ_MOD_B, &dquot->dq_flags)) return true; - if (atomic_read(&dquot->dq_count) > 1) + if (atomic_read(&dquot->dq_count) > 0) return true; return false; } -- cgit v1.2.3 From 8527ca7735ef4cdad32c45853b0138f46ab2df58 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 8 Oct 2023 14:41:21 -0700 Subject: net: skbuff: fix kernel-doc typos Correct punctuation and drop an extraneous word. Signed-off-by: Randy Dunlap Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231008214121.25940-1-rdunlap@infradead.org Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4174c4b82d13..97bfef071255 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1309,7 +1309,7 @@ struct sk_buff_fclones { * * Returns true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), - * so we also check that this didnt happen. + * so we also check that didn't happen. */ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) @@ -2016,7 +2016,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) * Copy shared buffers into a new sk_buff. We effectively do COW on * packets to handle cases where we have a local reader and forward * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. + * a packet that's being forwarded. */ /** -- cgit v1.2.3 From c4dd854f740c21ae8dd9903fc67969c5497cb14b Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Mon, 25 Sep 2023 17:28:39 +0100 Subject: cpu-hotplug: Provide prototypes for arch CPU registration Provide common prototypes for arch_register_cpu() and arch_unregister_cpu(). These are called by acpi_processor.c, with weak versions, so the prototype for this is already set. It is generally not necessary for function prototypes to be conditional on preprocessor macros. Some architectures (e.g. Loongarch) are missing the prototype for this, and rather than add it to Loongarch's asm/cpu.h, do the job once for everyone. Since this covers everyone, remove the now unnecessary prototypes in asm/cpu.h, and therefore remove the 'static' from one of ia64's arch_register_cpu() definitions. [ tglx: Bring back the ia64 part and remove the ACPI prototypes ] Signed-off-by: Russell King (Oracle) Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/E1qkoRr-0088Q8-Da@rmk-PC.armlinux.org.uk --- arch/ia64/include/asm/cpu.h | 5 ----- arch/ia64/kernel/topology.c | 2 +- arch/x86/include/asm/cpu.h | 2 -- arch/x86/kernel/topology.c | 2 +- drivers/acpi/acpi_processor.c | 1 + include/acpi/processor.h | 5 ----- include/linux/cpu.h | 2 ++ 7 files changed, 5 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h index db125df9e088..642d71675ddb 100644 --- a/arch/ia64/include/asm/cpu.h +++ b/arch/ia64/include/asm/cpu.h @@ -15,9 +15,4 @@ DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); DECLARE_PER_CPU(int, cpu_state); -#ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); -#endif - #endif /* _ASM_IA64_CPU_H_ */ diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 94a848b06f15..741863a187a6 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -59,7 +59,7 @@ void __ref arch_unregister_cpu(int num) } EXPORT_SYMBOL(arch_unregister_cpu); #else -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&sysfs_cpus[num].cpu, num); } diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h index 3a233ebff712..25050d953eee 100644 --- a/arch/x86/include/asm/cpu.h +++ b/arch/x86/include/asm/cpu.h @@ -28,8 +28,6 @@ struct x86_cpu { }; #ifdef CONFIG_HOTPLUG_CPU -extern int arch_register_cpu(int num); -extern void arch_unregister_cpu(int); extern void soft_restart_cpu(void); #endif diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c index ca004e2e4469..0bab03130033 100644 --- a/arch/x86/kernel/topology.c +++ b/arch/x86/kernel/topology.c @@ -54,7 +54,7 @@ void arch_unregister_cpu(int num) EXPORT_SYMBOL(arch_unregister_cpu); #else /* CONFIG_HOTPLUG_CPU */ -static int __init arch_register_cpu(int num) +int __init arch_register_cpu(int num) { return register_cpu(&per_cpu(cpu_devices, num).cpu, num); } diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index c711db8a9c33..0f5218e361df 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -12,6 +12,7 @@ #define pr_fmt(fmt) "ACPI: " fmt #include +#include #include #include #include diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 94181fe9780a..3f34ebb27525 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -465,9 +465,4 @@ extern int acpi_processor_ffh_lpi_probe(unsigned int cpu); extern int acpi_processor_ffh_lpi_enter(struct acpi_lpi_state *lpi); #endif -#ifdef CONFIG_ACPI_HOTPLUG_CPU -extern int arch_register_cpu(int cpu); -extern void arch_unregister_cpu(int cpu); -#endif - #endif diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 0abd60a7987b..eb768a866fe3 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -80,6 +80,8 @@ extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, const struct attribute_group **groups, const char *fmt, ...); +extern int arch_register_cpu(int cpu); +extern void arch_unregister_cpu(int cpu); #ifdef CONFIG_HOTPLUG_CPU extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); -- cgit v1.2.3 From e001d1447cd4585d7f23a44ff668ba2bc624badb Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 12 Oct 2023 15:24:17 +0300 Subject: fs: factor out vfs_parse_monolithic_sep() helper Factor out vfs_parse_monolithic_sep() from generic_parse_monolithic(), so filesystems could use it with a custom option separator callback. Acked-by: Christian Brauner Signed-off-by: Amir Goldstein --- fs/fs_context.c | 34 +++++++++++++++++++++++++++++----- include/linux/fs_context.h | 2 ++ 2 files changed, 31 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/fs_context.c b/fs/fs_context.c index a0ad7a0c4680..98589aae5208 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -192,17 +192,19 @@ int vfs_parse_fs_string(struct fs_context *fc, const char *key, EXPORT_SYMBOL(vfs_parse_fs_string); /** - * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * vfs_parse_monolithic_sep - Parse key[=val][,key[=val]]* mount data * @fc: The superblock configuration to fill in. * @data: The data to parse + * @sep: callback for separating next option * - * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be - * called from the ->monolithic_mount_data() fs_context operation. + * Parse a blob of data that's in key[=val][,key[=val]]* form with a custom + * option separator callback. * * Returns 0 on success or the error returned by the ->parse_option() fs_context * operation on failure. */ -int generic_parse_monolithic(struct fs_context *fc, void *data) +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)) { char *options = data, *key; int ret = 0; @@ -214,7 +216,7 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) if (ret) return ret; - while ((key = strsep(&options, ",")) != NULL) { + while ((key = sep(&options)) != NULL) { if (*key) { size_t v_len = 0; char *value = strchr(key, '='); @@ -233,6 +235,28 @@ int generic_parse_monolithic(struct fs_context *fc, void *data) return ret; } +EXPORT_SYMBOL(vfs_parse_monolithic_sep); + +static char *vfs_parse_comma_sep(char **s) +{ + return strsep(s, ","); +} + +/** + * generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data + * @fc: The superblock configuration to fill in. + * @data: The data to parse + * + * Parse a blob of data that's in key[=val][,key[=val]]* form. This can be + * called from the ->monolithic_mount_data() fs_context operation. + * + * Returns 0 on success or the error returned by the ->parse_option() fs_context + * operation on failure. + */ +int generic_parse_monolithic(struct fs_context *fc, void *data) +{ + return vfs_parse_monolithic_sep(fc, data, vfs_parse_comma_sep); +} EXPORT_SYMBOL(generic_parse_monolithic); /** diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 96332db693d5..c13e99cbbf81 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -136,6 +136,8 @@ extern struct fs_context *vfs_dup_fs_context(struct fs_context *fc); extern int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); extern int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size); +int vfs_parse_monolithic_sep(struct fs_context *fc, void *data, + char *(*sep)(char **)); extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); -- cgit v1.2.3 From 13cc9ee8f8ed58e563294d87d74a62006be40f21 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 12 Oct 2023 13:09:02 -0400 Subject: cgroup: Fix incorrect css_set_rwsem reference in comment Since commit f0d9a5f17575 ("cgroup: make css_set_rwsem a spinlock and rename it to css_set_lock"), css_set_rwsem has been replaced by css_set_lock. That commit, however, missed the css_set_rwsem reference in include/linux/cgroup-defs.h. Fix that by changing it to css_set_lock as well. Signed-off-by: Waiman Long Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index f1b3151ac30b..265da00a1a8b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -238,7 +238,7 @@ struct css_set { * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. Protected by - * css_set_rwsem, but, during migration, once tasks are moved to + * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; -- cgit v1.2.3