From 200f091c95bbc4b8660636bd345805c45d6eced7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 28 Sep 2024 14:08:31 -0700 Subject: coredump: Do not lock during 'comm' reporting The 'comm' member will always be NUL terminated, and this is not fast-path, so we can just perform a direct memcpy during a coredump instead of potentially deadlocking while holding the task struct lock. Reported-by: Vegard Nossum Closes: https://lore.kernel.org/all/d122ece6-3606-49de-ae4d-8da88846bef2@oracle.com Fixes: c114e9948c2b ("coredump: Standartize and fix logging") Tested-by: Vegard Nossum Link: https://lore.kernel.org/r/20240928210830.work.307-kees@kernel.org Signed-off-by: Kees Cook --- include/linux/coredump.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 45e598fe3476..77e6e195d1d6 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -52,8 +52,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); #define __COREDUMP_PRINTK(Level, Format, ...) \ do { \ char comm[TASK_COMM_LEN]; \ - \ - get_task_comm(comm, current); \ + /* This will always be NUL terminated. */ \ + memcpy(comm, current->comm, sizeof(comm)); \ printk_ratelimited(Level "coredump: %d(%*pE): " Format "\n", \ task_tgid_vnr(current), (int)strlen(comm), comm, ##__VA_ARGS__); \ } while (0) \ -- cgit v1.2.3 From b9c44b91476b67327a521568a854babecc4070ab Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Wed, 15 May 2024 12:36:07 -0700 Subject: perf/core: Save raw sample data conditionally based on sample type Currently, space for raw sample data is always allocated within sample records for both BPF output and tracepoint events. This leads to unused space in sample records when raw sample data is not requested. This patch enforces checking sample type of an event in perf_sample_save_raw_data(). So raw sample data will only be saved if explicitly requested, reducing overhead when it is not needed. Fixes: 0a9081cf0a11 ("perf/core: Add perf_sample_save_raw_data() helper") Signed-off-by: Yabin Cui Signed-off-by: Ingo Molnar Reviewed-by: Ian Rogers Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20240515193610.2350456-2-yabinc@google.com --- arch/s390/kernel/perf_cpum_cf.c | 2 +- arch/s390/kernel/perf_pai_crypto.c | 2 +- arch/s390/kernel/perf_pai_ext.c | 2 +- arch/x86/events/amd/ibs.c | 2 +- include/linux/perf_event.h | 6 ++++++ kernel/events/core.c | 35 ++++++++++++++++++----------------- kernel/trace/bpf_trace.c | 11 ++++++----- 7 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index e2e0aa463fbd..c3075e4a8efc 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -981,7 +981,7 @@ static int cfdiag_push_sample(struct perf_event *event, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = cpuhw->usedss; raw.frag.data = cpuhw->stop; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index fa7325454266..10725f5a6f0f 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -478,7 +478,7 @@ static int paicrypt_push_sample(size_t rawsize, struct paicrypt_map *cpump, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 7f462bef1fc0..a8f0bad99cf0 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -503,7 +503,7 @@ static int paiext_push_sample(size_t rawsize, struct paiext_map *cpump, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } overflow = perf_event_overflow(event, &data, ®s); diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index e91970b01d62..c3a2f6f57770 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1118,7 +1118,7 @@ fail: .data = ibs_data.data, }, }; - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); } if (perf_ibs == &perf_ibs_op) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cb99ec8c9e96..f7c0a3f2f502 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1287,12 +1287,18 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data, } static inline void perf_sample_save_raw_data(struct perf_sample_data *data, + struct perf_event *event, struct perf_raw_record *raw) { struct perf_raw_frag *frag = &raw->frag; u32 sum = 0; int size; + if (!(event->attr.sample_type & PERF_SAMPLE_RAW)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_RAW)) + return; + do { sum += frag->size; if (perf_raw_frag_last(frag)) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1869164a4e99..3670b91f182f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10451,9 +10451,9 @@ static struct pmu perf_tracepoint = { }; static int perf_tp_filter_match(struct perf_event *event, - struct perf_sample_data *data) + struct perf_raw_record *raw) { - void *record = data->raw->frag.data; + void *record = raw->frag.data; /* only top level events have filters set */ if (event->parent) @@ -10465,7 +10465,7 @@ static int perf_tp_filter_match(struct perf_event *event, } static int perf_tp_event_match(struct perf_event *event, - struct perf_sample_data *data, + struct perf_raw_record *raw, struct pt_regs *regs) { if (event->hw.state & PERF_HES_STOPPED) @@ -10476,7 +10476,7 @@ static int perf_tp_event_match(struct perf_event *event, if (event->attr.exclude_kernel && !user_mode(regs)) return 0; - if (!perf_tp_filter_match(event, data)) + if (!perf_tp_filter_match(event, raw)) return 0; return 1; @@ -10502,6 +10502,7 @@ EXPORT_SYMBOL_GPL(perf_trace_run_bpf_submit); static void __perf_tp_event_target_task(u64 count, void *record, struct pt_regs *regs, struct perf_sample_data *data, + struct perf_raw_record *raw, struct perf_event *event) { struct trace_entry *entry = record; @@ -10511,13 +10512,17 @@ static void __perf_tp_event_target_task(u64 count, void *record, /* Cannot deliver synchronous signal to other task. */ if (event->attr.sigtrap) return; - if (perf_tp_event_match(event, data, regs)) + if (perf_tp_event_match(event, raw, regs)) { + perf_sample_data_init(data, 0, 0); + perf_sample_save_raw_data(data, event, raw); perf_swevent_event(event, count, data, regs); + } } static void perf_tp_event_target_task(u64 count, void *record, struct pt_regs *regs, struct perf_sample_data *data, + struct perf_raw_record *raw, struct perf_event_context *ctx) { unsigned int cpu = smp_processor_id(); @@ -10525,15 +10530,15 @@ static void perf_tp_event_target_task(u64 count, void *record, struct perf_event *event, *sibling; perf_event_groups_for_cpu_pmu(event, &ctx->pinned_groups, cpu, pmu) { - __perf_tp_event_target_task(count, record, regs, data, event); + __perf_tp_event_target_task(count, record, regs, data, raw, event); for_each_sibling_event(sibling, event) - __perf_tp_event_target_task(count, record, regs, data, sibling); + __perf_tp_event_target_task(count, record, regs, data, raw, sibling); } perf_event_groups_for_cpu_pmu(event, &ctx->flexible_groups, cpu, pmu) { - __perf_tp_event_target_task(count, record, regs, data, event); + __perf_tp_event_target_task(count, record, regs, data, raw, event); for_each_sibling_event(sibling, event) - __perf_tp_event_target_task(count, record, regs, data, sibling); + __perf_tp_event_target_task(count, record, regs, data, raw, sibling); } } @@ -10551,15 +10556,10 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, }, }; - perf_sample_data_init(&data, 0, 0); - perf_sample_save_raw_data(&data, &raw); - perf_trace_buf_update(record, event_type); hlist_for_each_entry_rcu(event, head, hlist_entry) { - if (perf_tp_event_match(event, &data, regs)) { - perf_swevent_event(event, count, &data, regs); - + if (perf_tp_event_match(event, &raw, regs)) { /* * Here use the same on-stack perf_sample_data, * some members in data are event-specific and @@ -10569,7 +10569,8 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, * because data->sample_flags is set. */ perf_sample_data_init(&data, 0, 0); - perf_sample_save_raw_data(&data, &raw); + perf_sample_save_raw_data(&data, event, &raw); + perf_swevent_event(event, count, &data, regs); } } @@ -10586,7 +10587,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, goto unlock; raw_spin_lock(&ctx->lock); - perf_tp_event_target_task(count, record, regs, &data, ctx); + perf_tp_event_target_task(count, record, regs, &data, &raw, ctx); raw_spin_unlock(&ctx->lock); unlock: rcu_read_unlock(); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index fdab7ecd8dfa..162bacf8aa5d 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -619,7 +619,8 @@ static const struct bpf_func_proto bpf_perf_event_read_value_proto = { static __always_inline u64 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, - u64 flags, struct perf_sample_data *sd) + u64 flags, struct perf_raw_record *raw, + struct perf_sample_data *sd) { struct bpf_array *array = container_of(map, struct bpf_array, map); unsigned int cpu = smp_processor_id(); @@ -644,6 +645,8 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, if (unlikely(event->oncpu != cpu)) return -EOPNOTSUPP; + perf_sample_save_raw_data(sd, event, raw); + return perf_event_output(event, sd, regs); } @@ -687,9 +690,8 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, } perf_sample_data_init(sd, 0, 0); - perf_sample_save_raw_data(sd, &raw); - err = __bpf_perf_event_output(regs, map, flags, sd); + err = __bpf_perf_event_output(regs, map, flags, &raw, sd); out: this_cpu_dec(bpf_trace_nest_level); preempt_enable(); @@ -748,9 +750,8 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, perf_fetch_caller_regs(regs); perf_sample_data_init(sd, 0, 0); - perf_sample_save_raw_data(sd, &raw); - ret = __bpf_perf_event_output(regs, map, flags, sd); + ret = __bpf_perf_event_output(regs, map, flags, &raw, sd); out: this_cpu_dec(bpf_event_output_nest_level); preempt_enable(); -- cgit v1.2.3 From f226805bc5f60adf03783d8e4cbfe303ccecd64e Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Wed, 15 May 2024 12:36:08 -0700 Subject: perf/core: Check sample_type in perf_sample_save_callchain Check sample_type in perf_sample_save_callchain() to prevent saving callchain data when it isn't required. Suggested-by: Namhyung Kim Signed-off-by: Yabin Cui Signed-off-by: Ingo Molnar Reviewed-by: Ian Rogers Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20240515193610.2350456-3-yabinc@google.com --- arch/x86/events/amd/ibs.c | 3 +-- arch/x86/events/intel/ds.c | 6 ++---- include/linux/perf_event.h | 5 +++++ 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index c3a2f6f57770..f02939655b2a 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1129,8 +1129,7 @@ fail: * recorded as part of interrupt regs. Thus we need to use rip from * interrupt regs while unwinding call stack. */ - if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) - perf_sample_save_callchain(&data, event, iregs); + perf_sample_save_callchain(&data, event, iregs); throttle = perf_event_overflow(event, &data, ®s); out: diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8afc4ad3cd16..4990a2409807 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1789,8 +1789,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) - perf_sample_save_callchain(data, event, iregs); + perf_sample_save_callchain(data, event, iregs); /* * We use the interrupt regs as a base because the PEBS record does not @@ -1957,8 +1956,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, * previous PMI context or an (I)RET happened between the record and * PMI. */ - if (sample_type & PERF_SAMPLE_CALLCHAIN) - perf_sample_save_callchain(data, event, iregs); + perf_sample_save_callchain(data, event, iregs); *regs = *iregs; /* The ip in basic is EventingIP */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f7c0a3f2f502..3ac202d971fb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1279,6 +1279,11 @@ static inline void perf_sample_save_callchain(struct perf_sample_data *data, { int size = 1; + if (!(event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_CALLCHAIN)) + return; + data->callchain = perf_callchain(event, regs); size += data->callchain->nr; -- cgit v1.2.3 From faac6f105ef169e2e5678c14e1ffebf2a7d780b6 Mon Sep 17 00:00:00 2001 From: Yabin Cui Date: Wed, 15 May 2024 12:36:09 -0700 Subject: perf/core: Check sample_type in perf_sample_save_brstack Check sample_type in perf_sample_save_brstack() to prevent saving branch stack data when it isn't required. Suggested-by: Namhyung Kim Signed-off-by: Yabin Cui Signed-off-by: Ingo Molnar Reviewed-by: Ian Rogers Acked-by: Namhyung Kim Link: https://lore.kernel.org/r/20240515193610.2350456-4-yabinc@google.com --- arch/x86/events/amd/core.c | 3 +-- arch/x86/events/core.c | 3 +-- arch/x86/events/intel/ds.c | 3 +-- include/linux/perf_event.h | 15 ++++++++++----- 4 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index b4a1a2576510..30d6ceb4c8ad 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -1001,8 +1001,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; - if (has_branch_stack(event)) - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index c75c482d4c52..8f218ac0d445 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1707,8 +1707,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) perf_sample_data_init(&data, 0, event->hw.last_period); - if (has_branch_stack(event)) - perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); + perf_sample_save_brstack(&data, event, &cpuc->lbr_stack, NULL); if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 4990a2409807..acfd87207547 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1888,8 +1888,7 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, if (x86_pmu.intel_cap.pebs_format >= 3) setup_pebs_time(event, data, pebs->tsc); - if (has_branch_stack(event)) - perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); + perf_sample_save_brstack(data, event, &cpuc->lbr_stack, NULL); } static void adaptive_pebs_save_regs(struct pt_regs *regs, diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 3ac202d971fb..bf831b1485ff 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1320,6 +1320,11 @@ static inline void perf_sample_save_raw_data(struct perf_sample_data *data, data->sample_flags |= PERF_SAMPLE_RAW; } +static inline bool has_branch_stack(struct perf_event *event) +{ + return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; +} + static inline void perf_sample_save_brstack(struct perf_sample_data *data, struct perf_event *event, struct perf_branch_stack *brs, @@ -1327,6 +1332,11 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data, { int size = sizeof(u64); /* nr */ + if (!has_branch_stack(event)) + return; + if (WARN_ON_ONCE(data->sample_flags & PERF_SAMPLE_BRANCH_STACK)) + return; + if (branch_sample_hw_index(event)) size += sizeof(u64); size += brs->nr * sizeof(struct perf_branch_entry); @@ -1716,11 +1726,6 @@ static inline unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs) # define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs) #endif -static inline bool has_branch_stack(struct perf_event *event) -{ - return event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK; -} - static inline bool needs_branch_stack(struct perf_event *event) { return event->attr.branch_sample_type != 0; -- cgit v1.2.3 From 2815a56e4b7252a836969f5674ee356ea1ce482c Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 14 Nov 2024 10:26:17 -0500 Subject: x86/mm/tlb: Add tracepoint for TLB flush IPI to stale CPU Add a tracepoint when we send a TLB flush IPI to a CPU that used to be in the mm_cpumask, but isn't any more. Suggested-by: Dave Hansen Signed-off-by: Rik van Riel Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20241114152723.1294686-3-riel@surriel.com --- arch/x86/mm/tlb.c | 1 + include/linux/mm_types.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index cc4e57ae690f..1aac4fa90d3d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -760,6 +760,7 @@ static void flush_tlb_func(void *info) /* Can only happen on remote CPUs */ if (f->mm && f->mm != loaded_mm) { cpumask_clear_cpu(raw_smp_processor_id(), mm_cpumask(f->mm)); + trace_tlb_flush(TLB_REMOTE_WRONG_CPU, 0); return; } } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 6e3bdf8e38bc..6b6f05404304 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1335,6 +1335,7 @@ enum tlb_flush_reason { TLB_LOCAL_SHOOTDOWN, TLB_LOCAL_MM_SHOOTDOWN, TLB_REMOTE_SEND_IPI, + TLB_REMOTE_WRONG_CPU, NR_TLB_FLUSH_REASONS, }; -- cgit v1.2.3 From 0a499a7e9819e7a0980408f18df68160a0b55f2e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:08:26 -0800 Subject: lib/crc32: drop leading underscores from __crc32c_le_base Remove the leading underscores from __crc32c_le_base(). This is in preparation for adding crc32c_le_arch() and eventually renaming __crc32c_le() to crc32c_le(). Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20241202010844.144356-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm64/lib/crc32-glue.c | 2 +- arch/riscv/lib/crc32.c | 2 +- crypto/crc32c_generic.c | 8 ++++---- include/linux/crc32.h | 2 +- lib/crc32.c | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index 295ae3e6b997..ad015223d15d 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -44,7 +44,7 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) - return __crc32c_le_base(crc, p, len); + return crc32c_le_base(crc, p, len); if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) { kernel_neon_begin(); diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c index d7dc599af3ef..333fb7af1192 100644 --- a/arch/riscv/lib/crc32.c +++ b/arch/riscv/lib/crc32.c @@ -226,7 +226,7 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, - CRC32C_POLY_QT_LE, __crc32c_le_base); + CRC32C_POLY_QT_LE, crc32c_le_base); } static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 7c2357c30fdf..635599b255ec 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -85,7 +85,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data, { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); - ctx->crc = __crc32c_le_base(ctx->crc, data, length); + ctx->crc = crc32c_le_base(ctx->crc, data, length); return 0; } @@ -108,7 +108,7 @@ static int chksum_final(struct shash_desc *desc, u8 *out) static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { - put_unaligned_le32(~__crc32c_le_base(*crcp, data, len), out); + put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out); return 0; } @@ -200,12 +200,12 @@ static struct shash_alg algs[] = {{ static int __init crc32c_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&__crc32c_le != &__crc32c_le_base)); + return crypto_register_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base)); } static void __exit crc32c_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &__crc32c_le_base)); + crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base)); } subsys_initcall(crc32c_mod_init); diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 87f788c0d607..5b07fc9081c4 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -39,7 +39,7 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) } u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len); /** * __crc32c_le_combine - Combine two crc32c check values into one. For two diff --git a/lib/crc32.c b/lib/crc32.c index ff587fee3893..c67059b0082b 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -207,8 +207,8 @@ EXPORT_SYMBOL(__crc32c_le); u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); EXPORT_SYMBOL(crc32_le_base); -u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); -EXPORT_SYMBOL(__crc32c_le_base); +u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); +EXPORT_SYMBOL(crc32c_le_base); u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be); -- cgit v1.2.3 From d36cebe03c3ae4ea1fde20cfc797fab8729c3ab5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:08:27 -0800 Subject: lib/crc32: improve support for arch-specific overrides Currently the CRC32 library functions are defined as weak symbols, and the arm64 and riscv architectures override them. This method of arch-specific overrides has the limitation that it only works when both the base and arch code is built-in. Also, it makes the arch-specific code be silently not used if it is accidentally built with lib-y instead of obj-y; unfortunately the RISC-V code does this. This commit reorganizes the code to have explicit *_arch() functions that are called when they are enabled, similar to how some of the crypto library code works (e.g. chacha_crypt() calls chacha_crypt_arch()). Make the existing kconfig choice for the CRC32 implementation also control whether the arch-optimized implementation (if one is available) is enabled or not. Make it enabled by default if CRC32 is also enabled. The result is that arch-optimized CRC32 library functions will be included automatically when appropriate, but it is now possible to disable them. They can also now be built as a loadable module if the CRC32 library functions happen to be used only by loadable modules, in which case the arch and base CRC32 modules will be automatically loaded via direct symbol dependency when appropriate. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20241202010844.144356-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm64/Kconfig | 1 + arch/arm64/lib/Makefile | 3 +- arch/arm64/lib/crc32-glue.c | 13 +- arch/riscv/Kconfig | 1 + arch/riscv/lib/Makefile | 3 +- arch/riscv/lib/crc32-riscv.c | 301 +++++++++++++++++++++++++++++++++++++++++++ arch/riscv/lib/crc32.c | 294 ------------------------------------------ crypto/crc32_generic.c | 4 +- crypto/crc32c_generic.c | 4 +- include/linux/crc32.h | 35 ++++- lib/Kconfig | 70 +++++++--- lib/crc32.c | 22 +--- 12 files changed, 409 insertions(+), 342 deletions(-) create mode 100644 arch/riscv/lib/crc32-riscv.c delete mode 100644 arch/riscv/lib/crc32.c (limited to 'include') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 100570a048c5..71f6310c8240 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -21,6 +21,7 @@ config ARM64 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CC_PLATFORM + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 8e882f479d98..5fbcf0d56665 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -13,7 +13,8 @@ endif lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o -obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o +obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o +crc32-arm64-y := crc32.o crc32-glue.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index ad015223d15d..d7f6e1cbf0d2 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -21,7 +22,7 @@ asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len); asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_le_base(crc, p, len); @@ -40,8 +41,9 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) return crc32_le_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32_le_arch); -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32c_le_base(crc, p, len); @@ -60,8 +62,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) return crc32c_le_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32c_le_arch); -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_be_base(crc, p, len); @@ -80,3 +83,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) return crc32_be_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32_be_arch); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d4a7ca0388c0..7d5718667e39 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -24,6 +24,7 @@ config RISCV select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CRC32 if RISCV_ISA_ZBC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 8eec6b69a875..79368a895fee 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -15,8 +15,7 @@ endif lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o -lib-$(CONFIG_RISCV_ISA_ZBC) += crc32.o - +obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o lib-$(CONFIG_RISCV_ISA_V) += xor.o lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c new file mode 100644 index 000000000000..a3ff7db2a1ce --- /dev/null +++ b/arch/riscv/lib/crc32-riscv.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC32 implementation with Zbc extension. + * + * Copyright (C) 2024 Intel Corporation + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for + * better understanding of how this math works. + * + * let "+" denotes polynomial add (XOR) + * let "-" denotes polynomial sub (XOR) + * let "*" denotes polynomial multiplication + * let "/" denotes polynomial floor division + * let "S" denotes source data, XLEN bit wide + * let "P" denotes CRC32 polynomial + * let "T" denotes 2^(XLEN+32) + * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit + * + * crc32(S, P) + * => S * (2^32) - S * (2^32) / P * P + * => lowest 32 bits of: S * (2^32) / P * P + * => lowest 32 bits of: S * (2^32) * (T / P) / T * P + * => lowest 32 bits of: S * (2^32) * quotient / T * P + * => lowest 32 bits of: S * quotient / 2^XLEN * P + * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P + * => clmul_low_part(clmul_high_part(S, QT) + S, P) + * + * In terms of below implementations, the BE case is more intuitive, since the + * higher order bit sits at more significant position. + */ + +#if __riscv_xlen == 64 +/* Slide by XLEN bits per iteration */ +# define STEP_ORDER 3 + +/* Each below polynomial quotient has an implicit bit for 2^XLEN */ + +/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */ +# define CRC32_POLY_QT_LE 0x5a72d812fb808b20 + +/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */ +# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8 + +/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be + * the same as the bit-reversed version of CRC32_POLY_QT_LE + */ +# define CRC32_POLY_QT_BE 0x04d101df481b4e5a + +static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr) +{ + return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr); +} + +static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) +{ + u32 crc; + + /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ + asm volatile (".option push\n" + ".option arch,+zbc\n" + "clmul %0, %1, %2\n" + "slli %0, %0, 1\n" + "xor %0, %0, %1\n" + "clmulr %0, %0, %3\n" + "srli %0, %0, 32\n" + ".option pop\n" + : "=&r" (crc) + : "r" (s), + "r" (poly_qt), + "r" ((u64)poly << 32) + :); + return crc; +} + +static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr) +{ + return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr); +} + +#elif __riscv_xlen == 32 +# define STEP_ORDER 2 +/* Each quotient should match the upper half of its analog in RV64 */ +# define CRC32_POLY_QT_LE 0xfb808b20 +# define CRC32C_POLY_QT_LE 0x6f5389f8 +# define CRC32_POLY_QT_BE 0x04d101df + +static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr) +{ + return crc ^ (__force u32)__cpu_to_le32(*ptr); +} + +static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) +{ + u32 crc; + + /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ + asm volatile (".option push\n" + ".option arch,+zbc\n" + "clmul %0, %1, %2\n" + "slli %0, %0, 1\n" + "xor %0, %0, %1\n" + "clmulr %0, %0, %3\n" + ".option pop\n" + : "=&r" (crc) + : "r" (s), + "r" (poly_qt), + "r" (poly) + :); + return crc; +} + +static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr) +{ + return crc ^ (__force u32)__cpu_to_be32(*ptr); +} + +#else +# error "Unexpected __riscv_xlen" +#endif + +static inline u32 crc32_be_zbc(unsigned long s) +{ + u32 crc; + + asm volatile (".option push\n" + ".option arch,+zbc\n" + "clmulh %0, %1, %2\n" + "xor %0, %0, %1\n" + "clmul %0, %0, %3\n" + ".option pop\n" + : "=&r" (crc) + : "r" (s), + "r" (CRC32_POLY_QT_BE), + "r" (CRC32_POLY_BE) + :); + return crc; +} + +#define STEP (1 << STEP_ORDER) +#define OFFSET_MASK (STEP - 1) + +typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len); + +static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p, + size_t len, u32 poly, + unsigned long poly_qt) +{ + size_t bits = len * 8; + unsigned long s = 0; + u32 crc_low = 0; + + for (int i = 0; i < len; i++) + s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8); + + s ^= (unsigned long)crc << (__riscv_xlen - bits); + if (__riscv_xlen == 32 || len < sizeof(u32)) + crc_low = crc >> bits; + + crc = crc32_le_zbc(s, poly, poly_qt); + crc ^= crc_low; + + return crc; +} + +static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, + size_t len, u32 poly, + unsigned long poly_qt, + fallback crc_fb) +{ + size_t offset, head_len, tail_len; + unsigned long const *p_ul; + unsigned long s; + + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBC, 1) + : : : : legacy); + + /* Handle the unaligned head. */ + offset = (unsigned long)p & OFFSET_MASK; + if (offset && len) { + head_len = min(STEP - offset, len); + crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt); + p += head_len; + len -= head_len; + } + + tail_len = len & OFFSET_MASK; + len = len >> STEP_ORDER; + p_ul = (unsigned long const *)p; + + for (int i = 0; i < len; i++) { + s = crc32_le_prep(crc, p_ul); + crc = crc32_le_zbc(s, poly, poly_qt); + p_ul++; + } + + /* Handle the tail bytes. */ + p = (unsigned char const *)p_ul; + if (tail_len) + crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt); + + return crc; + +legacy: + return crc_fb(crc, p, len); +} + +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, + crc32_le_base); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, + CRC32C_POLY_QT_LE, crc32c_le_base); +} +EXPORT_SYMBOL(crc32c_le_arch); + +static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, + size_t len) +{ + size_t bits = len * 8; + unsigned long s = 0; + u32 crc_low = 0; + + s = 0; + for (int i = 0; i < len; i++) + s = *p++ | (s << 8); + + if (__riscv_xlen == 32 || len < sizeof(u32)) { + s ^= crc >> (32 - bits); + crc_low = crc << bits; + } else { + s ^= (unsigned long)crc << (bits - 32); + } + + crc = crc32_be_zbc(s); + crc ^= crc_low; + + return crc; +} + +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + size_t offset, head_len, tail_len; + unsigned long const *p_ul; + unsigned long s; + + asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, + RISCV_ISA_EXT_ZBC, 1) + : : : : legacy); + + /* Handle the unaligned head. */ + offset = (unsigned long)p & OFFSET_MASK; + if (offset && len) { + head_len = min(STEP - offset, len); + crc = crc32_be_unaligned(crc, p, head_len); + p += head_len; + len -= head_len; + } + + tail_len = len & OFFSET_MASK; + len = len >> STEP_ORDER; + p_ul = (unsigned long const *)p; + + for (int i = 0; i < len; i++) { + s = crc32_be_prep(crc, p_ul); + crc = crc32_be_zbc(s); + p_ul++; + } + + /* Handle the tail bytes. */ + p = (unsigned char const *)p_ul; + if (tail_len) + crc = crc32_be_unaligned(crc, p, tail_len); + + return crc; + +legacy: + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c deleted file mode 100644 index 333fb7af1192..000000000000 --- a/arch/riscv/lib/crc32.c +++ /dev/null @@ -1,294 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC32 implementation with Zbc extension. - * - * Copyright (C) 2024 Intel Corporation - */ - -#include -#include -#include - -#include -#include -#include -#include -#include - -/* - * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for - * better understanding of how this math works. - * - * let "+" denotes polynomial add (XOR) - * let "-" denotes polynomial sub (XOR) - * let "*" denotes polynomial multiplication - * let "/" denotes polynomial floor division - * let "S" denotes source data, XLEN bit wide - * let "P" denotes CRC32 polynomial - * let "T" denotes 2^(XLEN+32) - * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit - * - * crc32(S, P) - * => S * (2^32) - S * (2^32) / P * P - * => lowest 32 bits of: S * (2^32) / P * P - * => lowest 32 bits of: S * (2^32) * (T / P) / T * P - * => lowest 32 bits of: S * (2^32) * quotient / T * P - * => lowest 32 bits of: S * quotient / 2^XLEN * P - * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P - * => clmul_low_part(clmul_high_part(S, QT) + S, P) - * - * In terms of below implementations, the BE case is more intuitive, since the - * higher order bit sits at more significant position. - */ - -#if __riscv_xlen == 64 -/* Slide by XLEN bits per iteration */ -# define STEP_ORDER 3 - -/* Each below polynomial quotient has an implicit bit for 2^XLEN */ - -/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */ -# define CRC32_POLY_QT_LE 0x5a72d812fb808b20 - -/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */ -# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8 - -/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be - * the same as the bit-reversed version of CRC32_POLY_QT_LE - */ -# define CRC32_POLY_QT_BE 0x04d101df481b4e5a - -static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr) -{ - return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr); -} - -static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) -{ - u32 crc; - - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - "slli %0, %0, 1\n" - "xor %0, %0, %1\n" - "clmulr %0, %0, %3\n" - "srli %0, %0, 32\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (poly_qt), - "r" ((u64)poly << 32) - :); - return crc; -} - -static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr) -{ - return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr); -} - -#elif __riscv_xlen == 32 -# define STEP_ORDER 2 -/* Each quotient should match the upper half of its analog in RV64 */ -# define CRC32_POLY_QT_LE 0xfb808b20 -# define CRC32C_POLY_QT_LE 0x6f5389f8 -# define CRC32_POLY_QT_BE 0x04d101df - -static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr) -{ - return crc ^ (__force u32)__cpu_to_le32(*ptr); -} - -static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt) -{ - u32 crc; - - /* We don't have a "clmulrh" insn, so use clmul + slli instead. */ - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmul %0, %1, %2\n" - "slli %0, %0, 1\n" - "xor %0, %0, %1\n" - "clmulr %0, %0, %3\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (poly_qt), - "r" (poly) - :); - return crc; -} - -static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr) -{ - return crc ^ (__force u32)__cpu_to_be32(*ptr); -} - -#else -# error "Unexpected __riscv_xlen" -#endif - -static inline u32 crc32_be_zbc(unsigned long s) -{ - u32 crc; - - asm volatile (".option push\n" - ".option arch,+zbc\n" - "clmulh %0, %1, %2\n" - "xor %0, %0, %1\n" - "clmul %0, %0, %3\n" - ".option pop\n" - : "=&r" (crc) - : "r" (s), - "r" (CRC32_POLY_QT_BE), - "r" (CRC32_POLY_BE) - :); - return crc; -} - -#define STEP (1 << STEP_ORDER) -#define OFFSET_MASK (STEP - 1) - -typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len); - -static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p, - size_t len, u32 poly, - unsigned long poly_qt) -{ - size_t bits = len * 8; - unsigned long s = 0; - u32 crc_low = 0; - - for (int i = 0; i < len; i++) - s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8); - - s ^= (unsigned long)crc << (__riscv_xlen - bits); - if (__riscv_xlen == 32 || len < sizeof(u32)) - crc_low = crc >> bits; - - crc = crc32_le_zbc(s, poly, poly_qt); - crc ^= crc_low; - - return crc; -} - -static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, - size_t len, u32 poly, - unsigned long poly_qt, - fallback crc_fb) -{ - size_t offset, head_len, tail_len; - unsigned long const *p_ul; - unsigned long s; - - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBC, 1) - : : : : legacy); - - /* Handle the unaligned head. */ - offset = (unsigned long)p & OFFSET_MASK; - if (offset && len) { - head_len = min(STEP - offset, len); - crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt); - p += head_len; - len -= head_len; - } - - tail_len = len & OFFSET_MASK; - len = len >> STEP_ORDER; - p_ul = (unsigned long const *)p; - - for (int i = 0; i < len; i++) { - s = crc32_le_prep(crc, p_ul); - crc = crc32_le_zbc(s, poly, poly_qt); - p_ul++; - } - - /* Handle the tail bytes. */ - p = (unsigned char const *)p_ul; - if (tail_len) - crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt); - - return crc; - -legacy: - return crc_fb(crc, p, len); -} - -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, - crc32_le_base); -} - -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) -{ - return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, - CRC32C_POLY_QT_LE, crc32c_le_base); -} - -static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, - size_t len) -{ - size_t bits = len * 8; - unsigned long s = 0; - u32 crc_low = 0; - - s = 0; - for (int i = 0; i < len; i++) - s = *p++ | (s << 8); - - if (__riscv_xlen == 32 || len < sizeof(u32)) { - s ^= crc >> (32 - bits); - crc_low = crc << bits; - } else { - s ^= (unsigned long)crc << (bits - 32); - } - - crc = crc32_be_zbc(s); - crc ^= crc_low; - - return crc; -} - -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) -{ - size_t offset, head_len, tail_len; - unsigned long const *p_ul; - unsigned long s; - - asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0, - RISCV_ISA_EXT_ZBC, 1) - : : : : legacy); - - /* Handle the unaligned head. */ - offset = (unsigned long)p & OFFSET_MASK; - if (offset && len) { - head_len = min(STEP - offset, len); - crc = crc32_be_unaligned(crc, p, head_len); - p += head_len; - len -= head_len; - } - - tail_len = len & OFFSET_MASK; - len = len >> STEP_ORDER; - p_ul = (unsigned long const *)p; - - for (int i = 0; i < len; i++) { - s = crc32_be_prep(crc, p_ul); - crc = crc32_be_zbc(s); - p_ul++; - } - - /* Handle the tail bytes. */ - p = (unsigned char const *)p_ul; - if (tail_len) - crc = crc32_be_unaligned(crc, p, tail_len); - - return crc; - -legacy: - return crc32_be_base(crc, p, len); -} diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c index 6a55d206fab3..cc064ea8240e 100644 --- a/crypto/crc32_generic.c +++ b/crypto/crc32_generic.c @@ -160,12 +160,12 @@ static struct shash_alg algs[] = {{ static int __init crc32_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&crc32_le != &crc32_le_base)); + return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } static void __exit crc32_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&crc32_le != &crc32_le_base)); + crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } subsys_initcall(crc32_mod_init); diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 635599b255ec..04b03d825cf4 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -200,12 +200,12 @@ static struct shash_alg algs[] = {{ static int __init crc32c_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base)); + return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } static void __exit crc32c_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base)); + crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } subsys_initcall(crc32c_mod_init); diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 5b07fc9081c4..58c632533b08 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -8,10 +8,34 @@ #include #include -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len); +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len); + +static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_le_arch(crc, p, len); + return crc32_le_base(crc, p, len); +} + +static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_be_arch(crc, p, len); + return crc32_be_base(crc, p, len); +} + +/* TODO: leading underscores should be dropped once callers have been updated */ +static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32c_le_arch(crc, p, len); + return crc32c_le_base(crc, p, len); +} /** * crc32_le_combine - Combine two crc32 check values into one. For two @@ -38,9 +62,6 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) return crc32_le_shift(crc1, len2) ^ crc2; } -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len); - /** * __crc32c_le_combine - Combine two crc32c check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 diff --git a/lib/Kconfig b/lib/Kconfig index 5a318f753b2f..8858030bebae 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -190,6 +190,9 @@ config CRC32 the kernel tree does. Such modules that use library CRC32/CRC32c functions require M here. +config ARCH_HAS_CRC32 + bool + config CRC32_SELFTEST tristate "CRC32 perform self test on init" depends on CRC32 @@ -202,24 +205,39 @@ config CRC32_SELFTEST choice prompt "CRC32 implementation" depends on CRC32 - default CRC32_SLICEBY8 + default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32 + default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32 help - This option allows a kernel builder to override the default choice - of CRC32 algorithm. Choose the default ("slice by 8") unless you - know that you need one of the others. + This option allows you to override the default choice of CRC32 + implementation. Choose the default unless you know that you need one + of the others. -config CRC32_SLICEBY8 +config CRC32_IMPL_ARCH_PLUS_SLICEBY8 + bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32 + help + Use architecture-optimized implementation of CRC32. Fall back to + slice-by-8 in cases where the arch-optimized implementation cannot be + used, e.g. if the CPU lacks support for the needed instructions. + + This is the default when an arch-optimized implementation exists. + +config CRC32_IMPL_ARCH_PLUS_SLICEBY1 + bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32 + help + Use architecture-optimized implementation of CRC32, but fall back to + slice-by-1 instead of slice-by-8 in order to reduce the binary size. + +config CRC32_IMPL_SLICEBY8 bool "Slice by 8 bytes" help Calculate checksum 8 bytes at a time with a clever slicing algorithm. - This is the fastest algorithm, but comes with a 8KiB lookup table. - Most modern processors have enough cache to hold this table without - thrashing the cache. - - This is the default implementation choice. Choose this one unless - you have a good reason not to. + This is much slower than the architecture-optimized implementation of + CRC32 (if the selected arch has one), but it is portable and is the + fastest implementation when no arch-optimized implementation is + available. It uses an 8KiB lookup table. Most modern processors have + enough cache to hold this table without thrashing the cache. -config CRC32_SLICEBY4 +config CRC32_IMPL_SLICEBY4 bool "Slice by 4 bytes" help Calculate checksum 4 bytes at a time with a clever slicing algorithm. @@ -228,15 +246,15 @@ config CRC32_SLICEBY4 Only choose this option if you know what you are doing. -config CRC32_SARWATE - bool "Sarwate's Algorithm (one byte at a time)" +config CRC32_IMPL_SLICEBY1 + bool "Slice by 1 byte (Sarwate's algorithm)" help Calculate checksum a byte at a time using Sarwate's algorithm. This - is not particularly fast, but has a small 256 byte lookup table. + is not particularly fast, but has a small 1KiB lookup table. Only choose this option if you know what you are doing. -config CRC32_BIT +config CRC32_IMPL_BIT bool "Classic Algorithm (one bit at a time)" help Calculate checksum one bit at a time. This is VERY slow, but has @@ -246,6 +264,26 @@ config CRC32_BIT endchoice +config CRC32_ARCH + tristate + default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1 + +config CRC32_SLICEBY8 + bool + default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8 + +config CRC32_SLICEBY4 + bool + default y if CRC32_IMPL_SLICEBY4 + +config CRC32_SARWATE + bool + default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1 + +config CRC32_BIT + bool + default y if CRC32_IMPL_BIT + config CRC64 tristate "CRC64 functions" help diff --git a/lib/crc32.c b/lib/crc32.c index c67059b0082b..47151624332e 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -183,35 +183,27 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, } #if CRC_LE_BITS == 1 -u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } -u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else -u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE); } -u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); } #endif -EXPORT_SYMBOL(crc32_le); -EXPORT_SYMBOL(__crc32c_le); - -u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le); EXPORT_SYMBOL(crc32_le_base); - -u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le); EXPORT_SYMBOL(crc32c_le_base); -u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be); - /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit @@ -335,14 +327,14 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, } #if CRC_BE_BITS == 1 -u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); } #else -u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); } #endif -EXPORT_SYMBOL(crc32_be); +EXPORT_SYMBOL(crc32_be_base); -- cgit v1.2.3 From b5ae12e0ee099e4c458f7814f0317f4e2cbf105e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:08:28 -0800 Subject: lib/crc32: expose whether the lib is really optimized at runtime Make the CRC32 library export a function crc32_optimizations() which returns flags that indicate which CRC32 functions are actually executing optimized code at runtime. This will be used to determine whether the crc32[c]-$arch shash algorithms should be registered in the crypto API. btrfs could also start using these flags instead of the hack that it currently uses where it parses the crypto_shash_driver_name. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20241202010844.144356-4-ebiggers@kernel.org Signed-off-by: Eric Biggers --- arch/arm64/lib/crc32-glue.c | 10 ++++++++++ arch/riscv/lib/crc32-riscv.c | 10 ++++++++++ include/linux/crc32.h | 15 +++++++++++++++ 3 files changed, 35 insertions(+) (limited to 'include') diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index d7f6e1cbf0d2..15c4c9db573e 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -85,5 +85,15 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) } EXPORT_SYMBOL(crc32_be_arch); +u32 crc32_optimizations(void) +{ + if (alternative_has_cap_likely(ARM64_HAS_CRC32)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c index a3ff7db2a1ce..53d56ab422c7 100644 --- a/arch/riscv/lib/crc32-riscv.c +++ b/arch/riscv/lib/crc32-riscv.c @@ -297,5 +297,15 @@ legacy: } EXPORT_SYMBOL(crc32_be_arch); +u32 crc32_optimizations(void) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 58c632533b08..e9bd40056687 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -37,6 +37,21 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) return crc32c_le_base(crc, p, len); } +/* + * crc32_optimizations() returns flags that indicate which CRC32 library + * functions are using architecture-specific optimizations. Unlike + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 + * variants and also whether any needed CPU features are available at runtime. + */ +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ +#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */ +#if IS_ENABLED(CONFIG_CRC32_ARCH) +u32 crc32_optimizations(void); +#else +static inline u32 crc32_optimizations(void) { return 0; } +#endif + /** * crc32_le_combine - Combine two crc32 check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 -- cgit v1.2.3 From 38a9a5121c3bcf2ed857430a92e493568b247c35 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:08:40 -0800 Subject: lib/crc32: make crc32c() go directly to lib Now that the lower level __crc32c_le() library function is optimized for each architecture, make crc32c() just call that instead of taking an inefficient and error-prone detour through the shash API. Note: a future cleanup should make crc32c_le() be the actual library function instead of __crc32c_le(). That will require updating callers of __crc32c_le() to use crc32c_le() instead, and updating callers of crc32c_le() that expect a 'const void *' arg to expect 'const u8 *' instead. Similarly, a future cleanup should remove LIBCRC32C by making everyone who is selecting it just select CRC32 directly instead. Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20241202010844.144356-16-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc32c.h | 7 +++-- lib/Kconfig | 10 ++----- lib/Makefile | 1 - lib/libcrc32c.c | 74 -------------------------------------------------- 4 files changed, 8 insertions(+), 84 deletions(-) delete mode 100644 lib/libcrc32c.c (limited to 'include') diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 357ae4611a45..47eb78003c26 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -2,9 +2,12 @@ #ifndef _LINUX_CRC32C_H #define _LINUX_CRC32C_H -#include +#include -extern u32 crc32c(u32 crc, const void *address, unsigned int length); +static inline u32 crc32c(u32 crc, const void *address, unsigned int length) +{ + return __crc32c_le(crc, address, length); +} /* This macro exists for backwards-compatibility. */ #define crc32c_le crc32c diff --git a/lib/Kconfig b/lib/Kconfig index 8858030bebae..d0c2dc0dac32 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -310,14 +310,10 @@ config CRC7 config LIBCRC32C tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check" - select CRYPTO - select CRYPTO_CRC32C + select CRC32 help - This option is provided for the case where no in-kernel-tree - modules require CRC32c functions, but a module built outside the - kernel tree does. Such modules that use library CRC32c functions - require M here. See Castagnoli93. - Module will be libcrc32c. + This option just selects CRC32 and is provided for compatibility + purposes until the users are updated to select CRC32 directly. config CRC8 tristate "CRC8 function" diff --git a/lib/Makefile b/lib/Makefile index a8155c972f02..6573163d24e4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -167,7 +167,6 @@ obj-$(CONFIG_CRC64) += crc64.o obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o obj-$(CONFIG_CRC4) += crc4.o obj-$(CONFIG_CRC7) += crc7.o -obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o obj-$(CONFIG_CRC64_ROCKSOFT) += crc64-rocksoft.o obj-$(CONFIG_XXHASH) += xxhash.o diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c deleted file mode 100644 index 649e687413a0..000000000000 --- a/lib/libcrc32c.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * CRC32C - *@Article{castagnoli-crc, - * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman}, - * title = {{Optimization of Cyclic Redundancy-Check Codes with 24 - * and 32 Parity Bits}}, - * journal = IEEE Transactions on Communication, - * year = {1993}, - * volume = {41}, - * number = {6}, - * pages = {}, - * month = {June}, - *} - * Used by the iSCSI driver, possibly others, and derived from - * the iscsi-crc.c module of the linux-iscsi driver at - * http://linux-iscsi.sourceforge.net. - * - * Following the example of lib/crc32, this function is intended to be - * flexible and useful for all users. Modules that currently have their - * own crc32c, but hopefully may be able to use this one are: - * net/sctp (please add all your doco to here if you change to - * use this one!) - * - * - * Copyright (c) 2004 Cisco Systems, Inc. - */ - -#include -#include -#include -#include -#include -#include - -static struct crypto_shash *tfm; - -u32 crc32c(u32 crc, const void *address, unsigned int length) -{ - SHASH_DESC_ON_STACK(shash, tfm); - u32 ret, *ctx = (u32 *)shash_desc_ctx(shash); - int err; - - shash->tfm = tfm; - *ctx = crc; - - err = crypto_shash_update(shash, address, length); - BUG_ON(err); - - ret = *ctx; - barrier_data(ctx); - return ret; -} - -EXPORT_SYMBOL(crc32c); - -static int __init libcrc32c_mod_init(void) -{ - tfm = crypto_alloc_shash("crc32c", 0, 0); - return PTR_ERR_OR_ZERO(tfm); -} - -static void __exit libcrc32c_mod_fini(void) -{ - crypto_free_shash(tfm); -} - -module_init(libcrc32c_mod_init); -module_exit(libcrc32c_mod_fini); - -MODULE_AUTHOR("Clay Haapala "); -MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); -MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32c"); -- cgit v1.2.3 From dd348f054b24a3f57cbcdc2c8e7ebc22c62eb72f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:08:42 -0800 Subject: jbd2: switch to using the crc32c library Now that the crc32c() library function directly takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32c(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. Reviewed-by: Ard Biesheuvel Reviewed-by: Darrick J. Wong Acked-by: Theodore Ts'o Link: https://lore.kernel.org/r/20241202010844.144356-18-ebiggers@kernel.org Signed-off-by: Eric Biggers --- fs/jbd2/Kconfig | 2 -- fs/jbd2/journal.c | 30 +++--------------------------- include/linux/jbd2.h | 33 +++------------------------------ 3 files changed, 6 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 4ad2c67f93f1..9c19e1512101 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig @@ -2,8 +2,6 @@ config JBD2 tristate select CRC32 - select CRYPTO - select CRYPTO_CRC32C help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. It is currently used by diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 7e49d912b091..d8084b31b361 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1369,20 +1369,12 @@ static int journal_check_superblock(journal_t *journal) return err; } - /* Load the checksum driver */ if (jbd2_journal_has_csum_v2or3_feature(journal)) { if (sb->s_checksum_type != JBD2_CRC32C_CHKSUM) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); return err; } - journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); - err = PTR_ERR(journal->j_chksum_driver); - journal->j_chksum_driver = NULL; - return err; - } /* Check superblock checksum */ if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) { printk(KERN_ERR "JBD2: journal checksum error\n"); @@ -1608,8 +1600,6 @@ static journal_t *journal_init_common(struct block_device *bdev, err_cleanup: percpu_counter_destroy(&journal->j_checkpoint_jh_count); - if (journal->j_chksum_driver) - crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_wbuf); jbd2_journal_destroy_revoke(journal); journal_fail_superblock(journal); @@ -2191,8 +2181,6 @@ int jbd2_journal_destroy(journal_t *journal) iput(journal->j_inode); if (journal->j_revoke) jbd2_journal_destroy_revoke(journal); - if (journal->j_chksum_driver) - crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_fc_wbuf); kfree(journal->j_wbuf); kfree(journal); @@ -2337,27 +2325,15 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat, } } - /* Load the checksum driver if necessary */ - if ((journal->j_chksum_driver == NULL) && - INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { - journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); - journal->j_chksum_driver = NULL; - return 0; - } - /* Precompute checksum seed for all metadata */ - journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, - sizeof(sb->s_uuid)); - } - lock_buffer(journal->j_sb_buffer); - /* If enabling v3 checksums, update superblock */ + /* If enabling v3 checksums, update superblock and precompute seed */ if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { sb->s_checksum_type = JBD2_CRC32C_CHKSUM; sb->s_feature_compat &= ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); + journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, + sizeof(sb->s_uuid)); } /* If enabling v1 checksums, downgrade superblock */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 50f7ea8714bf..561025b4f3d9 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -28,7 +28,7 @@ #include #include #include -#include +#include #endif #define journal_oom_retry 1 @@ -1241,13 +1241,6 @@ struct journal_s */ void *j_private; - /** - * @j_chksum_driver: - * - * Reference to checksum algorithm driver via cryptoapi. - */ - struct crypto_shash *j_chksum_driver; - /** * @j_csum_seed: * @@ -1750,10 +1743,7 @@ static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { - WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && - journal->j_chksum_driver == NULL); - - return journal->j_chksum_driver != NULL; + return jbd2_journal_has_csum_v2or3_feature(journal); } static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) @@ -1790,27 +1780,10 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 -/* JBD uses a CRC32 checksum */ -#define JBD_MAX_CHECKSUM_SIZE 4 - static inline u32 jbd2_chksum(journal_t *journal, u32 crc, const void *address, unsigned int length) { - DEFINE_RAW_FLEX(struct shash_desc, desc, __ctx, - DIV_ROUND_UP(JBD_MAX_CHECKSUM_SIZE, - sizeof(*((struct shash_desc *)0)->__ctx))); - int err; - - BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > - JBD_MAX_CHECKSUM_SIZE); - - desc->tfm = journal->j_chksum_driver; - *(u32 *)desc->__ctx = crc; - - err = crypto_shash_update(desc, address, length); - BUG_ON(err); - - return *(u32 *)desc->__ctx; + return crc32c(crc, address, length); } /* Return most recent uncommitted transaction */ -- cgit v1.2.3 From 31e4cdde4d8b4bc358f3e6b44647ead3cba13aba Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:08:44 -0800 Subject: scsi: target: iscsi: switch to using the crc32c library Now that the crc32c() library function directly takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32c(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. Reviewed-by: Ard Biesheuvel Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20241202010844.144356-20-ebiggers@kernel.org Signed-off-by: Eric Biggers --- drivers/target/iscsi/Kconfig | 3 +- drivers/target/iscsi/iscsi_target.c | 153 +++++++++--------------------- drivers/target/iscsi/iscsi_target_login.c | 50 ---------- drivers/target/iscsi/iscsi_target_login.h | 1 - drivers/target/iscsi/iscsi_target_nego.c | 21 +--- include/target/iscsi/iscsi_target_core.h | 3 - 6 files changed, 49 insertions(+), 182 deletions(-) (limited to 'include') diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 1c0517a12571..70d76f3dd693 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -2,8 +2,9 @@ config ISCSI_TARGET tristate "SCSI Target Mode Stack" depends on INET + select CRC32 select CRYPTO - select CRYPTO_CRC32C + select CRYPTO_HASH help Say M to enable the SCSI target mode stack. A SCSI target mode stack is software that makes local storage available over a storage network diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index 6002283cbeba..091c1efccfb7 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -8,7 +8,7 @@ * ******************************************************************************/ -#include +#include #include #include #include @@ -490,8 +490,8 @@ void iscsit_aborted_task(struct iscsit_conn *conn, struct iscsit_cmd *cmd) } EXPORT_SYMBOL(iscsit_aborted_task); -static void iscsit_do_crypto_hash_buf(struct ahash_request *, const void *, - u32, u32, const void *, void *); +static u32 iscsit_crc_buf(const void *buf, u32 payload_length, + u32 padding, const void *pad_bytes); static void iscsit_tx_thread_wait_for_tcp(struct iscsit_conn *); static int @@ -510,9 +510,7 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd, if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, - header_digest); + *header_digest = iscsit_crc_buf(hdr, ISCSI_HDR_LEN, 0, NULL); iov[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; @@ -537,11 +535,9 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd, } if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, - data_buf, data_buf_len, - padding, &cmd->pad_bytes, - &cmd->data_crc); - + cmd->data_crc = iscsit_crc_buf(data_buf, data_buf_len, + padding, + &cmd->pad_bytes); iov[niov].iov_base = &cmd->data_crc; iov[niov++].iov_len = ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; @@ -566,8 +562,8 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd, static int iscsit_map_iovec(struct iscsit_cmd *cmd, struct kvec *iov, int nvec, u32 data_offset, u32 data_length); static void iscsit_unmap_iovec(struct iscsit_cmd *); -static u32 iscsit_do_crypto_hash_sg(struct ahash_request *, struct iscsit_cmd *, - u32, u32, u32, u8 *); +static u32 iscsit_crc_sglist(const struct iscsit_cmd *cmd, u32 data_length, + u32 padding, const u8 *pad_bytes); static int iscsit_xmit_datain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd, const struct iscsi_datain *datain) @@ -584,10 +580,8 @@ iscsit_xmit_datain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd, if (conn->conn_ops->HeaderDigest) { u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, cmd->pdu, - ISCSI_HDR_LEN, 0, NULL, - header_digest); - + *header_digest = iscsit_crc_buf(cmd->pdu, ISCSI_HDR_LEN, 0, + NULL); iov[0].iov_len += ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; @@ -614,12 +608,8 @@ iscsit_xmit_datain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd, } if (conn->conn_ops->DataDigest) { - cmd->data_crc = iscsit_do_crypto_hash_sg(conn->conn_tx_hash, - cmd, datain->offset, - datain->length, - cmd->padding, - cmd->pad_bytes); - + cmd->data_crc = iscsit_crc_sglist(cmd, datain->length, + cmd->padding, cmd->pad_bytes); iov[iov_count].iov_base = &cmd->data_crc; iov[iov_count++].iov_len = ISCSI_CRC_LEN; tx_size += ISCSI_CRC_LEN; @@ -1404,77 +1394,45 @@ iscsit_handle_scsi_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd, return iscsit_get_immediate_data(cmd, hdr, dump_payload); } -static u32 iscsit_do_crypto_hash_sg( - struct ahash_request *hash, - struct iscsit_cmd *cmd, - u32 data_offset, - u32 data_length, - u32 padding, - u8 *pad_bytes) +static u32 iscsit_crc_sglist(const struct iscsit_cmd *cmd, u32 data_length, + u32 padding, const u8 *pad_bytes) { - u32 data_crc; - struct scatterlist *sg; - unsigned int page_off; - - crypto_ahash_init(hash); - - sg = cmd->first_data_sg; - page_off = cmd->first_data_sg_off; - - if (data_length && page_off) { - struct scatterlist first_sg; - u32 len = min_t(u32, data_length, sg->length - page_off); - - sg_init_table(&first_sg, 1); - sg_set_page(&first_sg, sg_page(sg), len, sg->offset + page_off); - - ahash_request_set_crypt(hash, &first_sg, NULL, len); - crypto_ahash_update(hash); - - data_length -= len; - sg = sg_next(sg); - } + struct scatterlist *sg = cmd->first_data_sg; + unsigned int page_off = cmd->first_data_sg_off; + u32 crc = ~0; while (data_length) { - u32 cur_len = min_t(u32, data_length, sg->length); + u32 cur_len = min_t(u32, data_length, sg->length - page_off); + const void *virt; - ahash_request_set_crypt(hash, sg, NULL, cur_len); - crypto_ahash_update(hash); + virt = kmap_local_page(sg_page(sg)) + sg->offset + page_off; + crc = crc32c(crc, virt, cur_len); + kunmap_local(virt); - data_length -= cur_len; /* iscsit_map_iovec has already checked for invalid sg pointers */ sg = sg_next(sg); - } - if (padding) { - struct scatterlist pad_sg; - - sg_init_one(&pad_sg, pad_bytes, padding); - ahash_request_set_crypt(hash, &pad_sg, (u8 *)&data_crc, - padding); - crypto_ahash_finup(hash); - } else { - ahash_request_set_crypt(hash, NULL, (u8 *)&data_crc, 0); - crypto_ahash_final(hash); + page_off = 0; + data_length -= cur_len; } - return data_crc; + if (padding) + crc = crc32c(crc, pad_bytes, padding); + + return ~crc; } -static void iscsit_do_crypto_hash_buf(struct ahash_request *hash, - const void *buf, u32 payload_length, u32 padding, - const void *pad_bytes, void *data_crc) +static u32 iscsit_crc_buf(const void *buf, u32 payload_length, + u32 padding, const void *pad_bytes) { - struct scatterlist sg[2]; + u32 crc = ~0; - sg_init_table(sg, ARRAY_SIZE(sg)); - sg_set_buf(sg, buf, payload_length); - if (padding) - sg_set_buf(sg + 1, pad_bytes, padding); + crc = crc32c(crc, buf, payload_length); - ahash_request_set_crypt(hash, sg, data_crc, payload_length + padding); + if (padding) + crc = crc32c(crc, pad_bytes, padding); - crypto_ahash_digest(hash); + return ~crc; } int @@ -1662,11 +1620,8 @@ iscsit_get_dataout(struct iscsit_conn *conn, struct iscsit_cmd *cmd, if (conn->conn_ops->DataDigest) { u32 data_crc; - data_crc = iscsit_do_crypto_hash_sg(conn->conn_rx_hash, cmd, - be32_to_cpu(hdr->offset), - payload_length, padding, - cmd->pad_bytes); - + data_crc = iscsit_crc_sglist(cmd, payload_length, padding, + cmd->pad_bytes); if (checksum != data_crc) { pr_err("ITT: 0x%08x, Offset: %u, Length: %u," " DataSN: 0x%08x, CRC32C DataDigest 0x%08x" @@ -1925,10 +1880,8 @@ static int iscsit_handle_nop_out(struct iscsit_conn *conn, struct iscsit_cmd *cm } if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_rx_hash, ping_data, - payload_length, padding, - cmd->pad_bytes, &data_crc); - + data_crc = iscsit_crc_buf(ping_data, payload_length, + padding, cmd->pad_bytes); if (checksum != data_crc) { pr_err("Ping data CRC32C DataDigest" " 0x%08x does not match computed 0x%08x\n", @@ -2328,10 +2281,7 @@ iscsit_handle_text_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd, goto reject; if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_rx_hash, - text_in, rx_size, 0, NULL, - &data_crc); - + data_crc = iscsit_crc_buf(text_in, rx_size, 0, NULL); if (checksum != data_crc) { pr_err("Text data CRC32C DataDigest" " 0x%08x does not match computed" @@ -2688,10 +2638,8 @@ static int iscsit_handle_immediate_data( if (conn->conn_ops->DataDigest) { u32 data_crc; - data_crc = iscsit_do_crypto_hash_sg(conn->conn_rx_hash, cmd, - cmd->write_data_done, length, padding, - cmd->pad_bytes); - + data_crc = iscsit_crc_sglist(cmd, length, padding, + cmd->pad_bytes); if (checksum != data_crc) { pr_err("ImmediateData CRC32C DataDigest 0x%08x" " does not match computed 0x%08x\n", checksum, @@ -4116,10 +4064,8 @@ static void iscsit_get_rx_pdu(struct iscsit_conn *conn) break; } - iscsit_do_crypto_hash_buf(conn->conn_rx_hash, buffer, - ISCSI_HDR_LEN, 0, NULL, - &checksum); - + checksum = iscsit_crc_buf(buffer, ISCSI_HDR_LEN, 0, + NULL); if (digest != checksum) { pr_err("HeaderDigest CRC32C failed," " received 0x%08x, computed 0x%08x\n", @@ -4406,15 +4352,6 @@ int iscsit_close_connection( */ iscsit_check_conn_usage_count(conn); - ahash_request_free(conn->conn_tx_hash); - if (conn->conn_rx_hash) { - struct crypto_ahash *tfm; - - tfm = crypto_ahash_reqtfm(conn->conn_rx_hash); - ahash_request_free(conn->conn_rx_hash); - crypto_free_ahash(tfm); - } - if (conn->sock) sock_release(conn->sock); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index 90b870f234f0..c2ac9a99ebbb 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -8,7 +8,6 @@ * ******************************************************************************/ -#include #include #include #include @@ -71,46 +70,6 @@ out_login: return NULL; } -/* - * Used by iscsi_target_nego.c:iscsi_target_locate_portal() to setup - * per struct iscsit_conn libcrypto contexts for crc32c and crc32-intel - */ -int iscsi_login_setup_crypto(struct iscsit_conn *conn) -{ - struct crypto_ahash *tfm; - - /* - * Setup slicing by CRC32C algorithm for RX and TX libcrypto contexts - * which will default to crc32c_intel.ko for cpu_has_xmm4_2, or fallback - * to software 1x8 byte slicing from crc32c.ko - */ - tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) { - pr_err("crypto_alloc_ahash() failed\n"); - return -ENOMEM; - } - - conn->conn_rx_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!conn->conn_rx_hash) { - pr_err("ahash_request_alloc() failed for conn_rx_hash\n"); - crypto_free_ahash(tfm); - return -ENOMEM; - } - ahash_request_set_callback(conn->conn_rx_hash, 0, NULL, NULL); - - conn->conn_tx_hash = ahash_request_alloc(tfm, GFP_KERNEL); - if (!conn->conn_tx_hash) { - pr_err("ahash_request_alloc() failed for conn_tx_hash\n"); - ahash_request_free(conn->conn_rx_hash); - conn->conn_rx_hash = NULL; - crypto_free_ahash(tfm); - return -ENOMEM; - } - ahash_request_set_callback(conn->conn_tx_hash, 0, NULL, NULL); - - return 0; -} - static int iscsi_login_check_initiator_version( struct iscsit_conn *conn, u8 version_max, @@ -1165,15 +1124,6 @@ old_sess_out: iscsit_dec_session_usage_count(conn->sess); } - ahash_request_free(conn->conn_tx_hash); - if (conn->conn_rx_hash) { - struct crypto_ahash *tfm; - - tfm = crypto_ahash_reqtfm(conn->conn_rx_hash); - ahash_request_free(conn->conn_rx_hash); - crypto_free_ahash(tfm); - } - if (conn->param_list) { iscsi_release_param_list(conn->param_list); conn->param_list = NULL; diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h index e8760735486b..03c7d695d58f 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -9,7 +9,6 @@ struct iscsi_login; struct iscsi_np; struct sockaddr_storage; -extern int iscsi_login_setup_crypto(struct iscsit_conn *); extern int iscsi_check_for_session_reinstatement(struct iscsit_conn *); extern int iscsi_login_post_auth_non_zero_tsih(struct iscsit_conn *, u16, u32); extern int iscsit_setup_np(struct iscsi_np *, diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index fa3fb5f4e6bc..16e3ded98c32 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1194,14 +1194,7 @@ int iscsi_target_locate_portal( goto get_target; sess->sess_ops->SessionType = 1; - /* - * Setup crc32c modules from libcrypto - */ - if (iscsi_login_setup_crypto(conn) < 0) { - pr_err("iscsi_login_setup_crypto() failed\n"); - ret = -1; - goto out; - } + /* * Serialize access across the discovery struct iscsi_portal_group to * process login attempt. @@ -1258,17 +1251,7 @@ get_target: } conn->tpg_np = tpg_np; pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt); - /* - * Setup crc32c modules from libcrypto - */ - if (iscsi_login_setup_crypto(conn) < 0) { - pr_err("iscsi_login_setup_crypto() failed\n"); - kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); - iscsit_put_tiqn_for_login(tiqn); - conn->tpg = NULL; - ret = -1; - goto out; - } + /* * Serialize access across the struct iscsi_portal_group to * process login attempt. diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscsi/iscsi_target_core.h index 60af7c63b34e..51ca80abacf7 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -576,9 +576,6 @@ struct iscsit_conn { spinlock_t state_lock; spinlock_t login_timer_lock; spinlock_t login_worker_lock; - /* libcrypto RX and TX contexts for crc32c */ - struct ahash_request *conn_rx_hash; - struct ahash_request *conn_tx_hash; /* Used for scheduling TX and RX connection kthreads */ cpumask_var_t conn_cpumask; cpumask_var_t allowed_cpumask; -- cgit v1.2.3 From be3c45b070cba3be4dd248b38d4798e3e2859451 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:20:45 -0800 Subject: lib/crc-t10dif: stop wrapping the crypto API In preparation for making the CRC-T10DIF library directly optimized for each architecture, like what has been done for CRC32, get rid of the weird layering where crc_t10dif_update() calls into the crypto API. Instead, move crc_t10dif_generic() into the crc-t10dif library module, and make crc_t10dif_update() just call crc_t10dif_generic(). Acceleration will be reintroduced via crc_t10dif_arch() in the following patches. Reviewed-by: Ard Biesheuvel Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20241202012056.209768-2-ebiggers@kernel.org Signed-off-by: Eric Biggers --- crypto/Kconfig | 1 + crypto/Makefile | 2 +- crypto/crct10dif_common.c | 82 ------------------------ include/linux/crc-t10dif.h | 16 +++-- lib/Kconfig | 2 - lib/crc-t10dif.c | 156 +++++++++++++-------------------------------- 6 files changed, 58 insertions(+), 201 deletions(-) delete mode 100644 crypto/crct10dif_common.c (limited to 'include') diff --git a/crypto/Kconfig b/crypto/Kconfig index 6b0bfbccac08..b459e8a23acc 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1102,6 +1102,7 @@ config CRYPTO_CRC32 config CRYPTO_CRCT10DIF tristate "CRCT10DIF" select CRYPTO_HASH + select CRC_T10DIF help CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF) diff --git a/crypto/Makefile b/crypto/Makefile index 77abca715445..0537df1719d3 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -157,7 +157,7 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o CFLAGS_crc32c_generic.o += -DARCH=$(ARCH) CFLAGS_crc32_generic.o += -DARCH=$(ARCH) -obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o +obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_generic.o obj-$(CONFIG_CRYPTO_CRC64_ROCKSOFT) += crc64_rocksoft_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o diff --git a/crypto/crct10dif_common.c b/crypto/crct10dif_common.c deleted file mode 100644 index b2fab366f518..000000000000 --- a/crypto/crct10dif_common.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Cryptographic API. - * - * T10 Data Integrity Field CRC16 Crypto Transform - * - * Copyright (c) 2007 Oracle Corporation. All rights reserved. - * Written by Martin K. Petersen - * Copyright (C) 2013 Intel Corporation - * Author: Tim Chen - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#include -#include -#include - -/* Table generated using the following polynomium: - * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 - * gt: 0x8bb7 - */ -static const __u16 t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; - -__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len) -{ - unsigned int i; - - for (i = 0 ; i < len ; i++) - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; - - return crc; -} -EXPORT_SYMBOL(crc_t10dif_generic); - -MODULE_DESCRIPTION("T10 DIF CRC calculation common code"); -MODULE_LICENSE("GPL"); diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index 6bb0c0bf357b..206ba2305483 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -6,11 +6,17 @@ #define CRC_T10DIF_DIGEST_SIZE 2 #define CRC_T10DIF_BLOCK_SIZE 1 -#define CRC_T10DIF_STRING "crct10dif" -extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, - size_t len); -extern __u16 crc_t10dif(unsigned char const *, size_t); -extern __u16 crc_t10dif_update(__u16 crc, unsigned char const *, size_t); +u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len); + +static inline u16 crc_t10dif_update(u16 crc, const u8 *p, size_t len) +{ + return crc_t10dif_generic(crc, p, len); +} + +static inline u16 crc_t10dif(const u8 *p, size_t len) +{ + return crc_t10dif_update(0, p, len); +} #endif diff --git a/lib/Kconfig b/lib/Kconfig index d0c2dc0dac32..be59f7cdf448 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -156,8 +156,6 @@ config CRC16 config CRC_T10DIF tristate "CRC calculation for the T10 Data Integrity Field" - select CRYPTO - select CRYPTO_CRCT10DIF help This option is only needed if a module that's not in the kernel tree needs to calculate CRC checks for use with the diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index 1ed2ed487097..311c2ab829f1 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -9,123 +9,57 @@ #include #include #include -#include -#include -#include -#include -#include -#include -static struct crypto_shash __rcu *crct10dif_tfm; -static DEFINE_STATIC_KEY_TRUE(crct10dif_fallback); -static DEFINE_MUTEX(crc_t10dif_mutex); -static struct work_struct crct10dif_rehash_work; - -static int crc_t10dif_notify(struct notifier_block *self, unsigned long val, void *data) -{ - struct crypto_alg *alg = data; - - if (val != CRYPTO_MSG_ALG_LOADED || - strcmp(alg->cra_name, CRC_T10DIF_STRING)) - return NOTIFY_DONE; - - schedule_work(&crct10dif_rehash_work); - return NOTIFY_OK; -} - -static void crc_t10dif_rehash(struct work_struct *work) -{ - struct crypto_shash *new, *old; - - mutex_lock(&crc_t10dif_mutex); - old = rcu_dereference_protected(crct10dif_tfm, - lockdep_is_held(&crc_t10dif_mutex)); - new = crypto_alloc_shash(CRC_T10DIF_STRING, 0, 0); - if (IS_ERR(new)) { - mutex_unlock(&crc_t10dif_mutex); - return; - } - rcu_assign_pointer(crct10dif_tfm, new); - mutex_unlock(&crc_t10dif_mutex); - - if (old) { - synchronize_rcu(); - crypto_free_shash(old); - } else { - static_branch_disable(&crct10dif_fallback); - } -} - -static struct notifier_block crc_t10dif_nb = { - .notifier_call = crc_t10dif_notify, +/* + * Table generated using the following polynomial: + * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 + * gt: 0x8bb7 + */ +static const u16 t10_dif_crc_table[256] = { + 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, + 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, + 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, + 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, + 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, + 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, + 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, + 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, + 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, + 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, + 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, + 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, + 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, + 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, + 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, + 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, + 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, + 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, + 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, + 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, + 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, + 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, + 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, + 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, + 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, + 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, + 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, + 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, + 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, + 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, + 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, + 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 }; -__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len) -{ - struct { - struct shash_desc shash; - __u16 crc; - } desc; - int err; - - if (static_branch_unlikely(&crct10dif_fallback)) - return crc_t10dif_generic(crc, buffer, len); - - rcu_read_lock(); - desc.shash.tfm = rcu_dereference(crct10dif_tfm); - desc.crc = crc; - err = crypto_shash_update(&desc.shash, buffer, len); - rcu_read_unlock(); - - BUG_ON(err); - - return desc.crc; -} -EXPORT_SYMBOL(crc_t10dif_update); - -__u16 crc_t10dif(const unsigned char *buffer, size_t len) -{ - return crc_t10dif_update(0, buffer, len); -} -EXPORT_SYMBOL(crc_t10dif); - -static int __init crc_t10dif_mod_init(void) -{ - INIT_WORK(&crct10dif_rehash_work, crc_t10dif_rehash); - crypto_register_notifier(&crc_t10dif_nb); - crc_t10dif_rehash(&crct10dif_rehash_work); - return 0; -} - -static void __exit crc_t10dif_mod_fini(void) -{ - crypto_unregister_notifier(&crc_t10dif_nb); - cancel_work_sync(&crct10dif_rehash_work); - crypto_free_shash(rcu_dereference_protected(crct10dif_tfm, 1)); -} - -module_init(crc_t10dif_mod_init); -module_exit(crc_t10dif_mod_fini); - -static int crc_t10dif_transform_show(char *buffer, const struct kernel_param *kp) +u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len) { - struct crypto_shash *tfm; - int len; + size_t i; - if (static_branch_unlikely(&crct10dif_fallback)) - return sprintf(buffer, "fallback\n"); + for (i = 0; i < len; i++) + crc = (crc << 8) ^ t10_dif_crc_table[(crc >> 8) ^ p[i]]; - rcu_read_lock(); - tfm = rcu_dereference(crct10dif_tfm); - len = snprintf(buffer, PAGE_SIZE, "%s\n", - crypto_shash_driver_name(tfm)); - rcu_read_unlock(); - - return len; + return crc; } +EXPORT_SYMBOL(crc_t10dif_generic); -module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0444); - -MODULE_DESCRIPTION("T10 DIF CRC calculation (library API)"); +MODULE_DESCRIPTION("T10 DIF CRC calculation"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crct10dif"); -- cgit v1.2.3 From 0961c3bcefa64d5f0999e2b703391862c733bb52 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 1 Dec 2024 17:20:46 -0800 Subject: lib/crc-t10dif: add support for arch overrides Following what was done for CRC32, add support for architecture-specific override of the CRC-T10DIF library. This will allow the CRC-T10DIF library functions to access architecture-optimized code directly. Reviewed-by: Ard Biesheuvel Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20241202012056.209768-3-ebiggers@kernel.org Signed-off-by: Eric Biggers --- include/linux/crc-t10dif.h | 12 ++++++++++++ lib/Kconfig | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) (limited to 'include') diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index 206ba2305483..16787c1cee21 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -7,10 +7,13 @@ #define CRC_T10DIF_DIGEST_SIZE 2 #define CRC_T10DIF_BLOCK_SIZE 1 +u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len); u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len); static inline u16 crc_t10dif_update(u16 crc, const u8 *p, size_t len) { + if (IS_ENABLED(CONFIG_CRC_T10DIF_ARCH)) + return crc_t10dif_arch(crc, p, len); return crc_t10dif_generic(crc, p, len); } @@ -19,4 +22,13 @@ static inline u16 crc_t10dif(const u8 *p, size_t len) return crc_t10dif_update(0, p, len); } +#if IS_ENABLED(CONFIG_CRC_T10DIF_ARCH) +bool crc_t10dif_is_optimized(void); +#else +static inline bool crc_t10dif_is_optimized(void) +{ + return false; +} +#endif + #endif diff --git a/lib/Kconfig b/lib/Kconfig index be59f7cdf448..e52a38d8d783 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -161,6 +161,38 @@ config CRC_T10DIF kernel tree needs to calculate CRC checks for use with the SCSI data integrity subsystem. +config ARCH_HAS_CRC_T10DIF + bool + +choice + prompt "CRC-T10DIF implementation" + depends on CRC_T10DIF + default CRC_T10DIF_IMPL_ARCH if ARCH_HAS_CRC_T10DIF + default CRC_T10DIF_IMPL_GENERIC if !ARCH_HAS_CRC_T10DIF + help + This option allows you to override the default choice of CRC-T10DIF + implementation. + +config CRC_T10DIF_IMPL_ARCH + bool "Architecture-optimized" if ARCH_HAS_CRC_T10DIF + help + Use the optimized implementation of CRC-T10DIF for the selected + architecture. It is recommended to keep this enabled, as it can + greatly improve CRC-T10DIF performance. + +config CRC_T10DIF_IMPL_GENERIC + bool "Generic implementation" + help + Use the generic table-based implementation of CRC-T10DIF. Selecting + this will reduce code size slightly but can greatly reduce CRC-T10DIF + performance. + +endchoice + +config CRC_T10DIF_ARCH + tristate + default CRC_T10DIF if CRC_T10DIF_IMPL_ARCH + config CRC64_ROCKSOFT tristate "CRC calculation for the Rocksoft model CRC64" select CRC64 -- cgit v1.2.3 From f4d3d7340e719dd3d2c23ce8d6c360e2f93ba7e4 Mon Sep 17 00:00:00 2001 From: Taniya Das Date: Tue, 22 Oct 2024 17:22:52 +0530 Subject: dt-bindings: clock: qcom: Add QCS615 GCC clocks Add device tree bindings for global clock controller on QCS615 SoCs. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Taniya Das Link: https://lore.kernel.org/r/20241022-qcs615-clock-driver-v4-3-3d716ad0d987@quicinc.com Signed-off-by: Bjorn Andersson --- .../devicetree/bindings/clock/qcom,qcs615-gcc.yaml | 59 ++++++ include/dt-bindings/clock/qcom,qcs615-gcc.h | 211 +++++++++++++++++++++ 2 files changed, 270 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml create mode 100644 include/dt-bindings/clock/qcom,qcs615-gcc.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml new file mode 100644 index 000000000000..4a828e102d25 --- /dev/null +++ b/Documentation/devicetree/bindings/clock/qcom,qcs615-gcc.yaml @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/qcom,qcs615-gcc.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Global Clock & Reset Controller on QCS615 + +maintainers: + - Taniya Das + +description: | + Qualcomm global clock control module provides the clocks, resets and power + domains on QCS615. + + See also: include/dt-bindings/clock/qcom,qcs615-gcc.h + +properties: + compatible: + const: qcom,qcs615-gcc + + clocks: + items: + - description: Board XO source + - description: Board active XO source + - description: Sleep clock source + + clock-names: + items: + - const: bi_tcxo + - const: bi_tcxo_ao + - const: sleep_clk + +required: + - compatible + - clocks + - clock-names + - '#power-domain-cells' + +allOf: + - $ref: qcom,gcc.yaml# + +unevaluatedProperties: false + +examples: + - | + #include + clock-controller@100000 { + compatible = "qcom,qcs615-gcc"; + reg = <0x00100000 0x1f0000>; + clocks = <&rpmhcc RPMH_CXO_CLK>, + <&rpmhcc RPMH_CXO_CLK_A>, + <&sleep_clk>; + clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk"; + #clock-cells = <1>; + #reset-cells = <1>; + #power-domain-cells = <1>; + }; +... diff --git a/include/dt-bindings/clock/qcom,qcs615-gcc.h b/include/dt-bindings/clock/qcom,qcs615-gcc.h new file mode 100644 index 000000000000..9704091636b8 --- /dev/null +++ b/include/dt-bindings/clock/qcom,qcs615-gcc.h @@ -0,0 +1,211 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef _DT_BINDINGS_CLK_QCOM_GCC_QCS615_H +#define _DT_BINDINGS_CLK_QCOM_GCC_QCS615_H + +/* GCC clocks */ +#define GPLL0_OUT_AUX2_DIV 0 +#define GPLL3_OUT_AUX2_DIV 1 +#define GPLL0 2 +#define GPLL3 3 +#define GPLL4 4 +#define GPLL6 5 +#define GPLL6_OUT_MAIN 6 +#define GPLL7 7 +#define GPLL8 8 +#define GPLL8_OUT_MAIN 9 +#define GCC_AGGRE_UFS_PHY_AXI_CLK 10 +#define GCC_AGGRE_USB2_SEC_AXI_CLK 11 +#define GCC_AGGRE_USB3_PRIM_AXI_CLK 12 +#define GCC_AHB2PHY_EAST_CLK 13 +#define GCC_AHB2PHY_WEST_CLK 14 +#define GCC_BOOT_ROM_AHB_CLK 15 +#define GCC_CAMERA_AHB_CLK 16 +#define GCC_CAMERA_HF_AXI_CLK 17 +#define GCC_CAMERA_XO_CLK 18 +#define GCC_CE1_AHB_CLK 19 +#define GCC_CE1_AXI_CLK 20 +#define GCC_CE1_CLK 21 +#define GCC_CFG_NOC_USB2_SEC_AXI_CLK 22 +#define GCC_CFG_NOC_USB3_PRIM_AXI_CLK 23 +#define GCC_CPUSS_AHB_CLK 24 +#define GCC_CPUSS_AHB_CLK_SRC 25 +#define GCC_CPUSS_GNOC_CLK 26 +#define GCC_DDRSS_GPU_AXI_CLK 27 +#define GCC_DISP_AHB_CLK 28 +#define GCC_DISP_GPLL0_DIV_CLK_SRC 29 +#define GCC_DISP_HF_AXI_CLK 30 +#define GCC_DISP_XO_CLK 31 +#define GCC_EMAC_AXI_CLK 32 +#define GCC_EMAC_PTP_CLK 33 +#define GCC_EMAC_PTP_CLK_SRC 34 +#define GCC_EMAC_RGMII_CLK 35 +#define GCC_EMAC_RGMII_CLK_SRC 36 +#define GCC_EMAC_SLV_AHB_CLK 37 +#define GCC_GP1_CLK 38 +#define GCC_GP1_CLK_SRC 39 +#define GCC_GP2_CLK 40 +#define GCC_GP2_CLK_SRC 41 +#define GCC_GP3_CLK 42 +#define GCC_GP3_CLK_SRC 43 +#define GCC_GPU_CFG_AHB_CLK 44 +#define GCC_GPU_GPLL0_CLK_SRC 45 +#define GCC_GPU_GPLL0_DIV_CLK_SRC 46 +#define GCC_GPU_IREF_CLK 47 +#define GCC_GPU_MEMNOC_GFX_CLK 48 +#define GCC_GPU_SNOC_DVM_GFX_CLK 49 +#define GCC_PCIE0_PHY_REFGEN_CLK 50 +#define GCC_PCIE_0_AUX_CLK 51 +#define GCC_PCIE_0_AUX_CLK_SRC 52 +#define GCC_PCIE_0_CFG_AHB_CLK 53 +#define GCC_PCIE_0_CLKREF_CLK 54 +#define GCC_PCIE_0_MSTR_AXI_CLK 55 +#define GCC_PCIE_0_PIPE_CLK 56 +#define GCC_PCIE_0_SLV_AXI_CLK 57 +#define GCC_PCIE_0_SLV_Q2A_AXI_CLK 58 +#define GCC_PCIE_PHY_AUX_CLK 59 +#define GCC_PCIE_PHY_REFGEN_CLK_SRC 60 +#define GCC_PDM2_CLK 61 +#define GCC_PDM2_CLK_SRC 62 +#define GCC_PDM_AHB_CLK 63 +#define GCC_PDM_XO4_CLK 64 +#define GCC_PRNG_AHB_CLK 65 +#define GCC_QMIP_CAMERA_NRT_AHB_CLK 66 +#define GCC_QMIP_DISP_AHB_CLK 67 +#define GCC_QMIP_PCIE_AHB_CLK 68 +#define GCC_QMIP_VIDEO_VCODEC_AHB_CLK 69 +#define GCC_QSPI_CNOC_PERIPH_AHB_CLK 70 +#define GCC_QSPI_CORE_CLK 71 +#define GCC_QSPI_CORE_CLK_SRC 72 +#define GCC_QUPV3_WRAP0_CORE_2X_CLK 73 +#define GCC_QUPV3_WRAP0_CORE_CLK 74 +#define GCC_QUPV3_WRAP0_S0_CLK 75 +#define GCC_QUPV3_WRAP0_S0_CLK_SRC 76 +#define GCC_QUPV3_WRAP0_S1_CLK 77 +#define GCC_QUPV3_WRAP0_S1_CLK_SRC 78 +#define GCC_QUPV3_WRAP0_S2_CLK 79 +#define GCC_QUPV3_WRAP0_S2_CLK_SRC 80 +#define GCC_QUPV3_WRAP0_S3_CLK 81 +#define GCC_QUPV3_WRAP0_S3_CLK_SRC 82 +#define GCC_QUPV3_WRAP0_S4_CLK 83 +#define GCC_QUPV3_WRAP0_S4_CLK_SRC 84 +#define GCC_QUPV3_WRAP0_S5_CLK 85 +#define GCC_QUPV3_WRAP0_S5_CLK_SRC 86 +#define GCC_QUPV3_WRAP1_CORE_2X_CLK 87 +#define GCC_QUPV3_WRAP1_CORE_CLK 88 +#define GCC_QUPV3_WRAP1_S0_CLK 89 +#define GCC_QUPV3_WRAP1_S0_CLK_SRC 90 +#define GCC_QUPV3_WRAP1_S1_CLK 91 +#define GCC_QUPV3_WRAP1_S1_CLK_SRC 92 +#define GCC_QUPV3_WRAP1_S2_CLK 93 +#define GCC_QUPV3_WRAP1_S2_CLK_SRC 94 +#define GCC_QUPV3_WRAP1_S3_CLK 95 +#define GCC_QUPV3_WRAP1_S3_CLK_SRC 96 +#define GCC_QUPV3_WRAP1_S4_CLK 97 +#define GCC_QUPV3_WRAP1_S4_CLK_SRC 98 +#define GCC_QUPV3_WRAP1_S5_CLK 99 +#define GCC_QUPV3_WRAP1_S5_CLK_SRC 100 +#define GCC_QUPV3_WRAP_0_M_AHB_CLK 101 +#define GCC_QUPV3_WRAP_0_S_AHB_CLK 102 +#define GCC_QUPV3_WRAP_1_M_AHB_CLK 103 +#define GCC_QUPV3_WRAP_1_S_AHB_CLK 104 +#define GCC_RX1_USB2_CLKREF_CLK 105 +#define GCC_RX3_USB2_CLKREF_CLK 106 +#define GCC_SDCC1_AHB_CLK 107 +#define GCC_SDCC1_APPS_CLK 108 +#define GCC_SDCC1_APPS_CLK_SRC 109 +#define GCC_SDCC1_ICE_CORE_CLK 110 +#define GCC_SDCC1_ICE_CORE_CLK_SRC 111 +#define GCC_SDCC2_AHB_CLK 112 +#define GCC_SDCC2_APPS_CLK 113 +#define GCC_SDCC2_APPS_CLK_SRC 114 +#define GCC_SDR_CORE_CLK 115 +#define GCC_SDR_CSR_HCLK 116 +#define GCC_SDR_PRI_MI2S_CLK 117 +#define GCC_SDR_SEC_MI2S_CLK 118 +#define GCC_SDR_WR0_MEM_CLK 119 +#define GCC_SDR_WR1_MEM_CLK 120 +#define GCC_SDR_WR2_MEM_CLK 121 +#define GCC_SYS_NOC_CPUSS_AHB_CLK 122 +#define GCC_UFS_CARD_CLKREF_CLK 123 +#define GCC_UFS_MEM_CLKREF_CLK 124 +#define GCC_UFS_PHY_AHB_CLK 125 +#define GCC_UFS_PHY_AXI_CLK 126 +#define GCC_UFS_PHY_AXI_CLK_SRC 127 +#define GCC_UFS_PHY_ICE_CORE_CLK 128 +#define GCC_UFS_PHY_ICE_CORE_CLK_SRC 129 +#define GCC_UFS_PHY_PHY_AUX_CLK 130 +#define GCC_UFS_PHY_PHY_AUX_CLK_SRC 131 +#define GCC_UFS_PHY_RX_SYMBOL_0_CLK 132 +#define GCC_UFS_PHY_TX_SYMBOL_0_CLK 133 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK 134 +#define GCC_UFS_PHY_UNIPRO_CORE_CLK_SRC 135 +#define GCC_USB20_SEC_MASTER_CLK 136 +#define GCC_USB20_SEC_MASTER_CLK_SRC 137 +#define GCC_USB20_SEC_MOCK_UTMI_CLK 138 +#define GCC_USB20_SEC_MOCK_UTMI_CLK_SRC 139 +#define GCC_USB20_SEC_SLEEP_CLK 140 +#define GCC_USB2_PRIM_CLKREF_CLK 141 +#define GCC_USB2_SEC_CLKREF_CLK 142 +#define GCC_USB2_SEC_PHY_AUX_CLK 143 +#define GCC_USB2_SEC_PHY_AUX_CLK_SRC 144 +#define GCC_USB2_SEC_PHY_COM_AUX_CLK 145 +#define GCC_USB2_SEC_PHY_PIPE_CLK 146 +#define GCC_USB30_PRIM_MASTER_CLK 147 +#define GCC_USB30_PRIM_MASTER_CLK_SRC 148 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK 149 +#define GCC_USB30_PRIM_MOCK_UTMI_CLK_SRC 150 +#define GCC_USB30_PRIM_SLEEP_CLK 151 +#define GCC_USB3_PRIM_CLKREF_CLK 152 +#define GCC_USB3_PRIM_PHY_AUX_CLK 153 +#define GCC_USB3_PRIM_PHY_AUX_CLK_SRC 154 +#define GCC_USB3_PRIM_PHY_COM_AUX_CLK 155 +#define GCC_USB3_PRIM_PHY_PIPE_CLK 156 +#define GCC_USB3_SEC_CLKREF_CLK 157 +#define GCC_VIDEO_AHB_CLK 158 +#define GCC_VIDEO_AXI0_CLK 159 +#define GCC_VIDEO_XO_CLK 160 +#define GCC_VSENSOR_CLK_SRC 161 +#define GCC_AGGRE_UFS_PHY_AXI_HW_CTL_CLK 162 +#define GCC_UFS_PHY_AXI_HW_CTL_CLK 163 +#define GCC_UFS_PHY_ICE_CORE_HW_CTL_CLK 164 +#define GCC_UFS_PHY_PHY_AUX_HW_CTL_CLK 165 +#define GCC_UFS_PHY_UNIPRO_CORE_HW_CTL_CLK 166 + +/* GCC Resets */ +#define GCC_EMAC_BCR 0 +#define GCC_QUSB2PHY_PRIM_BCR 1 +#define GCC_QUSB2PHY_SEC_BCR 2 +#define GCC_USB30_PRIM_BCR 3 +#define GCC_USB2_PHY_SEC_BCR 4 +#define GCC_USB3_DP_PHY_SEC_BCR 5 +#define GCC_USB3PHY_PHY_SEC_BCR 6 +#define GCC_PCIE_0_BCR 7 +#define GCC_PCIE_0_PHY_BCR 8 +#define GCC_PCIE_PHY_BCR 9 +#define GCC_PCIE_PHY_COM_BCR 10 +#define GCC_UFS_PHY_BCR 11 +#define GCC_USB20_SEC_BCR 12 +#define GCC_USB3_PHY_PRIM_SP0_BCR 13 +#define GCC_USB3PHY_PHY_PRIM_SP0_BCR 14 +#define GCC_SDCC1_BCR 15 +#define GCC_SDCC2_BCR 16 + +/* GCC power domains */ +#define EMAC_GDSC 0 +#define PCIE_0_GDSC 1 +#define UFS_PHY_GDSC 2 +#define USB20_SEC_GDSC 3 +#define USB30_PRIM_GDSC 4 +#define HLOS1_VOTE_AGGRE_NOC_MMU_AUDIO_TBU_GDSC 5 +#define HLOS1_VOTE_AGGRE_NOC_MMU_TBU1_GDSC 6 +#define HLOS1_VOTE_AGGRE_NOC_MMU_TBU2_GDSC 7 +#define HLOS1_VOTE_AGGRE_NOC_MMU_PCIE_TBU_GDSC 8 +#define HLOS1_VOTE_MMNOC_MMU_TBU_HF0_GDSC 9 +#define HLOS1_VOTE_MMNOC_MMU_TBU_SF_GDSC 10 +#define HLOS1_VOTE_MMNOC_MMU_TBU_HF1_GDSC 11 + +#endif -- cgit v1.2.3 From 0a670e151a71434765de69590944e18c08ee08cf Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Nov 2024 15:09:57 +0100 Subject: tree-wide: s/override_creds()/override_creds_light(get_new_cred())/g Convert all callers from override_creds() to override_creds_light(get_new_cred()) in preparation of making override_creds() not take a separate reference at all. Link: https://lore.kernel.org/r/20241125-work-cred-v2-1-68b9d38bb5b2@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- drivers/base/firmware_loader/main.c | 2 +- drivers/crypto/ccp/sev-dev.c | 2 +- drivers/target/target_core_configfs.c | 2 +- fs/aio.c | 2 +- fs/binfmt_misc.c | 2 +- fs/cachefiles/internal.h | 2 +- fs/coredump.c | 2 +- fs/nfs/localio.c | 4 ++-- fs/nfs/nfs4idmap.c | 2 +- fs/nfsd/auth.c | 2 +- fs/nfsd/nfs4recover.c | 2 +- fs/nfsd/nfsfh.c | 2 +- fs/open.c | 2 +- fs/overlayfs/copy_up.c | 2 +- fs/smb/client/cifs_spnego.c | 2 +- fs/smb/client/cifsacl.c | 4 ++-- fs/smb/server/smb_common.c | 2 +- include/linux/cred.h | 5 +++-- io_uring/io_uring.c | 2 +- io_uring/sqpoll.c | 2 +- kernel/acct.c | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/trace/trace_events_user.c | 2 +- net/dns_resolver/dns_query.c | 2 +- 24 files changed, 28 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index cb0912ea3e62..75eb6fd75927 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -911,7 +911,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, ret = -ENOMEM; goto out; } - old_cred = override_creds(kern_cred); + old_cred = override_creds_light(get_new_cred(kern_cred)); ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index af018afd9cd7..2ad6e41af085 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -244,7 +244,7 @@ static struct file *open_file_as_root(const char *filename, int flags, umode_t m if (!cred) return ERR_PTR(-ENOMEM); cred->fsuid = GLOBAL_ROOT_UID; - old_cred = override_creds(cred); + old_cred = override_creds_light(get_new_cred(cred)); fp = file_open_root(&root, filename, flags, mode); path_put(&root); diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index c40217f44b1b..be98d16b2c57 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3756,7 +3756,7 @@ static int __init target_core_init_configfs(void) ret = -ENOMEM; goto out; } - old_cred = override_creds(kern_cred); + old_cred = override_creds_light(get_new_cred(kern_cred)); target_init_dbroot(); revert_creds(old_cred); put_cred(kern_cred); diff --git a/fs/aio.c b/fs/aio.c index 50671640b588..a52fe2e999e7 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1639,7 +1639,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, static void aio_fsync_work(struct work_struct *work) { struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); - const struct cred *old_cred = override_creds(iocb->fsync.creds); + const struct cred *old_cred = override_creds_light(get_new_cred(iocb->fsync.creds)); iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); revert_creds(old_cred); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 6a3a16f91051..21eb501ae03d 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -829,7 +829,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, * didn't matter much as only a privileged process could open * the register file. */ - old_cred = override_creds(file->f_cred); + old_cred = override_creds_light(get_new_cred(file->f_cred)); f = open_exec(e->interpreter); revert_creds(old_cred); if (IS_ERR(f)) { diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 7b99bd98de75..b156cc2e0e63 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -393,7 +393,7 @@ extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, static inline void cachefiles_begin_secure(struct cachefiles_cache *cache, const struct cred **_saved_cred) { - *_saved_cred = override_creds(cache->cache_cred); + *_saved_cred = override_creds_light(get_new_cred(cache->cache_cred)); } static inline void cachefiles_end_secure(struct cachefiles_cache *cache, diff --git a/fs/coredump.c b/fs/coredump.c index d48edb37bc35..b6aae41b80d2 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -576,7 +576,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (retval < 0) goto fail_creds; - old_cred = override_creds(cred); + old_cred = override_creds_light(get_new_cred(cred)); ispipe = format_corename(&cn, &cprm, &argv, &argc); diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 4b8618cf114c..98393c2efb75 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -374,7 +374,7 @@ static void nfs_local_call_read(struct work_struct *work) struct iov_iter iter; ssize_t status; - save_cred = override_creds(filp->f_cred); + save_cred = override_creds_light(get_new_cred(filp->f_cred)); nfs_local_iter_init(&iter, iocb, READ); @@ -545,7 +545,7 @@ static void nfs_local_call_write(struct work_struct *work) ssize_t status; current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; - save_cred = override_creds(filp->f_cred); + save_cred = override_creds_light(get_new_cred(filp->f_cred)); nfs_local_iter_init(&iter, iocb, WRITE); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 25a7c771cfd8..b9442f70271d 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -311,7 +311,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, const struct user_key_payload *payload; ssize_t ret; - saved_cred = override_creds(id_resolver_cache); + saved_cred = override_creds_light(get_new_cred(id_resolver_cache)); rkey = nfs_idmap_request_key(name, namelen, type, idmap); revert_creds(saved_cred); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 93e33d1ee891..614a5ec4824b 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -79,7 +79,7 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - put_cred(override_creds(new)); + put_cred(override_creds_light(get_new_cred(new))); put_cred(new); return 0; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4a765555bf84..886d03953d7a 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -81,7 +81,7 @@ nfs4_save_creds(const struct cred **original_creds) new->fsuid = GLOBAL_ROOT_UID; new->fsgid = GLOBAL_ROOT_GID; - *original_creds = override_creds(new); + *original_creds = override_creds_light(get_new_cred(new)); put_cred(new); return 0; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 6a831cb242df..4819364190d3 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -221,7 +221,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - put_cred(override_creds(new)); + put_cred(override_creds_light(get_new_cred(new))); put_cred(new); } else { error = nfsd_setuser_and_check_port(rqstp, cred, exp); diff --git a/fs/open.c b/fs/open.c index e6911101fe71..2459cd061f47 100644 --- a/fs/open.c +++ b/fs/open.c @@ -448,7 +448,7 @@ static const struct cred *access_override_creds(void) */ override_cred->non_rcu = 1; - old_cred = override_creds(override_cred); + old_cred = override_creds_light(get_new_cred(override_cred)); /* override_cred() gets its own ref */ put_cred(override_cred); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 3601ddfeddc2..527b041213c8 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -741,7 +741,7 @@ static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc) return err; if (cc->new) - cc->old = override_creds(cc->new); + cc->old = override_creds_light(get_new_cred(cc->new)); return 0; } diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index 28f568b5fc27..721d8b1254b6 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -173,7 +173,7 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, } cifs_dbg(FYI, "key description = %s\n", description); - saved_cred = override_creds(spnego_cred); + saved_cred = override_creds_light(get_new_cred(spnego_cred)); spnego_key = request_key(&cifs_spnego_key_type, description, ""); revert_creds(saved_cred); diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index ba79aa2107cc..b1ea4ea3de4b 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -292,7 +292,7 @@ id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid) return -EINVAL; rc = 0; - saved_cred = override_creds(root_cred); + saved_cred = override_creds_light(get_new_cred(root_cred)); sidkey = request_key(&cifs_idmap_key_type, desc, ""); if (IS_ERR(sidkey)) { rc = -EINVAL; @@ -398,7 +398,7 @@ try_upcall_to_get_id: if (!sidstr) return -ENOMEM; - saved_cred = override_creds(root_cred); + saved_cred = override_creds_light(get_new_cred(root_cred)); sidkey = request_key(&cifs_idmap_key_type, sidstr, ""); if (IS_ERR(sidkey)) { cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n", diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 4e6f169fcf83..5662f2651b8e 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -780,7 +780,7 @@ int __ksmbd_override_fsids(struct ksmbd_work *work, cred->cap_effective = cap_drop_fs_set(cred->cap_effective); WARN_ON(work->saved_cred); - work->saved_cred = override_creds(cred); + work->saved_cred = override_creds_light(get_new_cred(cred)); if (!work->saved_cred) { abort_creds(cred); return -EINVAL; diff --git a/include/linux/cred.h b/include/linux/cred.h index e4a3155fe409..b0bc1fea9ca0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -211,9 +211,10 @@ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) * Get a reference on the specified set of new credentials. The caller must * release the reference. */ -static inline struct cred *get_new_cred(struct cred *cred) +static inline struct cred *get_new_cred(const struct cred *cred) { - return get_new_cred_many(cred, 1); + struct cred *nonconst_cred = (struct cred *) cred; + return get_new_cred_many(nonconst_cred, 1); } /** diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 06ff41484e29..0b25ddea943e 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1728,7 +1728,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) return -EBADF; if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) - creds = override_creds(req->creds); + creds = override_creds_light(get_new_cred(req->creds)); if (!def->audit_skip) audit_uring_entry(req->opcode); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 6df5e649c413..58a76d581895 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -174,7 +174,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) const struct cred *creds = NULL; if (ctx->sq_creds != current_cred()) - creds = override_creds(ctx->sq_creds); + creds = override_creds_light(get_new_cred(ctx->sq_creds)); mutex_lock(&ctx->uring_lock); if (!wq_list_empty(&ctx->iopoll_list)) diff --git a/kernel/acct.c b/kernel/acct.c index 179848ad33e9..8f18eb02dd41 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -501,7 +501,7 @@ static void do_acct_process(struct bsd_acct_struct *acct) flim = rlimit(RLIMIT_FSIZE); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; /* Perform file operations on behalf of whoever enabled accounting */ - orig_cred = override_creds(file->f_cred); + orig_cred = override_creds_light(get_new_cred(file->f_cred)); /* * First check to see if there is enough free_space to continue diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index d9061bd55436..97329b4fe502 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5216,7 +5216,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, * permissions using the credentials from file open to protect against * inherited fd attacks. */ - saved_cred = override_creds(of->file->f_cred); + saved_cred = override_creds_light(get_new_cred(of->file->f_cred)); ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, of->file->f_path.dentry->d_sb, threadgroup, ctx->ns); diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 17bcad8f79de..4dd7c45d227e 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1469,7 +1469,7 @@ static int user_event_set_call_visible(struct user_event *user, bool visible) */ cred->fsuid = GLOBAL_ROOT_UID; - old_cred = override_creds(cred); + old_cred = override_creds_light(get_new_cred(cred)); if (visible) ret = trace_add_event_call(&user->call); diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 82b084cc1cc6..a54f5f841cea 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -124,7 +124,7 @@ int dns_query(struct net *net, /* make the upcall, using special credentials to prevent the use of * add_key() to preinstall malicious redirections */ - saved_cred = override_creds(dns_resolver_cache); + saved_cred = override_creds_light(get_new_cred(dns_resolver_cache)); rkey = request_key_net(&key_type_dns_resolver, desc, net, options); revert_creds(saved_cred); kfree(desc); -- cgit v1.2.3 From 95c54bc81791c210b131f2b1013942487e74896f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Nov 2024 15:09:58 +0100 Subject: cred: return old creds from revert_creds_light() So we can easily convert revert_creds() callers over to drop the reference count explicitly. Link: https://lore.kernel.org/r/20241125-work-cred-v2-2-68b9d38bb5b2@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/cred.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index b0bc1fea9ca0..57cf0256ea29 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -185,9 +185,12 @@ static inline const struct cred *override_creds_light(const struct cred *overrid return old; } -static inline void revert_creds_light(const struct cred *revert_cred) +static inline const struct cred *revert_creds_light(const struct cred *revert_cred) { + const struct cred *override_cred = current->cred; + rcu_assign_pointer(current->cred, revert_cred); + return override_cred; } /** -- cgit v1.2.3 From a51a1d6bcaa345cc88e738cad468083c4e13aa3b Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Nov 2024 15:10:00 +0100 Subject: cred: remove old {override,revert}_creds() helpers They are now unused. Link: https://lore.kernel.org/r/20241125-work-cred-v2-4-68b9d38bb5b2@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/cred.h | 7 ------- kernel/cred.c | 50 -------------------------------------------------- 2 files changed, 57 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 57cf0256ea29..80dcc18ef6e4 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -155,8 +155,6 @@ extern struct cred *prepare_creds(void); extern struct cred *prepare_exec_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); -extern const struct cred *override_creds(const struct cred *); -extern void revert_creds(const struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); @@ -172,11 +170,6 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } -/* - * Override creds without bumping reference count. Caller must ensure - * reference remains valid or has taken reference. Almost always not the - * interface you want. Use override_creds()/revert_creds() instead. - */ static inline const struct cred *override_creds_light(const struct cred *override_cred) { const struct cred *old = current->cred; diff --git a/kernel/cred.c b/kernel/cred.c index da7da250f7c8..9676965c0981 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -476,56 +476,6 @@ void abort_creds(struct cred *new) } EXPORT_SYMBOL(abort_creds); -/** - * override_creds - Override the current process's subjective credentials - * @new: The credentials to be assigned - * - * Install a set of temporary override subjective credentials on the current - * process, returning the old set for later reversion. - */ -const struct cred *override_creds(const struct cred *new) -{ - const struct cred *old; - - kdebug("override_creds(%p{%ld})", new, - atomic_long_read(&new->usage)); - - /* - * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'. - * - * That means that we do not clear the 'non_rcu' flag, since - * we are only installing the cred into the thread-synchronous - * '->cred' pointer, not the '->real_cred' pointer that is - * visible to other threads under RCU. - */ - get_new_cred((struct cred *)new); - old = override_creds_light(new); - - kdebug("override_creds() = %p{%ld}", old, - atomic_long_read(&old->usage)); - return old; -} -EXPORT_SYMBOL(override_creds); - -/** - * revert_creds - Revert a temporary subjective credentials override - * @old: The credentials to be restored - * - * Revert a temporary set of override subjective credentials to an old set, - * discarding the override set. - */ -void revert_creds(const struct cred *old) -{ - const struct cred *override = current->cred; - - kdebug("revert_creds(%p{%ld})", old, - atomic_long_read(&old->usage)); - - revert_creds_light(old); - put_cred(override); -} -EXPORT_SYMBOL(revert_creds); - /** * cred_fscmp - Compare two credentials with respect to filesystem access. * @a: The first credential -- cgit v1.2.3 From 6771e004b40962402d0e973fc7d2e0e61364fdfb Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Nov 2024 15:10:01 +0100 Subject: tree-wide: s/override_creds_light()/override_creds()/g Rename all calls to override_creds_light() back to overrid_creds(). Link: https://lore.kernel.org/r/20241125-work-cred-v2-5-68b9d38bb5b2@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- drivers/base/firmware_loader/main.c | 2 +- drivers/crypto/ccp/sev-dev.c | 2 +- drivers/target/target_core_configfs.c | 2 +- fs/aio.c | 2 +- fs/backing-file.c | 10 +++++----- fs/binfmt_misc.c | 2 +- fs/cachefiles/internal.h | 2 +- fs/coredump.c | 2 +- fs/nfs/localio.c | 4 ++-- fs/nfs/nfs4idmap.c | 2 +- fs/nfsd/auth.c | 2 +- fs/nfsd/nfs4recover.c | 2 +- fs/nfsd/nfsfh.c | 2 +- fs/open.c | 2 +- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/dir.c | 2 +- fs/overlayfs/util.c | 2 +- fs/smb/client/cifs_spnego.c | 2 +- fs/smb/client/cifsacl.c | 4 ++-- fs/smb/server/smb_common.c | 2 +- include/linux/cred.h | 2 +- io_uring/io_uring.c | 2 +- io_uring/sqpoll.c | 2 +- kernel/acct.c | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/trace/trace_events_user.c | 2 +- net/dns_resolver/dns_query.c | 2 +- 27 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index b5677e173f91..294c75025dcb 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -911,7 +911,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, ret = -ENOMEM; goto out; } - old_cred = override_creds_light(get_new_cred(kern_cred)); + old_cred = override_creds(get_new_cred(kern_cred)); ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 9111a51d53e0..ffae20fd52bc 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -244,7 +244,7 @@ static struct file *open_file_as_root(const char *filename, int flags, umode_t m if (!cred) return ERR_PTR(-ENOMEM); cred->fsuid = GLOBAL_ROOT_UID; - old_cred = override_creds_light(get_new_cred(cred)); + old_cred = override_creds(get_new_cred(cred)); fp = file_open_root(&root, filename, flags, mode); path_put(&root); diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 564bc71d2d09..7788e1fe2633 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3756,7 +3756,7 @@ static int __init target_core_init_configfs(void) ret = -ENOMEM; goto out; } - old_cred = override_creds_light(get_new_cred(kern_cred)); + old_cred = override_creds(get_new_cred(kern_cred)); target_init_dbroot(); put_cred(revert_creds_light(old_cred)); put_cred(kern_cred); diff --git a/fs/aio.c b/fs/aio.c index 6b987c48b671..7e0ec687f480 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1639,7 +1639,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, static void aio_fsync_work(struct work_struct *work) { struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); - const struct cred *old_cred = override_creds_light(get_new_cred(iocb->fsync.creds)); + const struct cred *old_cred = override_creds(get_new_cred(iocb->fsync.creds)); iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); put_cred(revert_creds_light(old_cred)); diff --git a/fs/backing-file.c b/fs/backing-file.c index cbdad8b68474..37c5a66e5dad 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -176,7 +176,7 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, !(file->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL; - old_cred = override_creds_light(ctx->cred); + old_cred = override_creds(ctx->cred); if (is_sync_kiocb(iocb)) { rwf_t rwf = iocb_to_rw_flags(flags); @@ -233,7 +233,7 @@ ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, */ flags &= ~IOCB_DIO_CALLER_COMP; - old_cred = override_creds_light(ctx->cred); + old_cred = override_creds(ctx->cred); if (is_sync_kiocb(iocb)) { rwf_t rwf = iocb_to_rw_flags(flags); @@ -281,7 +281,7 @@ ssize_t backing_file_splice_read(struct file *in, struct kiocb *iocb, if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING))) return -EIO; - old_cred = override_creds_light(ctx->cred); + old_cred = override_creds(ctx->cred); ret = vfs_splice_read(in, &iocb->ki_pos, pipe, len, flags); revert_creds_light(old_cred); @@ -310,7 +310,7 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, if (ret) return ret; - old_cred = override_creds_light(ctx->cred); + old_cred = override_creds(ctx->cred); file_start_write(out); ret = out->f_op->splice_write(pipe, out, &iocb->ki_pos, len, flags); file_end_write(out); @@ -338,7 +338,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma, vma_set_file(vma, file); - old_cred = override_creds_light(ctx->cred); + old_cred = override_creds(ctx->cred); ret = call_mmap(vma->vm_file, vma); revert_creds_light(old_cred); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 586862ca8738..5756ec49f79e 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -829,7 +829,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, * didn't matter much as only a privileged process could open * the register file. */ - old_cred = override_creds_light(get_new_cred(file->f_cred)); + old_cred = override_creds(get_new_cred(file->f_cred)); f = open_exec(e->interpreter); put_cred(revert_creds_light(old_cred)); if (IS_ERR(f)) { diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 809305dd5317..05b1d4cfb55a 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -393,7 +393,7 @@ extern int cachefiles_determine_cache_security(struct cachefiles_cache *cache, static inline void cachefiles_begin_secure(struct cachefiles_cache *cache, const struct cred **_saved_cred) { - *_saved_cred = override_creds_light(get_new_cred(cache->cache_cred)); + *_saved_cred = override_creds(get_new_cred(cache->cache_cred)); } static inline void cachefiles_end_secure(struct cachefiles_cache *cache, diff --git a/fs/coredump.c b/fs/coredump.c index ff119aaa5c31..4eae37892da5 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -576,7 +576,7 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (retval < 0) goto fail_creds; - old_cred = override_creds_light(get_new_cred(cred)); + old_cred = override_creds(get_new_cred(cred)); ispipe = format_corename(&cn, &cprm, &argv, &argc); diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index deca5559a56b..682d951ed69a 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -374,7 +374,7 @@ static void nfs_local_call_read(struct work_struct *work) struct iov_iter iter; ssize_t status; - save_cred = override_creds_light(get_new_cred(filp->f_cred)); + save_cred = override_creds(get_new_cred(filp->f_cred)); nfs_local_iter_init(&iter, iocb, READ); @@ -545,7 +545,7 @@ static void nfs_local_call_write(struct work_struct *work) ssize_t status; current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; - save_cred = override_creds_light(get_new_cred(filp->f_cred)); + save_cred = override_creds(get_new_cred(filp->f_cred)); nfs_local_iter_init(&iter, iocb, WRITE); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 629979b20e98..3cae4057f8ba 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -311,7 +311,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, const struct user_key_payload *payload; ssize_t ret; - saved_cred = override_creds_light(get_new_cred(id_resolver_cache)); + saved_cred = override_creds(get_new_cred(id_resolver_cache)); rkey = nfs_idmap_request_key(name, namelen, type, idmap); put_cred(revert_creds_light(saved_cred)); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index dda14811d092..dafea9183b4e 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -79,7 +79,7 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - put_cred(override_creds_light(get_new_cred(new))); + put_cred(override_creds(get_new_cred(new))); put_cred(new); return 0; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index bf166a23d8b4..0ab22b4e940f 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -81,7 +81,7 @@ nfs4_save_creds(const struct cred **original_creds) new->fsuid = GLOBAL_ROOT_UID; new->fsgid = GLOBAL_ROOT_GID; - *original_creds = override_creds_light(get_new_cred(new)); + *original_creds = override_creds(get_new_cred(new)); put_cred(new); return 0; } diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 4819364190d3..1cf52f3d5412 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -221,7 +221,7 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct net *net, new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - put_cred(override_creds_light(get_new_cred(new))); + put_cred(override_creds(get_new_cred(new))); put_cred(new); } else { error = nfsd_setuser_and_check_port(rqstp, cred, exp); diff --git a/fs/open.c b/fs/open.c index 23c414c10883..bd0a34653f0e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -448,7 +448,7 @@ static const struct cred *access_override_creds(void) */ override_cred->non_rcu = 1; - old_cred = override_creds_light(get_new_cred(override_cred)); + old_cred = override_creds(get_new_cred(override_cred)); /* override_cred() gets its own ref */ put_cred(override_cred); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 0f19bdbc78a4..7805667b2e05 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -741,7 +741,7 @@ static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc) return err; if (cc->new) - cc->old = override_creds_light(get_new_cred(cc->new)); + cc->old = override_creds(get_new_cred(cc->new)); return 0; } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 08e683917d12..151271f0586c 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -580,7 +580,7 @@ static const struct cred *ovl_setup_cred_for_create(struct dentry *dentry, * We must be called with creator creds already, otherwise we risk * leaking creds. */ - old_cred = override_creds_light(override_cred); + old_cred = override_creds(override_cred); WARN_ON_ONCE(old_cred != ovl_creds(dentry->d_sb)); return override_cred; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 9aa7493b1e10..2513a79a10b0 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -65,7 +65,7 @@ const struct cred *ovl_override_creds(struct super_block *sb) { struct ovl_fs *ofs = OVL_FS(sb); - return override_creds_light(ofs->creator_cred); + return override_creds(ofs->creator_cred); } void ovl_revert_creds(const struct cred *old_cred) diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index f2353bccc9f5..f22dc0be357f 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -173,7 +173,7 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, } cifs_dbg(FYI, "key description = %s\n", description); - saved_cred = override_creds_light(get_new_cred(spnego_cred)); + saved_cred = override_creds(get_new_cred(spnego_cred)); spnego_key = request_key(&cifs_spnego_key_type, description, ""); put_cred(revert_creds_light(saved_cred)); diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index 81d8d9802a56..d65e094b97cb 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -292,7 +292,7 @@ id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid) return -EINVAL; rc = 0; - saved_cred = override_creds_light(get_new_cred(root_cred)); + saved_cred = override_creds(get_new_cred(root_cred)); sidkey = request_key(&cifs_idmap_key_type, desc, ""); if (IS_ERR(sidkey)) { rc = -EINVAL; @@ -398,7 +398,7 @@ try_upcall_to_get_id: if (!sidstr) return -ENOMEM; - saved_cred = override_creds_light(get_new_cred(root_cred)); + saved_cred = override_creds(get_new_cred(root_cred)); sidkey = request_key(&cifs_idmap_key_type, sidstr, ""); if (IS_ERR(sidkey)) { cifs_dbg(FYI, "%s: Can't map SID %s to a %cid\n", diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 1bd9b9c9db70..4a4cb9e4d45a 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -780,7 +780,7 @@ int __ksmbd_override_fsids(struct ksmbd_work *work, cred->cap_effective = cap_drop_fs_set(cred->cap_effective); WARN_ON(work->saved_cred); - work->saved_cred = override_creds_light(get_new_cred(cred)); + work->saved_cred = override_creds(get_new_cred(cred)); if (!work->saved_cred) { abort_creds(cred); return -EINVAL; diff --git a/include/linux/cred.h b/include/linux/cred.h index 80dcc18ef6e4..a073e6163c4e 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -170,7 +170,7 @@ static inline bool cap_ambient_invariant_ok(const struct cred *cred) cred->cap_inheritable)); } -static inline const struct cred *override_creds_light(const struct cred *override_cred) +static inline const struct cred *override_creds(const struct cred *override_cred) { const struct cred *old = current->cred; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 66b5bd9d26ab..5b1cc024deea 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1728,7 +1728,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) return -EBADF; if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) - creds = override_creds_light(get_new_cred(req->creds)); + creds = override_creds(get_new_cred(req->creds)); if (!def->audit_skip) audit_uring_entry(req->opcode); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 42ca6e07e0f7..0fd424442118 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -174,7 +174,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) const struct cred *creds = NULL; if (ctx->sq_creds != current_cred()) - creds = override_creds_light(get_new_cred(ctx->sq_creds)); + creds = override_creds(get_new_cred(ctx->sq_creds)); mutex_lock(&ctx->uring_lock); if (!wq_list_empty(&ctx->iopoll_list)) diff --git a/kernel/acct.c b/kernel/acct.c index 4e28aa9e1ef2..a51a3b483fd9 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -501,7 +501,7 @@ static void do_acct_process(struct bsd_acct_struct *acct) flim = rlimit(RLIMIT_FSIZE); current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY; /* Perform file operations on behalf of whoever enabled accounting */ - orig_cred = override_creds_light(get_new_cred(file->f_cred)); + orig_cred = override_creds(get_new_cred(file->f_cred)); /* * First check to see if there is enough free_space to continue diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 68b816955c9c..2d618b577e52 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5216,7 +5216,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, * permissions using the credentials from file open to protect against * inherited fd attacks. */ - saved_cred = override_creds_light(get_new_cred(of->file->f_cred)); + saved_cred = override_creds(get_new_cred(of->file->f_cred)); ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, of->file->f_path.dentry->d_sb, threadgroup, ctx->ns); diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 2fdadb2e8547..857124d81f12 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1469,7 +1469,7 @@ static int user_event_set_call_visible(struct user_event *user, bool visible) */ cred->fsuid = GLOBAL_ROOT_UID; - old_cred = override_creds_light(get_new_cred(cred)); + old_cred = override_creds(get_new_cred(cred)); if (visible) ret = trace_add_event_call(&user->call); diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 297059b7e2a3..f8749d688d66 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -124,7 +124,7 @@ int dns_query(struct net *net, /* make the upcall, using special credentials to prevent the use of * add_key() to preinstall malicious redirections */ - saved_cred = override_creds_light(get_new_cred(dns_resolver_cache)); + saved_cred = override_creds(get_new_cred(dns_resolver_cache)); rkey = request_key_net(&key_type_dns_resolver, desc, net, options); put_cred(revert_creds_light(saved_cred)); kfree(desc); -- cgit v1.2.3 From 51c0bcf0973a3836adfc46f30f876f412478e376 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Nov 2024 15:10:02 +0100 Subject: tree-wide: s/revert_creds_light()/revert_creds()/g Rename all calls to revert_creds_light() back to revert_creds(). Link: https://lore.kernel.org/r/20241125-work-cred-v2-6-68b9d38bb5b2@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- drivers/base/firmware_loader/main.c | 2 +- drivers/crypto/ccp/sev-dev.c | 2 +- drivers/target/target_core_configfs.c | 2 +- fs/aio.c | 2 +- fs/backing-file.c | 10 +++++----- fs/binfmt_misc.c | 2 +- fs/cachefiles/internal.h | 2 +- fs/coredump.c | 2 +- fs/nfs/localio.c | 4 ++-- fs/nfs/nfs4idmap.c | 2 +- fs/nfsd/auth.c | 2 +- fs/nfsd/filecache.c | 2 +- fs/nfsd/nfs4recover.c | 2 +- fs/open.c | 2 +- fs/overlayfs/copy_up.c | 2 +- fs/overlayfs/dir.c | 2 +- fs/overlayfs/util.c | 2 +- fs/smb/client/cifs_spnego.c | 2 +- fs/smb/client/cifsacl.c | 4 ++-- fs/smb/server/smb_common.c | 2 +- include/linux/cred.h | 2 +- io_uring/io_uring.c | 2 +- io_uring/sqpoll.c | 2 +- kernel/acct.c | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/trace/trace_events_user.c | 2 +- net/dns_resolver/dns_query.c | 2 +- 27 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 294c75025dcb..a97fa36ee4bd 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -943,7 +943,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, } else ret = assign_fw(fw, device); - put_cred(revert_creds_light(old_cred)); + put_cred(revert_creds(old_cred)); put_cred(kern_cred); out: diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index ffae20fd52bc..187c34b02442 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -249,7 +249,7 @@ static struct file *open_file_as_root(const char *filename, int flags, umode_t m fp = file_open_root(&root, filename, flags, mode); path_put(&root); - put_cred(revert_creds_light(old_cred)); + put_cred(revert_creds(old_cred)); return fp; } diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 7788e1fe2633..ec7a55987193 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3758,7 +3758,7 @@ static int __init target_core_init_configfs(void) } old_cred = override_creds(get_new_cred(kern_cred)); target_init_dbroot(); - put_cred(revert_creds_light(old_cred)); + put_cred(revert_creds(old_cred)); put_cred(kern_cred); return 0; diff --git a/fs/aio.c b/fs/aio.c index 7e0ec687f480..5e57dcaed7f1 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1642,7 +1642,7 @@ static void aio_fsync_work(struct work_struct *work) const struct cred *old_cred = override_creds(get_new_cred(iocb->fsync.creds)); iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); - put_cred(revert_creds_light(old_cred)); + put_cred(revert_creds(old_cred)); put_cred(iocb->fsync.creds); iocb_put(iocb); } diff --git a/fs/backing-file.c b/fs/backing-file.c index 37c5a66e5dad..763fbe9b72b2 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -197,7 +197,7 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, backing_aio_cleanup(aio, ret); } out: - revert_creds_light(old_cred); + revert_creds(old_cred); if (ctx->accessed) ctx->accessed(iocb->ki_filp); @@ -264,7 +264,7 @@ ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, backing_aio_cleanup(aio, ret); } out: - revert_creds_light(old_cred); + revert_creds(old_cred); return ret; } @@ -283,7 +283,7 @@ ssize_t backing_file_splice_read(struct file *in, struct kiocb *iocb, old_cred = override_creds(ctx->cred); ret = vfs_splice_read(in, &iocb->ki_pos, pipe, len, flags); - revert_creds_light(old_cred); + revert_creds(old_cred); if (ctx->accessed) ctx->accessed(iocb->ki_filp); @@ -314,7 +314,7 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, file_start_write(out); ret = out->f_op->splice_write(pipe, out, &iocb->ki_pos, len, flags); file_end_write(out); - revert_creds_light(old_cred); + revert_creds(old_cred); if (ctx->end_write) ctx->end_write(iocb, ret); @@ -340,7 +340,7 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma, old_cred = override_creds(ctx->cred); ret = call_mmap(vma->vm_file, vma); - revert_creds_light(old_cred); + revert_creds(old_cred); if (ctx->accessed) ctx->accessed(user_file); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 5756ec49f79e..3270c2158552 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -831,7 +831,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, */ old_cred = override_creds(get_new_cred(file->f_cred)); f = open_exec(e->interpreter); - put_cred(revert_creds_light(old_cred)); + put_cred(revert_creds(old_cred)); if (IS_ERR(f)) { pr_notice("register: failed to install interpreter file %s\n", e->interpreter); diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 05b1d4cfb55a..1cfeb3b38319 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -399,7 +399,7 @@ static inline void cachefiles_begin_secure(struct cachefiles_cache *cache, static inline void cachefiles_end_secure(struct cachefiles_cache *cache, const struct cred *saved_cred) { - put_cred(revert_creds_light(saved_cred)); + put_cred(revert_creds(saved_cred)); } /* diff --git a/fs/coredump.c b/fs/coredump.c index 4eae37892da5..0d3a65cac546 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -781,7 +781,7 @@ fail_unlock: kfree(argv); kfree(cn.corename); coredump_finish(core_dumped); - put_cred(revert_creds_light(old_cred)); + put_cred(revert_creds(old_cred)); fail_creds: put_cred(cred); fail: diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 682d951ed69a..720a4a99bd8a 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -384,7 +384,7 @@ static void nfs_local_call_read(struct work_struct *work) nfs_local_read_done(iocb, status); nfs_local_pgio_release(iocb); - put_cred(revert_creds_light(save_cred)); + put_cred(revert_creds(save_cred)); } static int @@ -558,7 +558,7 @@ static void nfs_local_call_write(struct work_struct *work) nfs_local_vfs_getattr(iocb); nfs_local_pgio_release(iocb); - put_cred(revert_creds_light(save_cred)); + put_cred(revert_creds(save_cred)); current->flags = old_flags; } diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 3cae4057f8ba..25b6a8920a65 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -313,7 +313,7 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, saved_cred = override_creds(get_new_cred(id_resolver_cache)); rkey = nfs_idmap_request_key(name, namelen, type, idmap); - put_cred(revert_creds_light(saved_cred)); + put_cred(revert_creds(saved_cred)); if (IS_ERR(rkey)) { ret = PTR_ERR(rkey); diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index dafea9183b4e..c399a5f030af 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,7 +27,7 @@ int nfsd_setuser(struct svc_cred *cred, struct svc_export *exp) int flags = nfsexp_flags(cred, exp); /* discard any old override before preparing the new set */ - put_cred(revert_creds_light(get_cred(current_real_cred()))); + put_cred(revert_creds(get_cred(current_real_cred()))); new = prepare_creds(); if (!new) return -ENOMEM; diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index c05cd2ae8139..dc5c9d8e8202 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -1248,7 +1248,7 @@ nfsd_file_acquire_local(struct net *net, struct svc_cred *cred, beres = nfsd_file_do_acquire(NULL, net, cred, client, fhp, may_flags, NULL, pnf, true); - put_cred(revert_creds_light(save_cred)); + put_cred(revert_creds(save_cred)); return beres; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 0ab22b4e940f..f3837167b6a1 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -89,7 +89,7 @@ nfs4_save_creds(const struct cred **original_creds) static void nfs4_reset_creds(const struct cred *original) { - put_cred(revert_creds_light(original)); + put_cred(revert_creds(original)); } static void diff --git a/fs/open.c b/fs/open.c index bd0a34653f0e..0a5cd8e74fb9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -523,7 +523,7 @@ out_path_release: } out: if (old_cred) - put_cred(revert_creds_light(old_cred)); + put_cred(revert_creds(old_cred)); return res; } diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 7805667b2e05..439bd9a5ceec 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -749,7 +749,7 @@ static int ovl_prep_cu_creds(struct dentry *dentry, struct ovl_cu_creds *cc) static void ovl_revert_cu_creds(struct ovl_cu_creds *cc) { if (cc->new) { - put_cred(revert_creds_light(cc->old)); + put_cred(revert_creds(cc->old)); put_cred(cc->new); } } diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 151271f0586c..c9993ff66fc2 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -575,7 +575,7 @@ static const struct cred *ovl_setup_cred_for_create(struct dentry *dentry, } /* - * Caller is going to match this with revert_creds_light() and drop + * Caller is going to match this with revert_creds() and drop * referenec on the returned creds. * We must be called with creator creds already, otherwise we risk * leaking creds. diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 2513a79a10b0..0819c739cc2f 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -70,7 +70,7 @@ const struct cred *ovl_override_creds(struct super_block *sb) void ovl_revert_creds(const struct cred *old_cred) { - revert_creds_light(old_cred); + revert_creds(old_cred); } /* diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index f22dc0be357f..6284d924fdb1 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -175,7 +175,7 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, cifs_dbg(FYI, "key description = %s\n", description); saved_cred = override_creds(get_new_cred(spnego_cred)); spnego_key = request_key(&cifs_spnego_key_type, description, ""); - put_cred(revert_creds_light(saved_cred)); + put_cred(revert_creds(saved_cred)); #ifdef CONFIG_CIFS_DEBUG2 if (cifsFYI && !IS_ERR(spnego_key)) { diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c index d65e094b97cb..5718906369a9 100644 --- a/fs/smb/client/cifsacl.c +++ b/fs/smb/client/cifsacl.c @@ -327,7 +327,7 @@ id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid) out_key_put: key_put(sidkey); out_revert_creds: - put_cred(revert_creds_light(saved_cred)); + put_cred(revert_creds(saved_cred)); return rc; invalidate_key: @@ -438,7 +438,7 @@ try_upcall_to_get_id: out_key_put: key_put(sidkey); out_revert_creds: - put_cred(revert_creds_light(saved_cred)); + put_cred(revert_creds(saved_cred)); kfree(sidstr); /* diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 4a4cb9e4d45a..ec4106aa1945 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -800,7 +800,7 @@ void ksmbd_revert_fsids(struct ksmbd_work *work) WARN_ON(!work->saved_cred); cred = current_cred(); - put_cred(revert_creds_light(work->saved_cred)); + put_cred(revert_creds(work->saved_cred)); put_cred(cred); work->saved_cred = NULL; } diff --git a/include/linux/cred.h b/include/linux/cred.h index a073e6163c4e..a7df1c759ef0 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -178,7 +178,7 @@ static inline const struct cred *override_creds(const struct cred *override_cred return old; } -static inline const struct cred *revert_creds_light(const struct cred *revert_cred) +static inline const struct cred *revert_creds(const struct cred *revert_cred) { const struct cred *override_cred = current->cred; diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5b1cc024deea..3e408c8442d4 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1739,7 +1739,7 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) audit_uring_exit(!ret, ret); if (creds) - put_cred(revert_creds_light(creds)); + put_cred(revert_creds(creds)); if (ret == IOU_OK) { if (issue_flags & IO_URING_F_COMPLETE_DEFER) diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index 0fd424442118..1ca963474336 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -192,7 +192,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait)) wake_up(&ctx->sqo_sq_wait); if (creds) - put_cred(revert_creds_light(creds)); + put_cred(revert_creds(creds)); } return ret; diff --git a/kernel/acct.c b/kernel/acct.c index a51a3b483fd9..ea8c94887b58 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -541,7 +541,7 @@ static void do_acct_process(struct bsd_acct_struct *acct) } out: current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim; - put_cred(revert_creds_light(orig_cred)); + put_cred(revert_creds(orig_cred)); } /** diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2d618b577e52..1a94e8b154be 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5220,7 +5220,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, of->file->f_path.dentry->d_sb, threadgroup, ctx->ns); - put_cred(revert_creds_light(saved_cred)); + put_cred(revert_creds(saved_cred)); if (ret) goto out_finish; diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 857124d81f12..c54ae15f425c 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1476,7 +1476,7 @@ static int user_event_set_call_visible(struct user_event *user, bool visible) else ret = trace_remove_event_call(&user->call); - put_cred(revert_creds_light(old_cred)); + put_cred(revert_creds(old_cred)); put_cred(cred); return ret; diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index f8749d688d66..0b0789fe2194 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -126,7 +126,7 @@ int dns_query(struct net *net, */ saved_cred = override_creds(get_new_cred(dns_resolver_cache)); rkey = request_key_net(&key_type_dns_resolver, desc, net, options); - put_cred(revert_creds_light(saved_cred)); + put_cred(revert_creds(saved_cred)); kfree(desc); if (IS_ERR(rkey)) { ret = PTR_ERR(rkey); -- cgit v1.2.3 From 6efbb80490a545cfd9f87ebd9225879d8cdbed93 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 25 Nov 2024 15:10:25 +0100 Subject: cred: remove unused get_new_cred() This helper is not used anymore so remove it. Link: https://lore.kernel.org/r/20241125-work-cred-v2-29-68b9d38bb5b2@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- Documentation/security/credentials.rst | 5 ----- include/linux/cred.h | 13 ------------- 2 files changed, 18 deletions(-) (limited to 'include') diff --git a/Documentation/security/credentials.rst b/Documentation/security/credentials.rst index 357328d566c8..2aa0791bcefe 100644 --- a/Documentation/security/credentials.rst +++ b/Documentation/security/credentials.rst @@ -527,11 +527,6 @@ There are some functions to help manage credentials: This gets a reference on a live set of credentials, returning a pointer to that set of credentials. - - ``struct cred *get_new_cred(struct cred *cred);`` - - This gets a reference on a set of credentials that is under construction - and is thus still mutable, returning a pointer to that set of credentials. - Open File Credentials ===================== diff --git a/include/linux/cred.h b/include/linux/cred.h index a7df1c759ef0..360f5fd3854b 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -200,19 +200,6 @@ static inline struct cred *get_new_cred_many(struct cred *cred, int nr) return cred; } -/** - * get_new_cred - Get a reference on a new set of credentials - * @cred: The new credentials to reference - * - * Get a reference on the specified set of new credentials. The caller must - * release the reference. - */ -static inline struct cred *get_new_cred(const struct cred *cred) -{ - struct cred *nonconst_cred = (struct cred *) cred; - return get_new_cred_many(nonconst_cred, 1); -} - /** * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference -- cgit v1.2.3 From a6babf4cbeaaa1c97a205382cdc958571f668ea8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 26 Nov 2024 14:22:16 +0100 Subject: cred: fold get_new_cred_many() into get_cred_many() There's no need for this to be a separate helper. Link: https://lore.kernel.org/r/20241126-zaunpfahl-wovon-c3979b990a63@brauner Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/cred.h | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/cred.h b/include/linux/cred.h index 360f5fd3854b..0c3c4b16b469 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -186,20 +186,6 @@ static inline const struct cred *revert_creds(const struct cred *revert_cred) return override_cred; } -/** - * get_new_cred_many - Get references on a new set of credentials - * @cred: The new credentials to reference - * @nr: Number of references to acquire - * - * Get references on the specified set of new credentials. The caller must - * release all acquired references. - */ -static inline struct cred *get_new_cred_many(struct cred *cred, int nr) -{ - atomic_long_add(nr, &cred->usage); - return cred; -} - /** * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference @@ -220,7 +206,8 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) if (!cred) return cred; nonconst_cred->non_rcu = 0; - return get_new_cred_many(nonconst_cred, nr); + atomic_long_add(nr, &nonconst_cred->usage); + return cred; } /* -- cgit v1.2.3 From 7863dcc72d0f4b13a641065670426435448b3d80 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 22 Nov 2024 14:24:58 +0100 Subject: pid: allow pid_max to be set per pid namespace The pid_max sysctl is a global value. For a long time the default value has been 65535 and during the pidfd dicussions Linus proposed to bump pid_max by default (cf. [1]). Based on this discussion systemd started bumping pid_max to 2^22. So all new systems now run with a very high pid_max limit with some distros having also backported that change. The decision to bump pid_max is obviously correct. It just doesn't make a lot of sense nowadays to enforce such a low pid number. There's sufficient tooling to make selecting specific processes without typing really large pid numbers available. In any case, there are workloads that have expections about how large pid numbers they accept. Either for historical reasons or architectural reasons. One concreate example is the 32-bit version of Android's bionic libc which requires pid numbers less than 65536. There are workloads where it is run in a 32-bit container on a 64-bit kernel. If the host has a pid_max value greater than 65535 the libc will abort thread creation because of size assumptions of pthread_mutex_t. That's a fairly specific use-case however, in general specific workloads that are moved into containers running on a host with a new kernel and a new systemd can run into issues with large pid_max values. Obviously making assumptions about the size of the allocated pid is suboptimal but we have userspace that does it. Of course, giving containers the ability to restrict the number of processes in their respective pid namespace indepent of the global limit through pid_max is something desirable in itself and comes in handy in general. Independent of motivating use-cases the existence of pid namespaces makes this also a good semantical extension and there have been prior proposals pushing in a similar direction. The trick here is to minimize the risk of regressions which I think is doable. The fact that pid namespaces are hierarchical will help us here. What we mostly care about is that when the host sets a low pid_max limit, say (crazy number) 100 that no descendant pid namespace can allocate a higher pid number in its namespace. Since pid allocation is hierarchial this can be ensured by checking each pid allocation against the pid namespace's pid_max limit. This means if the allocation in the descendant pid namespace succeeds, the ancestor pid namespace can reject it. If the ancestor pid namespace has a higher limit than the descendant pid namespace the descendant pid namespace will reject the pid allocation. The ancestor pid namespace will obviously not care about this. All in all this means pid_max continues to enforce a system wide limit on the number of processes but allows pid namespaces sufficient leeway in handling workloads with assumptions about pid values and allows containers to restrict the number of processes in a pid namespace through the pid_max interface. [1]: https://lore.kernel.org/linux-api/CAHk-=wiZ40LVjnXSi9iHLE_-ZBsWFGCgdmNiYZUXn1-V5YBg2g@mail.gmail.com - rebased from 5.14-rc1 - a few fixes (missing ns_free_inum on error path, missing initialization, etc) - permission check changes in pid_table_root_permissions - unsigned int pid_max -> int pid_max (keep pid_max type as it was) - add READ_ONCE in alloc_pid() as suggested by Christian - rebased from 6.7 and take into account: * sysctl: treewide: drop unused argument ctl_table_root::set_ownership(table) * sysctl: treewide: constify ctl_table_header::ctl_table_arg * pidfd: add pidfs * tracing: Move saved_cmdline code into trace_sched_switch.c Signed-off-by: Alexander Mikhalitsyn Link: https://lore.kernel.org/r/20241122132459.135120-2-aleksandr.mikhalitsyn@canonical.com Signed-off-by: Christian Brauner --- include/linux/pid.h | 3 - include/linux/pid_namespace.h | 10 ++- kernel/pid.c | 125 +++++++++++++++++++++++++++++++++++--- kernel/pid_namespace.c | 43 +++++++++---- kernel/sysctl.c | 9 --- kernel/trace/pid_list.c | 2 +- kernel/trace/trace.h | 2 - kernel/trace/trace_sched_switch.c | 2 +- 8 files changed, 161 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/linux/pid.h b/include/linux/pid.h index a3aad9b4074c..c800cbee584b 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -106,9 +106,6 @@ extern void exchange_tids(struct task_struct *task, struct task_struct *old); extern void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type); -extern int pid_max; -extern int pid_max_min, pid_max_max; - /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index f9f9931e02d6..7c67a5811199 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -30,6 +30,7 @@ struct pid_namespace { struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; + int pid_max; struct pid_namespace *parent; #ifdef CONFIG_BSD_PROCESS_ACCT struct fs_pin *bacct; @@ -38,9 +39,14 @@ struct pid_namespace { struct ucounts *ucounts; int reboot; /* group exit code if this pidns was rebooted */ struct ns_common ns; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) + struct work_struct work; +#ifdef CONFIG_SYSCTL + struct ctl_table_set set; + struct ctl_table_header *sysctls; +#if defined(CONFIG_MEMFD_CREATE) int memfd_noexec_scope; #endif +#endif } __randomize_layout; extern struct pid_namespace init_pid_ns; @@ -117,6 +123,8 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pid_idr_init(void); +int register_pidns_sysctls(struct pid_namespace *pidns); +void unregister_pidns_sysctls(struct pid_namespace *pidns); static inline bool task_is_in_init_pid_ns(struct task_struct *tsk) { diff --git a/kernel/pid.c b/kernel/pid.c index 115448e89c3e..ce3e94e26a0f 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -60,10 +60,8 @@ struct pid init_struct_pid = { }, } }; -int pid_max = PID_MAX_DEFAULT; - -int pid_max_min = RESERVED_PIDS + 1; -int pid_max_max = PID_MAX_LIMIT; +static int pid_max_min = RESERVED_PIDS + 1; +static int pid_max_max = PID_MAX_LIMIT; /* * Pseudo filesystems start inode numbering after one. We use Reserved * PIDs as a natural offset. @@ -87,6 +85,7 @@ struct pid_namespace init_pid_ns = { #ifdef CONFIG_PID_NS .ns.ops = &pidns_operations, #endif + .pid_max = PID_MAX_DEFAULT, #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC, #endif @@ -193,6 +192,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, for (i = ns->level; i >= 0; i--) { int tid = 0; + int pid_max = READ_ONCE(tmp->pid_max); if (set_tid_size) { tid = set_tid[ns->level - i]; @@ -644,17 +644,118 @@ SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags) return fd; } +#ifdef CONFIG_SYSCTL +static struct ctl_table_set *pid_table_root_lookup(struct ctl_table_root *root) +{ + return &task_active_pid_ns(current)->set; +} + +static int set_is_seen(struct ctl_table_set *set) +{ + return &task_active_pid_ns(current)->set == set; +} + +static int pid_table_root_permissions(struct ctl_table_header *head, + const struct ctl_table *table) +{ + struct pid_namespace *pidns = + container_of(head->set, struct pid_namespace, set); + int mode = table->mode; + + if (ns_capable(pidns->user_ns, CAP_SYS_ADMIN) || + uid_eq(current_euid(), make_kuid(pidns->user_ns, 0))) + mode = (mode & S_IRWXU) >> 6; + else if (in_egroup_p(make_kgid(pidns->user_ns, 0))) + mode = (mode & S_IRWXG) >> 3; + else + mode = mode & S_IROTH; + return (mode << 6) | (mode << 3) | mode; +} + +static void pid_table_root_set_ownership(struct ctl_table_header *head, + kuid_t *uid, kgid_t *gid) +{ + struct pid_namespace *pidns = + container_of(head->set, struct pid_namespace, set); + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ns_root_uid = make_kuid(pidns->user_ns, 0); + if (uid_valid(ns_root_uid)) + *uid = ns_root_uid; + + ns_root_gid = make_kgid(pidns->user_ns, 0); + if (gid_valid(ns_root_gid)) + *gid = ns_root_gid; +} + +static struct ctl_table_root pid_table_root = { + .lookup = pid_table_root_lookup, + .permissions = pid_table_root_permissions, + .set_ownership = pid_table_root_set_ownership, +}; + +static struct ctl_table pid_table[] = { + { + .procname = "pid_max", + .data = &init_pid_ns.pid_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &pid_max_min, + .extra2 = &pid_max_max, + }, +}; +#endif + +int register_pidns_sysctls(struct pid_namespace *pidns) +{ +#ifdef CONFIG_SYSCTL + struct ctl_table *tbl; + + setup_sysctl_set(&pidns->set, &pid_table_root, set_is_seen); + + tbl = kmemdup(pid_table, sizeof(pid_table), GFP_KERNEL); + if (!tbl) + return -ENOMEM; + tbl->data = &pidns->pid_max; + pidns->pid_max = min(pid_max_max, max_t(int, pidns->pid_max, + PIDS_PER_CPU_DEFAULT * num_possible_cpus())); + + pidns->sysctls = __register_sysctl_table(&pidns->set, "kernel", tbl, + ARRAY_SIZE(pid_table)); + if (!pidns->sysctls) { + kfree(tbl); + retire_sysctl_set(&pidns->set); + return -ENOMEM; + } +#endif + return 0; +} + +void unregister_pidns_sysctls(struct pid_namespace *pidns) +{ +#ifdef CONFIG_SYSCTL + const struct ctl_table *tbl; + + tbl = pidns->sysctls->ctl_table_arg; + unregister_sysctl_table(pidns->sysctls); + retire_sysctl_set(&pidns->set); + kfree(tbl); +#endif +} + void __init pid_idr_init(void) { /* Verify no one has done anything silly: */ BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING); /* bump default and minimum pid_max based on number of cpus */ - pid_max = min(pid_max_max, max_t(int, pid_max, - PIDS_PER_CPU_DEFAULT * num_possible_cpus())); + init_pid_ns.pid_max = min(pid_max_max, max_t(int, init_pid_ns.pid_max, + PIDS_PER_CPU_DEFAULT * num_possible_cpus())); pid_max_min = max_t(int, pid_max_min, PIDS_PER_CPU_MIN * num_possible_cpus()); - pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min); + pr_info("pid_max: default: %u minimum: %u\n", init_pid_ns.pid_max, pid_max_min); idr_init(&init_pid_ns.idr); @@ -665,6 +766,16 @@ void __init pid_idr_init(void) NULL); } +static __init int pid_namespace_sysctl_init(void) +{ +#ifdef CONFIG_SYSCTL + /* "kernel" directory will have already been initialized. */ + BUG_ON(register_pidns_sysctls(&init_pid_ns)); +#endif + return 0; +} +subsys_initcall(pid_namespace_sysctl_init); + static struct file *__pidfd_fget(struct task_struct *task, int fd) { struct file *file; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index d70ab49d5b4a..f1ffa032fc32 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -70,6 +70,8 @@ static void dec_pid_namespaces(struct ucounts *ucounts) dec_ucount(ucounts, UCOUNT_PID_NAMESPACES); } +static void destroy_pid_namespace_work(struct work_struct *work); + static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns, struct pid_namespace *parent_pid_ns) { @@ -105,17 +107,27 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns goto out_free_idr; ns->ns.ops = &pidns_operations; + ns->pid_max = parent_pid_ns->pid_max; + err = register_pidns_sysctls(ns); + if (err) + goto out_free_inum; + refcount_set(&ns->ns.count, 1); ns->level = level; ns->parent = get_pid_ns(parent_pid_ns); ns->user_ns = get_user_ns(user_ns); ns->ucounts = ucounts; ns->pid_allocated = PIDNS_ADDING; + INIT_WORK(&ns->work, destroy_pid_namespace_work); + #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE) ns->memfd_noexec_scope = pidns_memfd_noexec_scope(parent_pid_ns); #endif + return ns; +out_free_inum: + ns_free_inum(&ns->ns); out_free_idr: idr_destroy(&ns->idr); kmem_cache_free(pid_ns_cachep, ns); @@ -137,12 +149,28 @@ static void delayed_free_pidns(struct rcu_head *p) static void destroy_pid_namespace(struct pid_namespace *ns) { + unregister_pidns_sysctls(ns); + ns_free_inum(&ns->ns); idr_destroy(&ns->idr); call_rcu(&ns->rcu, delayed_free_pidns); } +static void destroy_pid_namespace_work(struct work_struct *work) +{ + struct pid_namespace *ns = + container_of(work, struct pid_namespace, work); + + do { + struct pid_namespace *parent; + + parent = ns->parent; + destroy_pid_namespace(ns); + ns = parent; + } while (ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count)); +} + struct pid_namespace *copy_pid_ns(unsigned long flags, struct user_namespace *user_ns, struct pid_namespace *old_ns) { @@ -155,15 +183,8 @@ struct pid_namespace *copy_pid_ns(unsigned long flags, void put_pid_ns(struct pid_namespace *ns) { - struct pid_namespace *parent; - - while (ns != &init_pid_ns) { - parent = ns->parent; - if (!refcount_dec_and_test(&ns->ns.count)) - break; - destroy_pid_namespace(ns); - ns = parent; - } + if (ns && ns != &init_pid_ns && refcount_dec_and_test(&ns->ns.count)) + schedule_work(&ns->work); } EXPORT_SYMBOL_GPL(put_pid_ns); @@ -274,6 +295,7 @@ static int pid_ns_ctl_handler(const struct ctl_table *table, int write, next = idr_get_cursor(&pid_ns->idr) - 1; tmp.data = &next; + tmp.extra2 = &pid_ns->pid_max; ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (!ret && write) idr_set_cursor(&pid_ns->idr, next + 1); @@ -281,7 +303,6 @@ static int pid_ns_ctl_handler(const struct ctl_table *table, int write, return ret; } -extern int pid_max; static struct ctl_table pid_ns_ctl_table[] = { { .procname = "ns_last_pid", @@ -289,7 +310,7 @@ static struct ctl_table pid_ns_ctl_table[] = { .mode = 0666, /* permissions are checked in the handler */ .proc_handler = pid_ns_ctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &pid_max, + .extra2 = &init_pid_ns.pid_max, }, }; #endif /* CONFIG_CHECKPOINT_RESTORE */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5c9202cb8f59..7ae7a4136855 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1803,15 +1803,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif - { - .procname = "pid_max", - .data = &pid_max, - .maxlen = sizeof (int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &pid_max_min, - .extra2 = &pid_max_max, - }, { .procname = "panic_on_oops", .data = &panic_on_oops, diff --git a/kernel/trace/pid_list.c b/kernel/trace/pid_list.c index 4966e6bbdf6f..c62b9b3cfb3d 100644 --- a/kernel/trace/pid_list.c +++ b/kernel/trace/pid_list.c @@ -414,7 +414,7 @@ struct trace_pid_list *trace_pid_list_alloc(void) int i; /* According to linux/thread.h, pids can be no bigger that 30 bits */ - WARN_ON_ONCE(pid_max > (1 << 30)); + WARN_ON_ONCE(init_pid_ns.pid_max > (1 << 30)); pid_list = kzalloc(sizeof(*pid_list), GFP_KERNEL); if (!pid_list) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 266740b4e121..91def760f364 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -718,8 +718,6 @@ extern unsigned long tracing_thresh; /* PID filtering */ -extern int pid_max; - bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid); bool trace_ignore_this_task(struct trace_pid_list *filtered_pids, diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index 573b5d8e8a28..cb49f7279dc8 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -442,7 +442,7 @@ int trace_alloc_tgid_map(void) if (tgid_map) return 0; - tgid_map_max = pid_max; + tgid_map_max = init_pid_ns.pid_max; map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map), GFP_KERNEL); if (!map) -- cgit v1.2.3 From 96450ead16527cbef559b5bd046182e731228f95 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 22 Nov 2024 09:44:14 -0800 Subject: seqlock: add raw_seqcount_try_begin Add raw_seqcount_try_begin() to opens a read critical section of the given seqcount_t if the counter is even. This enables eliding the critical section entirely if the counter is odd, instead of doing the speculation knowing it will fail. Suggested-by: Peter Zijlstra Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: David Hildenbrand Reviewed-by: Liam R. Howlett Link: https://lkml.kernel.org/r/20241122174416.1367052-1-surenb@google.com --- include/linux/seqlock.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5298765d6ca4..22c2c48b4265 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) __seq; \ }) +/** + * raw_seqcount_try_begin() - begin a seqcount_t read critical section + * w/o lockdep and w/o counter stabilization + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants + * + * Similar to raw_seqcount_begin(), except it enables eliding the critical + * section entirely if odd, instead of doing the speculation knowing it will + * fail. + * + * Useful when counter stabilization is more or less equivalent to taking + * the lock and there is a slowpath that does that. + * + * If true, start will be set to the (even) sequence count read. + * + * Return: true when a read critical section is started. + */ +#define raw_seqcount_try_begin(s, start) \ +({ \ + start = raw_read_seqcount(s); \ + !(start & 1); \ +}) + /** * raw_seqcount_begin() - begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization -- cgit v1.2.3 From eb449bd96954b1c1e491d19066cfd2a010f0aa47 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 22 Nov 2024 09:44:15 -0800 Subject: mm: convert mm_lock_seq to a proper seqcount Convert mm_lock_seq to be seqcount_t and change all mmap_write_lock variants to increment it, in-line with the usual seqcount usage pattern. This lets us check whether the mmap_lock is write-locked by checking mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be used when implementing mmap_lock speculation functions. As a result vm_lock_seq is also change to be unsigned to match the type of mm_lock_seq.sequence. Suggested-by: Peter Zijlstra Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Liam R. Howlett Link: https://lkml.kernel.org/r/20241122174416.1367052-2-surenb@google.com --- include/linux/mm.h | 12 ++++----- include/linux/mm_types.h | 7 +++-- include/linux/mmap_lock.h | 55 ++++++++++++++++++++++++++-------------- kernel/fork.c | 5 +--- mm/init-mm.c | 2 +- tools/testing/vma/vma.c | 4 +-- tools/testing/vma/vma_internal.h | 4 +-- 7 files changed, 53 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c39c4945946c..ca59d165f1f2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -710,7 +710,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * we don't rely on for anything - the mm_lock_seq read against which we * need ordering is below. */ - if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq)) + if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence)) return false; if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0)) @@ -727,7 +727,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma) * after it has been unlocked. * This pairs with RELEASE semantics in vma_end_write_all(). */ - if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) { + if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) { up_read(&vma->vm_lock->lock); return false; } @@ -742,7 +742,7 @@ static inline void vma_end_read(struct vm_area_struct *vma) } /* WARNING! Can only be used if mmap_lock is expected to be write-locked */ -static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) +static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq) { mmap_assert_write_locked(vma->vm_mm); @@ -750,7 +750,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) * current task is holding mmap_write_lock, both vma->vm_lock_seq and * mm->mm_lock_seq can't be concurrently modified. */ - *mm_lock_seq = vma->vm_mm->mm_lock_seq; + *mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence; return (vma->vm_lock_seq == *mm_lock_seq); } @@ -761,7 +761,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq) */ static inline void vma_start_write(struct vm_area_struct *vma) { - int mm_lock_seq; + unsigned int mm_lock_seq; if (__is_vma_write_locked(vma, &mm_lock_seq)) return; @@ -779,7 +779,7 @@ static inline void vma_start_write(struct vm_area_struct *vma) static inline void vma_assert_write_locked(struct vm_area_struct *vma) { - int mm_lock_seq; + unsigned int mm_lock_seq; VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7361a8f3ab68..97e2f4fe1d6c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -697,7 +697,7 @@ struct vm_area_struct { * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ - int vm_lock_seq; + unsigned int vm_lock_seq; /* Unstable RCU readers are allowed to read this. */ struct vma_lock *vm_lock; #endif @@ -891,6 +891,9 @@ struct mm_struct { * Roughly speaking, incrementing the sequence number is * equivalent to releasing locks on VMAs; reading the sequence * number can be part of taking a read lock on a VMA. + * Incremented every time mmap_lock is write-locked/unlocked. + * Initialized to 0, therefore odd values indicate mmap_lock + * is write-locked and even values that it's released. * * Can be modified under write mmap_lock using RELEASE * semantics. @@ -899,7 +902,7 @@ struct mm_struct { * Can be read with ACQUIRE semantics if not holding write * mmap_lock. */ - int mm_lock_seq; + seqcount_t mm_lock_seq; #endif diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index de9dc20b01ba..9715326f5a85 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -71,39 +71,39 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm) } #ifdef CONFIG_PER_VMA_LOCK -/* - * Drop all currently-held per-VMA locks. - * This is called from the mmap_lock implementation directly before releasing - * a write-locked mmap_lock (or downgrading it to read-locked). - * This should normally NOT be called manually from other places. - * If you want to call this manually anyway, keep in mind that this will release - * *all* VMA write locks, including ones from further up the stack. - */ -static inline void vma_end_write_all(struct mm_struct *mm) +static inline void mm_lock_seqcount_init(struct mm_struct *mm) { - mmap_assert_write_locked(mm); - /* - * Nobody can concurrently modify mm->mm_lock_seq due to exclusive - * mmap_lock being held. - * We need RELEASE semantics here to ensure that preceding stores into - * the VMA take effect before we unlock it with this store. - * Pairs with ACQUIRE semantics in vma_start_read(). - */ - smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1); + seqcount_init(&mm->mm_lock_seq); +} + +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) +{ + do_raw_write_seqcount_begin(&mm->mm_lock_seq); +} + +static inline void mm_lock_seqcount_end(struct mm_struct *mm) +{ + ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq); + do_raw_write_seqcount_end(&mm->mm_lock_seq); } + #else -static inline void vma_end_write_all(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {} +static inline void mm_lock_seqcount_end(struct mm_struct *mm) {} #endif static inline void mmap_init_lock(struct mm_struct *mm) { init_rwsem(&mm->mmap_lock); + mm_lock_seqcount_init(mm); } static inline void mmap_write_lock(struct mm_struct *mm) { __mmap_lock_trace_start_locking(mm, true); down_write(&mm->mmap_lock); + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, true); } @@ -111,6 +111,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass) { __mmap_lock_trace_start_locking(mm, true); down_write_nested(&mm->mmap_lock, subclass); + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, true); } @@ -120,10 +121,26 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm) __mmap_lock_trace_start_locking(mm, true); ret = down_write_killable(&mm->mmap_lock); + if (!ret) + mm_lock_seqcount_begin(mm); __mmap_lock_trace_acquire_returned(mm, true, ret == 0); return ret; } +/* + * Drop all currently-held per-VMA locks. + * This is called from the mmap_lock implementation directly before releasing + * a write-locked mmap_lock (or downgrading it to read-locked). + * This should normally NOT be called manually from other places. + * If you want to call this manually anyway, keep in mind that this will release + * *all* VMA write locks, including ones from further up the stack. + */ +static inline void vma_end_write_all(struct mm_struct *mm) +{ + mmap_assert_write_locked(mm); + mm_lock_seqcount_end(mm); +} + static inline void mmap_write_unlock(struct mm_struct *mm) { __mmap_lock_trace_released(mm, true); diff --git a/kernel/fork.c b/kernel/fork.c index 1450b461d196..8dc670fe90d4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -448,7 +448,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma) return false; init_rwsem(&vma->vm_lock->lock); - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return true; } @@ -1267,9 +1267,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, seqcount_init(&mm->write_protect_seq); mmap_init_lock(mm); INIT_LIST_HEAD(&mm->mmlist); -#ifdef CONFIG_PER_VMA_LOCK - mm->mm_lock_seq = 0; -#endif mm_pgtables_bytes_init(mm); mm->map_count = 0; mm->locked_vm = 0; diff --git a/mm/init-mm.c b/mm/init-mm.c index 24c809379274..6af3ad675930 100644 --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -40,7 +40,7 @@ struct mm_struct init_mm = { .arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), #ifdef CONFIG_PER_VMA_LOCK - .mm_lock_seq = 0, + .mm_lock_seq = SEQCNT_ZERO(init_mm.mm_lock_seq), #endif .user_ns = &init_user_ns, .cpu_bitmap = CPU_BITS_NONE, diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index 8fab5e13c7c3..9bcf1736bf18 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -89,7 +89,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, * begun. Linking to the tree will have caused this to be incremented, * which means we will get a false positive otherwise. */ - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return vma; } @@ -214,7 +214,7 @@ static bool vma_write_started(struct vm_area_struct *vma) int seq = vma->vm_lock_seq; /* We reset after each check. */ - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; /* The vma_start_write() stub simply increments this value. */ return seq > -1; diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index e76ff579e1fd..1d9fc97b8e80 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -241,7 +241,7 @@ struct vm_area_struct { * counter reuse can only lead to occasional unnecessary use of the * slowpath. */ - int vm_lock_seq; + unsigned int vm_lock_seq; struct vma_lock *vm_lock; #endif @@ -416,7 +416,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma) return false; init_rwsem(&vma->vm_lock->lock); - vma->vm_lock_seq = -1; + vma->vm_lock_seq = UINT_MAX; return true; } -- cgit v1.2.3 From 03a001b156d2da186a5618de242750d06bf81e2d Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Fri, 22 Nov 2024 09:44:16 -0800 Subject: mm: introduce mmap_lock_speculate_{try_begin|retry} Add helper functions to speculatively perform operations without read-locking mmap_lock, expecting that mmap_lock will not be write-locked and mm is not modified from under us. Suggested-by: Peter Zijlstra Signed-off-by: Suren Baghdasaryan Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Liam R. Howlett Link: https://lkml.kernel.org/r/20241122174416.1367052-3-surenb@google.com --- include/linux/mmap_lock.h | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 9715326f5a85..45a21faa3ff6 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -71,6 +71,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm) } #ifdef CONFIG_PER_VMA_LOCK + static inline void mm_lock_seqcount_init(struct mm_struct *mm) { seqcount_init(&mm->mm_lock_seq); @@ -87,11 +88,39 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm) do_raw_write_seqcount_end(&mm->mm_lock_seq); } -#else +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq) +{ + /* + * Since mmap_lock is a sleeping lock, and waiting for it to become + * unlocked is more or less equivalent with taking it ourselves, don't + * bother with the speculative path if mmap_lock is already write-locked + * and take the slow path, which takes the lock. + */ + return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq); +} + +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq) +{ + return read_seqcount_retry(&mm->mm_lock_seq, seq); +} + +#else /* CONFIG_PER_VMA_LOCK */ + static inline void mm_lock_seqcount_init(struct mm_struct *mm) {} static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {} static inline void mm_lock_seqcount_end(struct mm_struct *mm) {} -#endif + +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq) +{ + return false; +} + +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq) +{ + return true; +} + +#endif /* CONFIG_PER_VMA_LOCK */ static inline void mmap_init_lock(struct mm_struct *mm) { -- cgit v1.2.3 From 2116b349e29a2e9ba17ea2e45b31234e4b350793 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:52 +0100 Subject: objtool: Generic annotation infrastructure Avoid endless .discard.foo sections for each annotation, create a single .discard.annotate_insn section that takes an annotation type along with the instruction. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094310.932794537@infradead.org --- include/linux/objtool.h | 18 ++++++++++++++++++ tools/objtool/check.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) (limited to 'include') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index b3b8d3dab52d..d98531ecc687 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -57,6 +57,13 @@ ".long 998b\n\t" \ ".popsection\n\t" +#define ASM_ANNOTATE(type) \ + "911:\n\t" \ + ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \ + ".long 911b - .\n\t" \ + ".long " __stringify(type) "\n\t" \ + ".popsection\n\t" + #else /* __ASSEMBLY__ */ /* @@ -146,6 +153,14 @@ .popsection .endm +.macro ANNOTATE type:req +.Lhere_\@: + .pushsection .discard.annotate_insn,"M",@progbits,8 + .long .Lhere_\@ - . + .long \type + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_OBJTOOL */ @@ -155,6 +170,7 @@ #define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) +#define ASM_ANNOTATE(type) #define ANNOTATE_NOENDBR #define ASM_REACHABLE #else @@ -167,6 +183,8 @@ .endm .macro REACHABLE .endm +.macro ANNOTATE type:req +.endm #endif #endif /* CONFIG_OBJTOOL */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4ce176ad411f..b0efc8ee16d6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2373,6 +2373,49 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } +static int read_annotate(struct objtool_file *file, void (*func)(int type, struct instruction *insn)) +{ + struct section *sec; + struct instruction *insn; + struct reloc *reloc; + int type; + + sec = find_section_by_name(file->elf, ".discard.annotate_insn"); + if (!sec) + return 0; + + if (!sec->rsec) + return 0; + + if (sec->sh.sh_entsize != 8) { + static bool warned = false; + if (!warned) { + WARN("%s: dodgy linker, sh_entsize != 8", sec->name); + warned = true; + } + sec->sh.sh_entsize = 8; + } + + for_each_reloc(sec->rsec, reloc) { + type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); + + insn = find_insn(file, reloc->sym->sec, + reloc->sym->offset + reloc_addend(reloc)); + if (!insn) { + WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); + return -1; + } + + func(type, insn); + } + + return 0; +} + +static void __annotate_nop(int type, struct instruction *insn) +{ +} + static int read_noendbr_hints(struct objtool_file *file) { struct instruction *insn; @@ -2670,6 +2713,8 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + read_annotate(file, __annotate_nop); + /* * Must be before read_unwind_hints() since that needs insn->noendbr. */ -- cgit v1.2.3 From 22c3d58079688b697f36d670616e463cbb14d058 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:53 +0100 Subject: objtool: Convert ANNOTATE_NOENDBR to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.042140333@infradead.org --- include/linux/objtool.h | 17 ++++------------- include/linux/objtool_types.h | 5 +++++ tools/include/linux/objtool_types.h | 5 +++++ tools/objtool/check.c | 32 +++++--------------------------- 4 files changed, 19 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index d98531ecc687..b5e9c0ab4048 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -45,12 +45,6 @@ #define STACK_FRAME_NON_STANDARD_FP(func) #endif -#define ANNOTATE_NOENDBR \ - "986: \n\t" \ - ".pushsection .discard.noendbr\n\t" \ - ".long 986b\n\t" \ - ".popsection\n\t" - #define ASM_REACHABLE \ "998:\n\t" \ ".pushsection .discard.reachable\n\t" \ @@ -64,6 +58,8 @@ ".long " __stringify(type) "\n\t" \ ".popsection\n\t" +#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) + #else /* __ASSEMBLY__ */ /* @@ -122,13 +118,6 @@ #endif .endm -.macro ANNOTATE_NOENDBR -.Lhere_\@: - .pushsection .discard.noendbr - .long .Lhere_\@ - .popsection -.endm - /* * Use objtool to validate the entry requirement that all code paths do * VALIDATE_UNRET_END before RET. @@ -161,6 +150,8 @@ .popsection .endm +#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_OBJTOOL */ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index 453a4f4ef39d..4884f8cf8429 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -54,4 +54,9 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 453a4f4ef39d..4884f8cf8429 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -54,4 +54,9 @@ struct unwind_hint { #define UNWIND_HINT_TYPE_SAVE 6 #define UNWIND_HINT_TYPE_RESTORE 7 +/* + * Annotate types + */ +#define ANNOTYPE_NOENDBR 1 + #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b0efc8ee16d6..a74ff26860f7 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2412,32 +2412,12 @@ static int read_annotate(struct objtool_file *file, void (*func)(int type, struc return 0; } -static void __annotate_nop(int type, struct instruction *insn) +static void __annotate_noendbr(int type, struct instruction *insn) { -} - -static int read_noendbr_hints(struct objtool_file *file) -{ - struct instruction *insn; - struct section *rsec; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.noendbr"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - insn = find_insn(file, reloc->sym->sec, - reloc->sym->offset + reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.noendbr entry"); - return -1; - } - - insn->noendbr = 1; - } + if (type != ANNOTYPE_NOENDBR) + return; - return 0; + insn->noendbr = 1; } static int read_retpoline_hints(struct objtool_file *file) @@ -2713,12 +2693,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - read_annotate(file, __annotate_nop); - /* * Must be before read_unwind_hints() since that needs insn->noendbr. */ - ret = read_noendbr_hints(file); + ret = read_annotate(file, __annotate_noendbr); if (ret) return ret; -- cgit v1.2.3 From bf5febebd99fddfc6226a94e937d38a8d470b24e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:54 +0100 Subject: objtool: Convert ANNOTATE_RETPOLINE_SAFE to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.145275669@infradead.org --- arch/x86/include/asm/nospec-branch.h | 13 ++------- include/linux/objtool_types.h | 1 + tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 52 +++++++++++++----------------------- 4 files changed, 22 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 96b410b1d4e8..50340a125953 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -184,12 +184,7 @@ * objtool the subsequent indirect jump/call is vouched safe for retpoline * builds. */ -.macro ANNOTATE_RETPOLINE_SAFE -.Lhere_\@: - .pushsection .discard.retpoline_safe - .long .Lhere_\@ - .popsection -.endm +#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE /* * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions @@ -350,11 +345,7 @@ #else /* __ASSEMBLY__ */ -#define ANNOTATE_RETPOLINE_SAFE \ - "999:\n\t" \ - ".pushsection .discard.retpoline_safe\n\t" \ - ".long 999b\n\t" \ - ".popsection\n\t" +#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE) typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index 4884f8cf8429..1b348361ad1d 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -58,5 +58,6 @@ struct unwind_hint { * Annotate types */ #define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 4884f8cf8429..1b348361ad1d 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -58,5 +58,6 @@ struct unwind_hint { * Annotate types */ #define ANNOTYPE_NOENDBR 1 +#define ANNOTYPE_RETPOLINE_SAFE 2 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index a74ff26860f7..c5b52309b80d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2373,12 +2373,12 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } -static int read_annotate(struct objtool_file *file, void (*func)(int type, struct instruction *insn)) +static int read_annotate(struct objtool_file *file, int (*func)(int type, struct instruction *insn)) { struct section *sec; struct instruction *insn; struct reloc *reloc; - int type; + int type, ret; sec = find_section_by_name(file->elf, ".discard.annotate_insn"); if (!sec) @@ -2406,53 +2406,37 @@ static int read_annotate(struct objtool_file *file, void (*func)(int type, struc return -1; } - func(type, insn); + ret = func(type, insn); + if (ret < 0) + return ret; } return 0; } -static void __annotate_noendbr(int type, struct instruction *insn) +static int __annotate_noendbr(int type, struct instruction *insn) { if (type != ANNOTYPE_NOENDBR) - return; + return 0; insn->noendbr = 1; + return 0; } -static int read_retpoline_hints(struct objtool_file *file) +static int __annotate_retpoline_safe(int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe"); - if (!rsec) + if (type != ANNOTYPE_RETPOLINE_SAFE) return 0; - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.retpoline_safe entry"); - return -1; - } - - if (insn->type != INSN_JUMP_DYNAMIC && - insn->type != INSN_CALL_DYNAMIC && - insn->type != INSN_RETURN && - insn->type != INSN_NOP) { - WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); - return -1; - } - - insn->retpoline_safe = true; + if (insn->type != INSN_JUMP_DYNAMIC && + insn->type != INSN_CALL_DYNAMIC && + insn->type != INSN_RETURN && + insn->type != INSN_NOP) { + WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop"); + return -1; } + insn->retpoline_safe = true; return 0; } @@ -2742,7 +2726,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_retpoline_hints(file); + ret = read_annotate(file, __annotate_retpoline_safe); if (ret) return ret; -- cgit v1.2.3 From 317f2a64618c528539d17fe6957a64106087fbd2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:55 +0100 Subject: objtool: Convert instrumentation_{begin,end}() to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.245980207@infradead.org --- include/linux/instrumentation.h | 11 ++++----- include/linux/objtool.h | 12 ++++++--- include/linux/objtool_types.h | 2 ++ tools/include/linux/objtool_types.h | 2 ++ tools/objtool/check.c | 49 ++++++++----------------------------- 5 files changed, 28 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index bc7babe91b2e..c8f866cf02d8 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -4,14 +4,14 @@ #ifdef CONFIG_NOINSTR_VALIDATION +#include #include /* Begin/end of an instrumentation safe region */ #define __instrumentation_begin(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_begin\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ + __ASM_ANNOTATE(__ASM_BREF(c), ANNOTYPE_INSTR_BEGIN)\ + : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -48,9 +48,8 @@ */ #define __instrumentation_end(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - ".pushsection .discard.instr_end\n\t" \ - ".long " __stringify(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ + __ASM_ANNOTATE(__ASM_BREF(c), ANNOTYPE_INSTR_END) \ + : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) #else /* !CONFIG_NOINSTR_VALIDATION */ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index b5e9c0ab4048..89c67cd7eebe 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -51,13 +51,18 @@ ".long 998b\n\t" \ ".popsection\n\t" -#define ASM_ANNOTATE(type) \ - "911:\n\t" \ +#define __ASM_BREF(label) label ## b + +#define __ASM_ANNOTATE(label, type) \ ".pushsection .discard.annotate_insn,\"M\",@progbits,8\n\t" \ - ".long 911b - .\n\t" \ + ".long " __stringify(label) " - .\n\t" \ ".long " __stringify(type) "\n\t" \ ".popsection\n\t" +#define ASM_ANNOTATE(type) \ + "911:\n\t" \ + __ASM_ANNOTATE(911b, type) + #define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) #else /* __ASSEMBLY__ */ @@ -161,6 +166,7 @@ #define UNWIND_HINT(type, sp_reg, sp_offset, signal) "\n\t" #define STACK_FRAME_NON_STANDARD(func) #define STACK_FRAME_NON_STANDARD_FP(func) +#define __ASM_ANNOTATE(label, type) #define ASM_ANNOTATE(type) #define ANNOTATE_NOENDBR #define ASM_REACHABLE diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index 1b348361ad1d..d4d68dd36f7a 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -59,5 +59,7 @@ struct unwind_hint { */ #define ANNOTYPE_NOENDBR 1 #define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 1b348361ad1d..d4d68dd36f7a 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -59,5 +59,7 @@ struct unwind_hint { */ #define ANNOTYPE_NOENDBR 1 #define ANNOTYPE_RETPOLINE_SAFE 2 +#define ANNOTYPE_INSTR_BEGIN 3 +#define ANNOTYPE_INSTR_END 4 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c5b52309b80d..8e39c7f484d8 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2440,48 +2440,19 @@ static int __annotate_retpoline_safe(int type, struct instruction *insn) return 0; } -static int read_instr_hints(struct objtool_file *file) +static int __annotate_instr(int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.instr_end"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_end entry"); - return -1; - } + switch (type) { + case ANNOTYPE_INSTR_BEGIN: + insn->instr++; + break; + case ANNOTYPE_INSTR_END: insn->instr--; - } - - rsec = find_section_by_name(file->elf, ".rela.discard.instr_begin"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_begin entry"); - return -1; - } + break; - insn->instr++; + default: + break; } return 0; @@ -2730,7 +2701,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_instr_hints(file); + ret = read_annotate(file, __annotate_instr); if (ret) return ret; -- cgit v1.2.3 From 18aa6118a1689b4d73c5ebbd917ae3f20c9c0db1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:56 +0100 Subject: objtool: Convert VALIDATE_UNRET_BEGIN to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.358508242@infradead.org --- include/linux/objtool.h | 9 +++------ include/linux/objtool_types.h | 1 + tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 28 +++++----------------------- 4 files changed, 10 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 89c67cd7eebe..5f0bf8052dc7 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -130,15 +130,12 @@ * NOTE: The macro must be used at the beginning of a global symbol, otherwise * it will be ignored. */ -.macro VALIDATE_UNRET_BEGIN #if defined(CONFIG_NOINSTR_VALIDATION) && \ (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) -.Lhere_\@: - .pushsection .discard.validate_unret - .long .Lhere_\@ - . - .popsection +#define VALIDATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN +#else +#define VALIDATE_UNRET_BEGIN #endif -.endm .macro REACHABLE .Lhere_\@: diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index d4d68dd36f7a..16236a56364b 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -61,5 +61,6 @@ struct unwind_hint { #define ANNOTYPE_RETPOLINE_SAFE 2 #define ANNOTYPE_INSTR_BEGIN 3 #define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index d4d68dd36f7a..16236a56364b 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -61,5 +61,6 @@ struct unwind_hint { #define ANNOTYPE_RETPOLINE_SAFE 2 #define ANNOTYPE_INSTR_BEGIN 3 #define ANNOTYPE_INSTR_END 4 +#define ANNOTYPE_UNRET_BEGIN 5 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8e39c7f484d8..2a703748cad1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2458,33 +2458,15 @@ static int __annotate_instr(int type, struct instruction *insn) return 0; } -static int read_validate_unret_hints(struct objtool_file *file) +static int __annotate_unret(int type, struct instruction *insn) { - struct section *rsec; - struct instruction *insn; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.validate_unret"); - if (!rsec) + if (type != ANNOTYPE_UNRET_BEGIN) return 0; - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.instr_end entry"); - return -1; - } - insn->unret = 1; - } - + insn->unret = 1; return 0; -} +} static int read_intra_function_calls(struct objtool_file *file) { @@ -2705,7 +2687,7 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - ret = read_validate_unret_hints(file); + ret = read_annotate(file, __annotate_unret); if (ret) return ret; -- cgit v1.2.3 From f0cd57c35a75f152d3b31b9be3f7f413b96a6d3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:57 +0100 Subject: objtool: Convert ANNOTATE_IGNORE_ALTERNATIVE to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.465691316@infradead.org --- arch/x86/include/asm/alternative.h | 14 +++--------- include/linux/objtool_types.h | 1 + tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 45 +++++++++---------------------------- 4 files changed, 15 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index dc03a647776d..595695f85f80 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -4,6 +4,7 @@ #include #include +#include #include #define ALT_FLAGS_SHIFT 16 @@ -58,11 +59,7 @@ * objtool annotation to ignore the alternatives and only consider the original * instruction(s). */ -#define ANNOTATE_IGNORE_ALTERNATIVE \ - "999:\n\t" \ - ".pushsection .discard.ignore_alts\n\t" \ - ".long 999b\n\t" \ - ".popsection\n\t" +#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS) /* * The patching flags are part of the upper bits of the @ft_flags parameter when @@ -314,12 +311,7 @@ void nop_func(void); * objtool annotation to ignore the alternatives and only consider the original * instruction(s). */ -.macro ANNOTATE_IGNORE_ALTERNATIVE - .Lannotate_\@: - .pushsection .discard.ignore_alts - .long .Lannotate_\@ - .popsection -.endm +#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS /* * Issue one struct alt_instr descriptor entry (need to put it into diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index 16236a56364b..eab15dbe1cb7 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -62,5 +62,6 @@ struct unwind_hint { #define ANNOTYPE_INSTR_BEGIN 3 #define ANNOTYPE_INSTR_END 4 #define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 16236a56364b..eab15dbe1cb7 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -62,5 +62,6 @@ struct unwind_hint { #define ANNOTYPE_INSTR_BEGIN 3 #define ANNOTYPE_INSTR_END 4 #define ANNOTYPE_UNRET_BEGIN 5 +#define ANNOTYPE_IGNORE_ALTS 6 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 2a703748cad1..ba2cb9b69399 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1309,40 +1309,6 @@ static void add_uaccess_safe(struct objtool_file *file) } } -/* - * FIXME: For now, just ignore any alternatives which add retpolines. This is - * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline. - * But it at least allows objtool to understand the control flow *around* the - * retpoline. - */ -static int add_ignore_alternatives(struct objtool_file *file) -{ - struct section *rsec; - struct reloc *reloc; - struct instruction *insn; - - rsec = find_section_by_name(file->elf, ".rela.discard.ignore_alts"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.ignore_alts entry"); - return -1; - } - - insn->ignore_alts = true; - } - - return 0; -} - /* * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol * will be added to the .retpoline_sites section. @@ -2414,6 +2380,15 @@ static int read_annotate(struct objtool_file *file, int (*func)(int type, struct return 0; } +static int __annotate_ignore_alts(int type, struct instruction *insn) +{ + if (type != ANNOTYPE_IGNORE_ALTS) + return 0; + + insn->ignore_alts = true; + return 0; +} + static int __annotate_noendbr(int type, struct instruction *insn) { if (type != ANNOTYPE_NOENDBR) @@ -2626,7 +2601,7 @@ static int decode_sections(struct objtool_file *file) add_ignores(file); add_uaccess_safe(file); - ret = add_ignore_alternatives(file); + ret = read_annotate(file, __annotate_ignore_alts); if (ret) return ret; -- cgit v1.2.3 From 112765ca1cb9353e71b4f5af4e6e6c4a69c28d99 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:38:58 +0100 Subject: objtool: Convert ANNOTATE_INTRA_FUNCTION_CALL to ANNOTATE Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.584892071@infradead.org --- include/linux/objtool.h | 16 +++---- include/linux/objtool_types.h | 1 + tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 96 +++++++++++++++---------------------- 4 files changed, 47 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 5f0bf8052dc7..42287c1e32ce 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -67,16 +67,6 @@ #else /* __ASSEMBLY__ */ -/* - * This macro indicates that the following intra-function call is valid. - * Any non-annotated intra-function call will cause objtool to issue a warning. - */ -#define ANNOTATE_INTRA_FUNCTION_CALL \ - 999: \ - .pushsection .discard.intra_function_calls; \ - .long 999b; \ - .popsection; - /* * In asm, there are two kinds of code: normal C-type callable functions and * the rest. The normal callable functions can be called by other code, and @@ -154,6 +144,12 @@ #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_OBJTOOL */ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index eab15dbe1cb7..23d6fb6d04c7 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -63,5 +63,6 @@ struct unwind_hint { #define ANNOTYPE_INSTR_END 4 #define ANNOTYPE_UNRET_BEGIN 5 #define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index eab15dbe1cb7..23d6fb6d04c7 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -63,5 +63,6 @@ struct unwind_hint { #define ANNOTYPE_INSTR_END 4 #define ANNOTYPE_UNRET_BEGIN 5 #define ANNOTYPE_IGNORE_ALTS 6 +#define ANNOTYPE_INTRA_FUNCTION_CALL 7 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ba2cb9b69399..2222fe710832 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2339,7 +2339,8 @@ static int read_unwind_hints(struct objtool_file *file) return 0; } -static int read_annotate(struct objtool_file *file, int (*func)(int type, struct instruction *insn)) +static int read_annotate(struct objtool_file *file, + int (*func)(struct objtool_file *file, int type, struct instruction *insn)) { struct section *sec; struct instruction *insn; @@ -2372,7 +2373,7 @@ static int read_annotate(struct objtool_file *file, int (*func)(int type, struct return -1; } - ret = func(type, insn); + ret = func(file, type, insn); if (ret < 0) return ret; } @@ -2380,7 +2381,7 @@ static int read_annotate(struct objtool_file *file, int (*func)(int type, struct return 0; } -static int __annotate_ignore_alts(int type, struct instruction *insn) +static int __annotate_ignore_alts(struct objtool_file *file, int type, struct instruction *insn) { if (type != ANNOTYPE_IGNORE_ALTS) return 0; @@ -2389,7 +2390,7 @@ static int __annotate_ignore_alts(int type, struct instruction *insn) return 0; } -static int __annotate_noendbr(int type, struct instruction *insn) +static int __annotate_noendbr(struct objtool_file *file, int type, struct instruction *insn) { if (type != ANNOTYPE_NOENDBR) return 0; @@ -2398,7 +2399,37 @@ static int __annotate_noendbr(int type, struct instruction *insn) return 0; } -static int __annotate_retpoline_safe(int type, struct instruction *insn) +static int __annotate_ifc(struct objtool_file *file, int type, struct instruction *insn) +{ + unsigned long dest_off; + + if (type != ANNOTYPE_INTRA_FUNCTION_CALL) + return 0; + + if (insn->type != INSN_CALL) { + WARN_INSN(insn, "intra_function_call not a direct call"); + return -1; + } + + /* + * Treat intra-function CALLs as JMPs, but with a stack_op. + * See add_call_destinations(), which strips stack_ops from + * normal CALLs. + */ + insn->type = INSN_JUMP_UNCONDITIONAL; + + dest_off = arch_jump_destination(insn); + insn->jump_dest = find_insn(file, insn->sec, dest_off); + if (!insn->jump_dest) { + WARN_INSN(insn, "can't find call dest at %s+0x%lx", + insn->sec->name, dest_off); + return -1; + } + + return 0; +} + +static int __annotate_retpoline_safe(struct objtool_file *file, int type, struct instruction *insn) { if (type != ANNOTYPE_RETPOLINE_SAFE) return 0; @@ -2415,7 +2446,7 @@ static int __annotate_retpoline_safe(int type, struct instruction *insn) return 0; } -static int __annotate_instr(int type, struct instruction *insn) +static int __annotate_instr(struct objtool_file *file, int type, struct instruction *insn) { switch (type) { case ANNOTYPE_INSTR_BEGIN: @@ -2433,7 +2464,7 @@ static int __annotate_instr(int type, struct instruction *insn) return 0; } -static int __annotate_unret(int type, struct instruction *insn) +static int __annotate_unret(struct objtool_file *file, int type, struct instruction *insn) { if (type != ANNOTYPE_UNRET_BEGIN) return 0; @@ -2443,55 +2474,6 @@ static int __annotate_unret(int type, struct instruction *insn) } -static int read_intra_function_calls(struct objtool_file *file) -{ - struct instruction *insn; - struct section *rsec; - struct reloc *reloc; - - rsec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - unsigned long dest_off; - - if (reloc->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in %s", - rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc)); - if (!insn) { - WARN("bad .discard.intra_function_call entry"); - return -1; - } - - if (insn->type != INSN_CALL) { - WARN_INSN(insn, "intra_function_call not a direct call"); - return -1; - } - - /* - * Treat intra-function CALLs as JMPs, but with a stack_op. - * See add_call_destinations(), which strips stack_ops from - * normal CALLs. - */ - insn->type = INSN_JUMP_UNCONDITIONAL; - - dest_off = arch_jump_destination(insn); - insn->jump_dest = find_insn(file, insn->sec, dest_off); - if (!insn->jump_dest) { - WARN_INSN(insn, "can't find call dest at %s+0x%lx", - insn->sec->name, dest_off); - return -1; - } - } - - return 0; -} - /* * Return true if name matches an instrumentation function, where calls to that * function from noinstr code can safely be removed, but compilers won't do so. @@ -2630,7 +2612,7 @@ static int decode_sections(struct objtool_file *file) * Must be before add_call_destination(); it changes INSN_CALL to * INSN_JUMP. */ - ret = read_intra_function_calls(file); + ret = read_annotate(file, __annotate_ifc); if (ret) return ret; -- cgit v1.2.3 From bb8170067470cc7af28e4386e600b1e0a6a8956a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:00 +0100 Subject: objtool: Collect more annotations in objtool.h Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.786598147@infradead.org --- arch/x86/include/asm/alternative.h | 12 ------ arch/x86/include/asm/nospec-branch.h | 9 ---- include/linux/instrumentation.h | 4 +- include/linux/objtool.h | 80 ++++++++++++++++++++++++------------ 4 files changed, 55 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 595695f85f80..e3903b731305 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -55,12 +55,6 @@ #define LOCK_PREFIX "" #endif -/* - * objtool annotation to ignore the alternatives and only consider the original - * instruction(s). - */ -#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS) - /* * The patching flags are part of the upper bits of the @ft_flags parameter when * specifying them. The split is currently like this: @@ -307,12 +301,6 @@ void nop_func(void); .endm #endif -/* - * objtool annotation to ignore the alternatives and only consider the original - * instruction(s). - */ -#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS - /* * Issue one struct alt_instr descriptor entry (need to put it into * the section .altinstructions, see below). This entry contains diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 50340a125953..7e8bf78c03d5 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -179,13 +179,6 @@ #ifdef __ASSEMBLY__ -/* - * This should be used immediately before an indirect jump/call. It tells - * objtool the subsequent indirect jump/call is vouched safe for retpoline - * builds. - */ -#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE - /* * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions * vs RETBleed validation. @@ -345,8 +338,6 @@ #else /* __ASSEMBLY__ */ -#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE) - typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; diff --git a/include/linux/instrumentation.h b/include/linux/instrumentation.h index c8f866cf02d8..bf675a8aef8a 100644 --- a/include/linux/instrumentation.h +++ b/include/linux/instrumentation.h @@ -10,7 +10,7 @@ /* Begin/end of an instrumentation safe region */ #define __instrumentation_begin(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - __ASM_ANNOTATE(__ASM_BREF(c), ANNOTYPE_INSTR_BEGIN)\ + ANNOTATE_INSTR_BEGIN(__ASM_BREF(c)) \ : : "i" (c)); \ }) #define instrumentation_begin() __instrumentation_begin(__COUNTER__) @@ -48,7 +48,7 @@ */ #define __instrumentation_end(c) ({ \ asm volatile(__stringify(c) ": nop\n\t" \ - __ASM_ANNOTATE(__ASM_BREF(c), ANNOTYPE_INSTR_END) \ + ANNOTATE_INSTR_END(__ASM_BREF(c)) \ : : "i" (c)); \ }) #define instrumentation_end() __instrumentation_end(__COUNTER__) diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 42287c1e32ce..fd487d466bb2 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -63,8 +63,6 @@ "911:\n\t" \ __ASM_ANNOTATE(911b, type) -#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) - #else /* __ASSEMBLY__ */ /* @@ -113,19 +111,6 @@ #endif .endm -/* - * Use objtool to validate the entry requirement that all code paths do - * VALIDATE_UNRET_END before RET. - * - * NOTE: The macro must be used at the beginning of a global symbol, otherwise - * it will be ignored. - */ -#if defined(CONFIG_NOINSTR_VALIDATION) && \ - (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) -#define VALIDATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN -#else -#define VALIDATE_UNRET_BEGIN -#endif .macro REACHABLE .Lhere_\@: @@ -142,14 +127,6 @@ .popsection .endm -#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR - -/* - * This macro indicates that the following intra-function call is valid. - * Any non-annotated intra-function call will cause objtool to issue a warning. - */ -#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL - #endif /* __ASSEMBLY__ */ #else /* !CONFIG_OBJTOOL */ @@ -161,16 +138,12 @@ #define STACK_FRAME_NON_STANDARD_FP(func) #define __ASM_ANNOTATE(label, type) #define ASM_ANNOTATE(type) -#define ANNOTATE_NOENDBR #define ASM_REACHABLE #else -#define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -.macro ANNOTATE_NOENDBR -.endm .macro REACHABLE .endm .macro ANNOTATE type:req @@ -179,4 +152,57 @@ #endif /* CONFIG_OBJTOOL */ +#ifndef __ASSEMBLY__ +/* + * Annotate away the various 'relocation to !ENDBR` complaints; knowing that + * these relocations will never be used for indirect calls. + */ +#define ANNOTATE_NOENDBR ASM_ANNOTATE(ANNOTYPE_NOENDBR) +/* + * This should be used immediately before an indirect jump/call. It tells + * objtool the subsequent indirect jump/call is vouched safe for retpoline + * builds. + */ +#define ANNOTATE_RETPOLINE_SAFE ASM_ANNOTATE(ANNOTYPE_RETPOLINE_SAFE) +/* + * See linux/instrumentation.h + */ +#define ANNOTATE_INSTR_BEGIN(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_BEGIN) +#define ANNOTATE_INSTR_END(label) __ASM_ANNOTATE(label, ANNOTYPE_INSTR_END) +/* + * objtool annotation to ignore the alternatives and only consider the original + * instruction(s). + */ +#define ANNOTATE_IGNORE_ALTERNATIVE ASM_ANNOTATE(ANNOTYPE_IGNORE_ALTS) +/* + * This macro indicates that the following intra-function call is valid. + * Any non-annotated intra-function call will cause objtool to issue a warning. + */ +#define ANNOTATE_INTRA_FUNCTION_CALL ASM_ANNOTATE(ANNOTYPE_INTRA_FUNCTION_CALL) +/* + * Use objtool to validate the entry requirement that all code paths do + * VALIDATE_UNRET_END before RET. + * + * NOTE: The macro must be used at the beginning of a global symbol, otherwise + * it will be ignored. + */ +#define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN) + +#else +#define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR +#define ANNOTATE_RETPOLINE_SAFE ANNOTATE type=ANNOTYPE_RETPOLINE_SAFE +/* ANNOTATE_INSTR_BEGIN ANNOTATE type=ANNOTYPE_INSTR_BEGIN */ +/* ANNOTATE_INSTR_END ANNOTATE type=ANNOTYPE_INSTR_END */ +#define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS +#define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL +#define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN +#endif + +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)) +#define VALIDATE_UNRET_BEGIN ANNOTATE_UNRET_BEGIN +#else +#define VALIDATE_UNRET_BEGIN +#endif + #endif /* _LINUX_OBJTOOL_H */ -- cgit v1.2.3 From c837de3810982cd41cd70e5170da1931439f025c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:01 +0100 Subject: unreachable: Unify Since barrier_before_unreachable() is empty for !GCC it is trivial to unify the two definitions. Less is more. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094311.924381359@infradead.org --- include/linux/compiler-gcc.h | 12 ------------ include/linux/compiler.h | 10 +++++++--- 2 files changed, 7 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d0ed9583743f..c9b58188ec61 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -52,18 +52,6 @@ */ #define barrier_before_unreachable() asm volatile("") -/* - * Mark a position in code as unreachable. This can be used to - * suppress control flow warnings after asm blocks that transfer - * control elsewhere. - */ -#define unreachable() \ - do { \ - annotate_unreachable(); \ - barrier_before_unreachable(); \ - __builtin_unreachable(); \ - } while (0) - #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 469a64dd6495..7be80897a62f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -141,12 +141,16 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ -#ifndef unreachable -# define unreachable() do { \ +/* + * Mark a position in code as unreachable. This can be used to + * suppress control flow warnings after asm blocks that transfer + * control elsewhere. + */ +#define unreachable() do { \ annotate_unreachable(); \ + barrier_before_unreachable(); \ __builtin_unreachable(); \ } while (0) -#endif /* * KENTRY - kernel entry point -- cgit v1.2.3 From 06e24745985c8dd0da18337503afcf2f2fdbdff1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:04 +0100 Subject: objtool: Remove annotate_{,un}reachable() There are no users of annotate_reachable() left. And the annotate_unreachable() usage in unreachable() is plain wrong; it will hide dangerous fall-through code-gen. Remove both. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094312.235637588@infradead.org --- include/linux/compiler.h | 27 --------------------------- tools/objtool/check.c | 43 ++----------------------------------------- 2 files changed, 2 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7be80897a62f..3d9a0e483e51 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -109,35 +109,9 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Unreachable code */ #ifdef CONFIG_OBJTOOL -/* - * These macros help objtool understand GCC code flow for unreachable code. - * The __COUNTER__ based labels are a hack to make each instance of the macros - * unique, to convince GCC not to merge duplicate inline asm statements. - */ -#define __stringify_label(n) #n - -#define __annotate_reachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t"); \ -}) -#define annotate_reachable() __annotate_reachable(__COUNTER__) - -#define __annotate_unreachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ -}) -#define annotate_unreachable() __annotate_unreachable(__COUNTER__) - /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table,\"a\",@progbits #") - #else /* !CONFIG_OBJTOOL */ -#define annotate_reachable() -#define annotate_unreachable() #define __annotate_jump_table #endif /* CONFIG_OBJTOOL */ @@ -147,7 +121,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * control elsewhere. */ #define unreachable() do { \ - annotate_unreachable(); \ barrier_before_unreachable(); \ __builtin_unreachable(); \ } while (0) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3bea8b2963d3..798cff5bffc4 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -638,47 +638,8 @@ static int add_dead_ends(struct objtool_file *file) uint64_t offset; /* - * Check for manually annotated dead ends. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.unreachable"); - if (!rsec) - goto reachable; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find unreachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = true; - } - -reachable: - /* - * These manually annotated reachable checks are needed for GCC 4.4, - * where the Linux unreachable() macro isn't supported. In that case - * GCC doesn't know the "ud2" is fatal, so it generates code as if it's - * not a dead end. + * UD2 defaults to being a dead-end, allow them to be annotated for + * non-fatal, eg WARN. */ rsec = find_section_by_name(file->elf, ".rela.discard.reachable"); if (!rsec) -- cgit v1.2.3 From e7a174fb43d24adca066e82d1cb9fdee092d48d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:05 +0100 Subject: objtool: Convert {.UN}REACHABLE to ANNOTATE Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094312.353431347@infradead.org --- arch/loongarch/include/asm/bug.h | 2 +- arch/x86/entry/entry_64.S | 4 +- arch/x86/include/asm/bug.h | 2 +- arch/x86/include/asm/irq_stack.h | 2 +- include/linux/objtool.h | 18 ++++---- include/linux/objtool_types.h | 1 + tools/include/linux/objtool_types.h | 1 + tools/objtool/check.c | 82 ++++++++++--------------------------- 8 files changed, 36 insertions(+), 76 deletions(-) (limited to 'include') diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h index 561ac1bf79e2..e25404a93882 100644 --- a/arch/loongarch/include/asm/bug.h +++ b/arch/loongarch/include/asm/bug.h @@ -45,7 +45,7 @@ #define __WARN_FLAGS(flags) \ do { \ instrumentation_begin(); \ - __BUG_FLAGS(BUGFLAG_WARNING|(flags), ASM_REACHABLE); \ + __BUG_FLAGS(BUGFLAG_WARNING|(flags), ANNOTATE_REACHABLE);\ instrumentation_end(); \ } while (0) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 1b5be07f8669..9248660ad409 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -311,7 +311,7 @@ SYM_CODE_END(xen_error_entry) call \cfunc /* For some configurations \cfunc ends up being a noreturn. */ - REACHABLE + ANNOTATE_REACHABLE jmp error_return .endm @@ -532,7 +532,7 @@ SYM_CODE_START(\asmsym) call \cfunc /* For some configurations \cfunc ends up being a noreturn. */ - REACHABLE + ANNOTATE_REACHABLE jmp paranoid_exit diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 806649c7f23d..dd8fb1779d97 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -92,7 +92,7 @@ do { \ do { \ __auto_type __flags = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \ + _BUG_FLAGS(ASM_UD2, __flags, ANNOTATE_REACHABLE); \ instrumentation_end(); \ } while (0) diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index b71ad173f877..5455747ed918 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -101,7 +101,7 @@ #define ASM_CALL_ARG0 \ "call %c[__func] \n" \ - ASM_REACHABLE + ANNOTATE_REACHABLE #define ASM_CALL_ARG1 \ "movq %[arg1], %%rdi \n" \ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index fd487d466bb2..e3cb13583fba 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -111,14 +111,6 @@ #endif .endm - -.macro REACHABLE -.Lhere_\@: - .pushsection .discard.reachable - .long .Lhere_\@ - .popsection -.endm - .macro ANNOTATE type:req .Lhere_\@: .pushsection .discard.annotate_insn,"M",@progbits,8 @@ -138,14 +130,11 @@ #define STACK_FRAME_NON_STANDARD_FP(func) #define __ASM_ANNOTATE(label, type) #define ASM_ANNOTATE(type) -#define ASM_REACHABLE #else .macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 signal=0 .endm .macro STACK_FRAME_NON_STANDARD func:req .endm -.macro REACHABLE -.endm .macro ANNOTATE type:req .endm #endif @@ -187,6 +176,12 @@ * it will be ignored. */ #define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN) +/* + * This should be used directly after an instruction that is considered + * terminating, like a noreturn CALL or UD2 when we know they are not -- eg + * WARN using UD2. + */ +#define ANNOTATE_REACHABLE ASM_ANNOTATE(ANNOTYPE_REACHABLE) #else #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR @@ -196,6 +191,7 @@ #define ANNOTATE_IGNORE_ALTERNATIVE ANNOTATE type=ANNOTYPE_IGNORE_ALTS #define ANNOTATE_INTRA_FUNCTION_CALL ANNOTATE type=ANNOTYPE_INTRA_FUNCTION_CALL #define ANNOTATE_UNRET_BEGIN ANNOTATE type=ANNOTYPE_UNRET_BEGIN +#define ANNOTATE_REACHABLE ANNOTATE type=ANNOTYPE_REACHABLE #endif #if defined(CONFIG_NOINSTR_VALIDATION) && \ diff --git a/include/linux/objtool_types.h b/include/linux/objtool_types.h index 23d6fb6d04c7..df5d9fa84dba 100644 --- a/include/linux/objtool_types.h +++ b/include/linux/objtool_types.h @@ -64,5 +64,6 @@ struct unwind_hint { #define ANNOTYPE_UNRET_BEGIN 5 #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h index 23d6fb6d04c7..df5d9fa84dba 100644 --- a/tools/include/linux/objtool_types.h +++ b/tools/include/linux/objtool_types.h @@ -64,5 +64,6 @@ struct unwind_hint { #define ANNOTYPE_UNRET_BEGIN 5 #define ANNOTYPE_IGNORE_ALTS 6 #define ANNOTYPE_INTRA_FUNCTION_CALL 7 +#define ANNOTYPE_REACHABLE 8 #endif /* _LINUX_OBJTOOL_TYPES_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 798cff5bffc4..27d0c4153582 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -627,56 +627,6 @@ static struct instruction *find_last_insn(struct objtool_file *file, return insn; } -/* - * Mark "ud2" instructions and manually annotated dead ends. - */ -static int add_dead_ends(struct objtool_file *file) -{ - struct section *rsec; - struct reloc *reloc; - struct instruction *insn; - uint64_t offset; - - /* - * UD2 defaults to being a dead-end, allow them to be annotated for - * non-fatal, eg WARN. - */ - rsec = find_section_by_name(file->elf, ".rela.discard.reachable"); - if (!rsec) - return 0; - - for_each_reloc(rsec, reloc) { - if (reloc->sym->type == STT_SECTION) { - offset = reloc_addend(reloc); - } else if (reloc->sym->local_label) { - offset = reloc->sym->offset; - } else { - WARN("unexpected relocation symbol type in %s", rsec->name); - return -1; - } - - insn = find_insn(file, reloc->sym->sec, offset); - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) { - insn = find_last_insn(file, reloc->sym->sec); - if (!insn) { - WARN("can't find reachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - } else { - WARN("can't find reachable insn at %s+0x%" PRIx64, - reloc->sym->sec->name, offset); - return -1; - } - - insn->dead_end = false; - } - - return 0; -} - static int create_static_call_sections(struct objtool_file *file) { struct static_call_site *site; @@ -2306,6 +2256,7 @@ static int read_annotate(struct objtool_file *file, struct section *sec; struct instruction *insn; struct reloc *reloc; + uint64_t offset; int type, ret; sec = find_section_by_name(file->elf, ".discard.annotate_insn"); @@ -2327,8 +2278,19 @@ static int read_annotate(struct objtool_file *file, for_each_reloc(sec->rsec, reloc) { type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); - insn = find_insn(file, reloc->sym->sec, - reloc->sym->offset + reloc_addend(reloc)); + offset = reloc->sym->offset + reloc_addend(reloc); + insn = find_insn(file, reloc->sym->sec, offset); + + /* + * Reachable annotations are 'funneh' and act on the previous instruction :/ + */ + if (type == ANNOTYPE_REACHABLE) { + if (insn) + insn = prev_insn_same_sec(file, insn); + else if (offset == reloc->sym->sec->sh.sh_size) + insn = find_last_insn(file, reloc->sym->sec); + } + if (!insn) { WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); return -1; @@ -2420,6 +2382,10 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi insn->unret = 1; break; + case ANNOTYPE_REACHABLE: + insn->dead_end = false; + break; + default: break; } @@ -2566,14 +2532,6 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; - /* - * Must be after add_call_destinations() such that it can override - * dead_end_function() marks. - */ - ret = add_dead_ends(file); - if (ret) - return ret; - ret = add_jump_table_alts(file); if (ret) return ret; @@ -2582,6 +2540,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be after add_call_destinations() such that it can override + * dead_end_function() marks. + */ ret = read_annotate(file, __annotate_late); if (ret) return ret; -- cgit v1.2.3 From 87116ae6da034242baf06e799f9f0e2a8ee6a796 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 28 Nov 2024 10:39:06 +0100 Subject: objtool: Fix ANNOTATE_REACHABLE to be a normal annotation Currently REACHABLE is weird for being on the instruction after the instruction it modifies. Since all REACHABLE annotations have an explicit instruction, flip them around. Signed-off-by: Peter Zijlstra (Intel) Acked-by: Josh Poimboeuf Link: https://lore.kernel.org/r/20241128094312.494176035@infradead.org --- arch/loongarch/include/asm/bug.h | 2 +- arch/x86/entry/entry_64.S | 5 ++--- arch/x86/include/asm/bug.h | 2 +- arch/x86/include/asm/irq_stack.h | 4 ++-- include/linux/objtool.h | 4 ++-- tools/objtool/check.c | 23 ----------------------- 6 files changed, 8 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/arch/loongarch/include/asm/bug.h b/arch/loongarch/include/asm/bug.h index e25404a93882..f6f254f2c5db 100644 --- a/arch/loongarch/include/asm/bug.h +++ b/arch/loongarch/include/asm/bug.h @@ -45,7 +45,7 @@ #define __WARN_FLAGS(flags) \ do { \ instrumentation_begin(); \ - __BUG_FLAGS(BUGFLAG_WARNING|(flags), ANNOTATE_REACHABLE);\ + __BUG_FLAGS(BUGFLAG_WARNING|(flags), ANNOTATE_REACHABLE(10001b));\ instrumentation_end(); \ } while (0) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9248660ad409..f52dbe0ad93c 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -308,10 +308,9 @@ SYM_CODE_END(xen_error_entry) movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ .endif - call \cfunc - /* For some configurations \cfunc ends up being a noreturn. */ ANNOTATE_REACHABLE + call \cfunc jmp error_return .endm @@ -529,10 +528,10 @@ SYM_CODE_START(\asmsym) movq %rsp, %rdi /* pt_regs pointer into first argument */ movq ORIG_RAX(%rsp), %rsi /* get error code into 2nd argument*/ movq $-1, ORIG_RAX(%rsp) /* no syscall to restart */ - call \cfunc /* For some configurations \cfunc ends up being a noreturn. */ ANNOTATE_REACHABLE + call \cfunc jmp paranoid_exit diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index dd8fb1779d97..e85ac0c7c039 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -92,7 +92,7 @@ do { \ do { \ __auto_type __flags = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, __flags, ANNOTATE_REACHABLE); \ + _BUG_FLAGS(ASM_UD2, __flags, ANNOTATE_REACHABLE(1b)); \ instrumentation_end(); \ } while (0) diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 5455747ed918..562a547c29a5 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -100,8 +100,8 @@ } #define ASM_CALL_ARG0 \ - "call %c[__func] \n" \ - ANNOTATE_REACHABLE + "1: call %c[__func] \n" \ + ANNOTATE_REACHABLE(1b) #define ASM_CALL_ARG1 \ "movq %[arg1], %%rdi \n" \ diff --git a/include/linux/objtool.h b/include/linux/objtool.h index e3cb13583fba..c722a921165b 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -177,11 +177,11 @@ */ #define ANNOTATE_UNRET_BEGIN ASM_ANNOTATE(ANNOTYPE_UNRET_BEGIN) /* - * This should be used directly after an instruction that is considered + * This should be used to refer to an instruction that is considered * terminating, like a noreturn CALL or UD2 when we know they are not -- eg * WARN using UD2. */ -#define ANNOTATE_REACHABLE ASM_ANNOTATE(ANNOTYPE_REACHABLE) +#define ANNOTATE_REACHABLE(label) __ASM_ANNOTATE(label, ANNOTYPE_REACHABLE) #else #define ANNOTATE_NOENDBR ANNOTATE type=ANNOTYPE_NOENDBR diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 27d0c4153582..26bdd3ebf5d2 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -614,19 +614,6 @@ static int init_pv_ops(struct objtool_file *file) return 0; } -static struct instruction *find_last_insn(struct objtool_file *file, - struct section *sec) -{ - struct instruction *insn = NULL; - unsigned int offset; - unsigned int end = (sec->sh.sh_size > 10) ? sec->sh.sh_size - 10 : 0; - - for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--) - insn = find_insn(file, sec, offset); - - return insn; -} - static int create_static_call_sections(struct objtool_file *file) { struct static_call_site *site; @@ -2281,16 +2268,6 @@ static int read_annotate(struct objtool_file *file, offset = reloc->sym->offset + reloc_addend(reloc); insn = find_insn(file, reloc->sym->sec, offset); - /* - * Reachable annotations are 'funneh' and act on the previous instruction :/ - */ - if (type == ANNOTYPE_REACHABLE) { - if (insn) - insn = prev_insn_same_sec(file, insn); - else if (offset == reloc->sym->sec->sh.sh_size) - insn = find_last_insn(file, reloc->sym->sec); - } - if (!insn) { WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type); return -1; -- cgit v1.2.3 From ae5c677729e99b8cb3e6252aaa9b72a92985d203 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 30 Oct 2024 13:52:50 -0400 Subject: sched/core: Remove HK_TYPE_SCHED The HK_TYPE_SCHED housekeeping type is defined but not set anywhere. So any code that try to use HK_TYPE_SCHED are essentially dead code. So remove HK_TYPE_SCHED and any code that use it. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20241030175253.125248-2-longman@redhat.com --- include/linux/sched/isolation.h | 1 - kernel/sched/fair.c | 14 -------------- kernel/sched/isolation.c | 1 - 3 files changed, 16 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 2b461129d1fa..499d5e480882 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -10,7 +10,6 @@ enum hk_type { HK_TYPE_TIMER, HK_TYPE_RCU, HK_TYPE_MISC, - HK_TYPE_SCHED, HK_TYPE_TICK, HK_TYPE_DOMAIN, HK_TYPE_WQ, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4283c818bbd1..ef302263f5b3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -12197,9 +12197,6 @@ static inline int on_null_domain(struct rq *rq) * - When one of the busy CPUs notices that there may be an idle rebalancing * needed, they will kick the idle load balancer, which then does idle * load balancing for all the idle CPUs. - * - * - HK_TYPE_MISC CPUs are used for this task, because HK_TYPE_SCHED is not set - * anywhere yet. */ static inline int find_new_ilb(void) { @@ -12444,10 +12441,6 @@ void nohz_balance_enter_idle(int cpu) if (!cpu_active(cpu)) return; - /* Spare idle load balancing on CPUs that don't want to be disturbed: */ - if (!housekeeping_cpu(cpu, HK_TYPE_SCHED)) - return; - /* * Can be set safely without rq->lock held * If a clear happens, it will have evaluated last additions because @@ -12667,13 +12660,6 @@ static void nohz_newidle_balance(struct rq *this_rq) { int this_cpu = this_rq->cpu; - /* - * This CPU doesn't want to be disturbed by scheduler - * housekeeping - */ - if (!housekeeping_cpu(this_cpu, HK_TYPE_SCHED)) - return; - /* Will wake up very soon. No time for doing anything else*/ if (this_rq->avg_idle < sysctl_sched_migration_cost) return; diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 5891e715f00d..5345e11f3d44 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -12,7 +12,6 @@ enum hk_flags { HK_FLAG_TIMER = BIT(HK_TYPE_TIMER), HK_FLAG_RCU = BIT(HK_TYPE_RCU), HK_FLAG_MISC = BIT(HK_TYPE_MISC), - HK_FLAG_SCHED = BIT(HK_TYPE_SCHED), HK_FLAG_TICK = BIT(HK_TYPE_TICK), HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN), HK_FLAG_WQ = BIT(HK_TYPE_WQ), -- cgit v1.2.3 From 6010d245ddc9f463bbf0311ac49073a78f444755 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 30 Oct 2024 13:52:52 -0400 Subject: sched/isolation: Consolidate housekeeping cpumasks that are always identical The housekeeping cpumasks are only set by two boot commandline parameters: "nohz_full" and "isolcpus". When there is more than one of "nohz_full" or "isolcpus", the extra ones must have the same CPU list or the setup will fail partially. The HK_TYPE_DOMAIN and HK_TYPE_MANAGED_IRQ types are settable by "isolcpus" only and their settings can be independent of the other types. The other housekeeping types are all set by "nohz_full" or "isolcpus=nohz" without a way to set them individually. So they all have identical cpumasks. There is actually no point in having different cpumasks for these "nohz_full" only housekeeping types. Consolidate these types to use the same cpumask by aliasing them to the same value. If there is a need to set any of them independently in the future, we can break them out to their own cpumasks again. With this change, the number of cpumasks in the housekeeping structure drops from 9 to 3. Other than that, there should be no other functional change. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra (Intel) Acked-by: Frederic Weisbecker Link: https://lore.kernel.org/r/20241030175253.125248-4-longman@redhat.com --- include/linux/sched/isolation.h | 20 +++++++++++++------- kernel/sched/isolation.c | 19 ++++++------------- 2 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 499d5e480882..d8501f4709b5 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -7,15 +7,21 @@ #include enum hk_type { - HK_TYPE_TIMER, - HK_TYPE_RCU, - HK_TYPE_MISC, - HK_TYPE_TICK, HK_TYPE_DOMAIN, - HK_TYPE_WQ, HK_TYPE_MANAGED_IRQ, - HK_TYPE_KTHREAD, - HK_TYPE_MAX + HK_TYPE_KERNEL_NOISE, + HK_TYPE_MAX, + + /* + * The following housekeeping types are only set by the nohz_full + * boot commandline option. So they can share the same value. + */ + HK_TYPE_TICK = HK_TYPE_KERNEL_NOISE, + HK_TYPE_TIMER = HK_TYPE_KERNEL_NOISE, + HK_TYPE_RCU = HK_TYPE_KERNEL_NOISE, + HK_TYPE_MISC = HK_TYPE_KERNEL_NOISE, + HK_TYPE_WQ = HK_TYPE_KERNEL_NOISE, + HK_TYPE_KTHREAD = HK_TYPE_KERNEL_NOISE }; #ifdef CONFIG_CPU_ISOLATION diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 6a686322ce3c..81bc8b329ef1 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -9,14 +9,9 @@ */ enum hk_flags { - HK_FLAG_TIMER = BIT(HK_TYPE_TIMER), - HK_FLAG_RCU = BIT(HK_TYPE_RCU), - HK_FLAG_MISC = BIT(HK_TYPE_MISC), - HK_FLAG_TICK = BIT(HK_TYPE_TICK), HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN), - HK_FLAG_WQ = BIT(HK_TYPE_WQ), HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ), - HK_FLAG_KTHREAD = BIT(HK_TYPE_KTHREAD), + HK_FLAG_KERNEL_NOISE = BIT(HK_TYPE_KERNEL_NOISE), }; DEFINE_STATIC_KEY_FALSE(housekeeping_overridden); @@ -96,7 +91,7 @@ void __init housekeeping_init(void) static_branch_enable(&housekeeping_overridden); - if (housekeeping.flags & HK_FLAG_TICK) + if (housekeeping.flags & HK_FLAG_KERNEL_NOISE) sched_tick_offload_init(); for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) { @@ -120,7 +115,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags) unsigned int first_cpu; int err = 0; - if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) { + if ((flags & HK_FLAG_KERNEL_NOISE) && !(housekeeping.flags & HK_FLAG_KERNEL_NOISE)) { if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) { pr_warn("Housekeeping: nohz unsupported." " Build with CONFIG_NO_HZ_FULL\n"); @@ -176,7 +171,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags) housekeeping_setup_type(type, housekeeping_staging); } - if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) + if ((flags & HK_FLAG_KERNEL_NOISE) && !(housekeeping.flags & HK_FLAG_KERNEL_NOISE)) tick_nohz_full_setup(non_housekeeping_mask); housekeeping.flags |= flags; @@ -194,8 +189,7 @@ static int __init housekeeping_nohz_full_setup(char *str) { unsigned long flags; - flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU | - HK_FLAG_MISC | HK_FLAG_KTHREAD; + flags = HK_FLAG_KERNEL_NOISE; return housekeeping_setup(str, flags); } @@ -214,8 +208,7 @@ static int __init housekeeping_isolcpus_setup(char *str) */ if (!strncmp(str, "nohz,", 5)) { str += 5; - flags |= HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | - HK_FLAG_RCU | HK_FLAG_MISC | HK_FLAG_KTHREAD; + flags |= HK_FLAG_KERNEL_NOISE; continue; } -- cgit v1.2.3 From 484c997e03cec04da6f69c2c17e854b22aa0f98f Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 3 Dec 2024 18:45:34 +0800 Subject: ASoC: sdw_utils: cs_amp: Assign non-overlapping TDM masks for each codec on a bus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use snd_soc_dai_set_tdm_slot() on capture DAIs to prevent multiple aggregated amps from trying to send data at the same time. When the capture DAIs of multiple amps on a bus are aggregated they will all be sharing the same bit slots for transmitted audio. This would lead to bus errors if all channels on all amps were enabled, because multiple amps would be trying to send data at the same time. To prevent this, the available channels are divided between the amps on a bus so that only one amp will be sending data for each channel position. A CS35L56 has 4 TX channels, which must be split between all the amps on a bus so that no two amps are using the same channel. This is done simply by dividing by the number of amps on the bus, so that 1 amp can use all 4 channels, 2 amps can use 2 channels each, and 3 or 4 amps can only use 1 channel each. The amps are usually aggregated across multiple SoundWire buses. In this case there will be multiple cpu DAIs in the dailink. The channel mapping is used to determine which amps are on each bus. The allocation of the 4 channels is done separately for each bus (only amps on the same bus can interfere with each other). Signed-off-by: Richard Fitzgerald Reviewed-by: Péter Ujfalusi Reviewed-by: Liam Girdwood Reviewed-by: Ranjani Sridharan Signed-off-by: Bard Liao Link: https://patch.msgid.link/20241203104534.56719-3-yung-chuan.liao@linux.intel.com Signed-off-by: Mark Brown --- include/sound/soc_sdw_utils.h | 2 ++ sound/soc/sdw_utils/soc_sdw_cs_amp.c | 46 ++++++++++++++++++++++++++++++++++++ sound/soc/sdw_utils/soc_sdw_utils.c | 1 + 3 files changed, 49 insertions(+) (limited to 'include') diff --git a/include/sound/soc_sdw_utils.h b/include/sound/soc_sdw_utils.h index 0e82598e10af..36a4a1e1d8ca 100644 --- a/include/sound/soc_sdw_utils.h +++ b/include/sound/soc_sdw_utils.h @@ -224,6 +224,8 @@ int asoc_sdw_cs_amp_init(struct snd_soc_card *card, struct snd_soc_dai_link *dai_links, struct asoc_sdw_codec_info *info, bool playback); +int asoc_sdw_cs_spk_feedback_rtd_init(struct snd_soc_pcm_runtime *rtd, + struct snd_soc_dai *dai); /* MAXIM codec support */ int asoc_sdw_maxim_init(struct snd_soc_card *card, diff --git a/sound/soc/sdw_utils/soc_sdw_cs_amp.c b/sound/soc/sdw_utils/soc_sdw_cs_amp.c index 58b059b68016..9146a421e02c 100644 --- a/sound/soc/sdw_utils/soc_sdw_cs_amp.c +++ b/sound/soc/sdw_utils/soc_sdw_cs_amp.c @@ -15,6 +15,7 @@ #include #define CODEC_NAME_SIZE 8 +#define CS_AMP_CHANNELS_PER_AMP 4 int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) { @@ -48,6 +49,51 @@ int asoc_sdw_cs_spk_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai } EXPORT_SYMBOL_NS(asoc_sdw_cs_spk_rtd_init, SND_SOC_SDW_UTILS); +int asoc_sdw_cs_spk_feedback_rtd_init(struct snd_soc_pcm_runtime *rtd, struct snd_soc_dai *dai) +{ + const struct snd_soc_dai_link *dai_link = rtd->dai_link; + const struct snd_soc_dai_link_ch_map *ch_map; + const struct snd_soc_dai_link_component *codec_dlc; + struct snd_soc_dai *codec_dai; + u8 ch_slot[8] = {}; + unsigned int amps_per_bus, ch_per_amp, mask; + int i, ret; + + WARN_ON(dai_link->num_cpus > ARRAY_SIZE(ch_slot)); + + /* + * CS35L56 has 4 TX channels. When the capture is aggregated the + * same bus slots will be allocated to all the amps on a bus. Only + * one amp on that bus can be transmitting in each slot so divide + * the available 4 slots between all the amps on a bus. + */ + amps_per_bus = dai_link->num_codecs / dai_link->num_cpus; + if ((amps_per_bus == 0) || (amps_per_bus > CS_AMP_CHANNELS_PER_AMP)) { + dev_err(rtd->card->dev, "Illegal num_codecs:%u / num_cpus:%u\n", + dai_link->num_codecs, dai_link->num_cpus); + return -EINVAL; + } + + ch_per_amp = CS_AMP_CHANNELS_PER_AMP / amps_per_bus; + + for_each_rtd_ch_maps(rtd, i, ch_map) { + codec_dlc = snd_soc_link_to_codec(rtd->dai_link, i); + codec_dai = snd_soc_find_dai(codec_dlc); + mask = GENMASK(ch_per_amp - 1, 0) << ch_slot[ch_map->cpu]; + + ret = snd_soc_dai_set_tdm_slot(codec_dai, 0, mask, 4, 32); + if (ret < 0) { + dev_err(rtd->card->dev, "Failed to set TDM slot:%d\n", ret); + return ret; + } + + ch_slot[ch_map->cpu] += ch_per_amp; + } + + return 0; +} +EXPORT_SYMBOL_NS(asoc_sdw_cs_spk_feedback_rtd_init, SND_SOC_SDW_UTILS); + int asoc_sdw_cs_amp_init(struct snd_soc_card *card, struct snd_soc_dai_link *dai_links, struct asoc_sdw_codec_info *info, diff --git a/sound/soc/sdw_utils/soc_sdw_utils.c b/sound/soc/sdw_utils/soc_sdw_utils.c index 111e00e916c5..375b46c5b429 100644 --- a/sound/soc/sdw_utils/soc_sdw_utils.c +++ b/sound/soc/sdw_utils/soc_sdw_utils.c @@ -504,6 +504,7 @@ struct asoc_sdw_codec_info codec_info_list[] = { .dai_name = "cs35l56-sdw1c", .dai_type = SOC_SDW_DAI_TYPE_AMP, .dailink = {SOC_SDW_UNUSED_DAI_ID, SOC_SDW_AMP_IN_DAI_ID}, + .rtd_init = asoc_sdw_cs_spk_feedback_rtd_init, }, }, .dai_num = 2, -- cgit v1.2.3 From e500d497c16c29304907f5de8bc79a8d4904c3e9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Nov 2024 19:37:56 -0800 Subject: usb: gadget: functionfs: fix spellos Fix typos in documentation as reported by codespell. Fixes: f0175ab51993 ("usb: gadget: f_fs: OS descriptors support") Fixes: ddf8abd25994 ("USB: f_fs: the FunctionFS driver") Signed-off-by: Randy Dunlap Cc: Michal Nazarewicz Cc: Andrzej Pietrasiewicz Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Link: https://lore.kernel.org/r/20241128033756.373517-1-rdunlap@infradead.org Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/usb/functionfs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/usb/functionfs.h b/include/uapi/linux/usb/functionfs.h index 2ebdba111a8f..beef1752e36e 100644 --- a/include/uapi/linux/usb/functionfs.h +++ b/include/uapi/linux/usb/functionfs.h @@ -206,7 +206,7 @@ struct usb_ffs_dmabuf_transfer_req { * +-----+-----------------+------+--------------------------+ * | off | name | type | description | * +-----+-----------------+------+--------------------------+ - * | 0 | inteface | U8 | related interface number | + * | 0 | interface | U8 | related interface number | * +-----+-----------------+------+--------------------------+ * | 1 | dwLength | U32 | length of the descriptor | * +-----+-----------------+------+--------------------------+ @@ -224,7 +224,7 @@ struct usb_ffs_dmabuf_transfer_req { * +-----+-----------------+------+--------------------------+ * | off | name | type | description | * +-----+-----------------+------+--------------------------+ - * | 0 | inteface | U8 | related interface number | + * | 0 | interface | U8 | related interface number | * +-----+-----------------+------+--------------------------+ * | 1 | dwLength | U32 | length of the descriptor | * +-----+-----------------+------+--------------------------+ @@ -237,7 +237,7 @@ struct usb_ffs_dmabuf_transfer_req { * | 11 | ExtProp[] | | list of ext. prop. d. | * +-----+-----------------+------+--------------------------+ * - * ExtCompat[] is an array of valid Extended Compatiblity descriptors + * ExtCompat[] is an array of valid Extended Compatibility descriptors * which have the following format: * * +-----+-----------------------+------+-------------------------------------+ @@ -295,7 +295,7 @@ struct usb_functionfs_strings_head { * | 16 | stringtab | StringTab[lang_count] | table of strings per lang | * * For each language there is one stringtab entry (ie. there are lang_count - * stringtab entires). Each StringTab has following format: + * stringtab entries). Each StringTab has following format: * * | off | name | type | description | * |-----+---------+-------------------+------------------------------------| -- cgit v1.2.3 From f42d22d3f79639c1b4e41daf28dad2505d6a5a8b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 22 Nov 2024 09:42:25 +0100 Subject: wifi: cfg80211: define and use wiphy guard Define a guard for the wiphy mutex, and use it in most code in cfg80211, though not all due to some interaction with RTNL and/or indentation. Suggested-by: Jeff Johnson Reviewed-by: Jeff Johnson Signed-off-by: Johannes Berg Link: https://patch.msgid.link/20241122094225.88765cbaab65.I610c9b14f36902e75e1d13f0db29f8bef2298804@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 + net/wireless/chan.c | 4 +- net/wireless/core.c | 42 +++--- net/wireless/mlme.c | 8 +- net/wireless/nl80211.c | 190 ++++++++++----------------- net/wireless/pmsr.c | 4 +- net/wireless/reg.c | 55 ++++---- net/wireless/scan.c | 40 +++--- net/wireless/sme.c | 12 +- net/wireless/util.c | 7 +- net/wireless/wext-compat.c | 317 ++++++++++++++++----------------------------- net/wireless/wext-sme.c | 43 +++--- 12 files changed, 278 insertions(+), 448 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 27acf1292a5c..63e79a22a214 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -6031,6 +6031,10 @@ static inline void wiphy_unlock(struct wiphy *wiphy) mutex_unlock(&wiphy->mtx); } +DEFINE_GUARD(wiphy, struct wiphy *, + mutex_lock(&_T->mtx), + mutex_unlock(&_T->mtx)) + struct wiphy_work; typedef void (*wiphy_work_func_t)(struct wiphy *, struct wiphy_work *); diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 40b6375a5de4..833ea73053a4 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1039,10 +1039,10 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) continue; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + found = cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan) || cfg80211_offchan_chain_is_active(rdev, chan); - wiphy_unlock(&rdev->wiphy); if (found) return true; diff --git a/net/wireless/core.c b/net/wireless/core.c index afbdc549fb4a..70857018f020 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -191,7 +191,8 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, return err; } - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) continue; @@ -212,7 +213,6 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev, continue; nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); } - wiphy_unlock(&rdev->wiphy); return 0; } @@ -221,9 +221,9 @@ static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data) { struct cfg80211_registered_device *rdev = data; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + rdev_rfkill_poll(rdev); - wiphy_unlock(&rdev->wiphy); } void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, @@ -283,7 +283,7 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) /* otherwise, check iftype */ - wiphy_lock(wiphy); + guard(wiphy)(wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: @@ -295,8 +295,6 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) default: break; } - - wiphy_unlock(wiphy); } } EXPORT_SYMBOL_GPL(cfg80211_shutdown_all_interfaces); @@ -331,9 +329,9 @@ static void cfg80211_event_work(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, event_work); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + cfg80211_process_rdev_events(rdev); - wiphy_unlock(&rdev->wiphy); } void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) @@ -347,10 +345,10 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) if (wdev->netdev) dev_close(wdev->netdev); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + cfg80211_leave(rdev, wdev); cfg80211_remove_virtual_intf(rdev, wdev); - wiphy_unlock(&rdev->wiphy); } } } @@ -423,9 +421,9 @@ static void cfg80211_wiphy_work(struct work_struct *work) trace_wiphy_work_worker_start(&rdev->wiphy); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); if (rdev->suspended) - goto out; + return; spin_lock_irq(&rdev->wiphy_work_lock); wk = list_first_entry_or_null(&rdev->wiphy_work_list, @@ -441,8 +439,6 @@ static void cfg80211_wiphy_work(struct work_struct *work) } else { spin_unlock_irq(&rdev->wiphy_work_lock); } -out: - wiphy_unlock(&rdev->wiphy); } /* exported functions */ @@ -1526,9 +1522,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, break; case NETDEV_REGISTER: if (!wdev->registered) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + cfg80211_register_wdev(rdev, wdev); - wiphy_unlock(&rdev->wiphy); } break; case NETDEV_UNREGISTER: @@ -1537,16 +1533,16 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * so check wdev->registered. */ if (wdev->registered && !wdev->registering) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + _cfg80211_unregister_wdev(wdev, false); - wiphy_unlock(&rdev->wiphy); } break; case NETDEV_GOING_DOWN: - wiphy_lock(&rdev->wiphy); - cfg80211_leave(rdev, wdev); - cfg80211_remove_links(wdev); - wiphy_unlock(&rdev->wiphy); + scoped_guard(wiphy, &rdev->wiphy) { + cfg80211_leave(rdev, wdev); + cfg80211_remove_links(wdev); + } /* since we just did cfg80211_leave() nothing to do there */ cancel_work_sync(&wdev->disconnect_wk); cancel_work_sync(&wdev->pmsr_free_wk); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index a5eb92d93074..9d577523462d 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -627,10 +627,10 @@ void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, mgmt_registrations_update_wk); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) cfg80211_mgmt_registrations_update(wdev); - wiphy_unlock(&rdev->wiphy); } int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, @@ -1193,10 +1193,10 @@ cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, const struct cfg80211_chan_def *chandef, enum nl80211_radar_event event) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, chandef, event); - wiphy_unlock(&rdev->wiphy); } void cfg80211_background_cac_done_wk(struct work_struct *work) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9d2edb71f981..9590f9bd2ec0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3626,7 +3626,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } else wdev = netdev->ieee80211_ptr; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); /* * end workaround code, by now the rdev is available @@ -3639,32 +3639,24 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rtnl_unlock(); if (result) - goto out; + return result; if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { struct ieee80211_txq_params txq_params; struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; - if (!rdev->ops->set_txq_params) { - result = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_txq_params) + return -EOPNOTSUPP; - if (!netdev) { - result = -EINVAL; - goto out; - } + if (!netdev) + return -EINVAL; if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && - netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) { - result = -EINVAL; - goto out; - } + netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; - if (!netif_running(netdev)) { - result = -ENETDOWN; - goto out; - } + if (!netif_running(netdev)) + return -ENETDOWN; nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], @@ -3675,10 +3667,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) txq_params_policy, info->extack); if (result) - goto out; + return result; + result = parse_txq_params(tb, &txq_params); if (result) - goto out; + return result; txq_params.link_id = nl80211_link_id_or_invalid(info->attrs); @@ -3694,7 +3687,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = rdev_set_txq_params(rdev, netdev, &txq_params); if (result) - goto out; + return result; } } @@ -3711,7 +3704,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (result) - goto out; + return result; } if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { @@ -3722,19 +3715,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER)) txp_wdev = NULL; - if (!rdev->ops->set_tx_power) { - result = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_tx_power) + return -EOPNOTSUPP; idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING; type = nla_get_u32(info->attrs[idx]); if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] && - (type != NL80211_TX_POWER_AUTOMATIC)) { - result = -EINVAL; - goto out; - } + (type != NL80211_TX_POWER_AUTOMATIC)) + return -EINVAL; if (type != NL80211_TX_POWER_AUTOMATIC) { idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL; @@ -3743,7 +3732,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); if (result) - goto out; + return result; } if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && @@ -3752,10 +3741,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if ((!rdev->wiphy.available_antennas_tx && !rdev->wiphy.available_antennas_rx) || - !rdev->ops->set_antenna) { - result = -EOPNOTSUPP; - goto out; - } + !rdev->ops->set_antenna) + return -EOPNOTSUPP; tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); @@ -3763,17 +3750,15 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) /* reject antenna configurations which don't match the * available antenna masks, except for the "all" mask */ if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) || - (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) { - result = -EINVAL; - goto out; - } + (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) + return -EINVAL; tx_ant = tx_ant & rdev->wiphy.available_antennas_tx; rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; result = rdev_set_antenna(rdev, tx_ant, rx_ant); if (result) - goto out; + return result; } changed = 0; @@ -3795,10 +3780,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) { frag_threshold = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]); - if (frag_threshold < 256) { - result = -EINVAL; - goto out; - } + if (frag_threshold < 256) + return -EINVAL; if (frag_threshold != (u32) -1) { /* @@ -3819,10 +3802,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { - if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { - result = -EINVAL; - goto out; - } + if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) + return -EINVAL; coverage_class = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); @@ -3830,20 +3811,17 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { - if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) { - result = -EOPNOTSUPP; - goto out; - } + if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) + return -EOPNOTSUPP; changed |= WIPHY_PARAM_DYN_ACK; } if (info->attrs[NL80211_ATTR_TXQ_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) { - result = -EOPNOTSUPP; - goto out; - } + NL80211_EXT_FEATURE_TXQS)) + return -EOPNOTSUPP; + txq_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_LIMIT]); changed |= WIPHY_PARAM_TXQ_LIMIT; @@ -3851,10 +3829,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) { - result = -EOPNOTSUPP; - goto out; - } + NL80211_EXT_FEATURE_TXQS)) + return -EOPNOTSUPP; + txq_memory_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]); changed |= WIPHY_PARAM_TXQ_MEMORY_LIMIT; @@ -3862,10 +3839,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_TXQ_QUANTUM]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_TXQS)) { - result = -EOPNOTSUPP; - goto out; - } + NL80211_EXT_FEATURE_TXQS)) + return -EOPNOTSUPP; + txq_quantum = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_QUANTUM]); changed |= WIPHY_PARAM_TXQ_QUANTUM; @@ -3877,10 +3853,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u8 old_coverage_class; u32 old_txq_limit, old_txq_memory_limit, old_txq_quantum; - if (!rdev->ops->set_wiphy_params) { - result = -EOPNOTSUPP; - goto out; - } + if (!rdev->ops->set_wiphy_params) + return -EOPNOTSUPP; old_retry_short = rdev->wiphy.retry_short; old_retry_long = rdev->wiphy.retry_long; @@ -3918,15 +3892,11 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.txq_limit = old_txq_limit; rdev->wiphy.txq_memory_limit = old_txq_memory_limit; rdev->wiphy.txq_quantum = old_txq_quantum; - goto out; + return result; } } - result = 0; - -out: - wiphy_unlock(&rdev->wiphy); - return result; + return 0; } int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef) @@ -4144,22 +4114,22 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx = 0; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (if_idx < if_start) { if_idx++; continue; } + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev, - NL80211_CMD_NEW_INTERFACE) < 0) { - wiphy_unlock(&rdev->wiphy); + NL80211_CMD_NEW_INTERFACE) < 0) goto out; - } + if_idx++; } - wiphy_unlock(&rdev->wiphy); if_start = 0; wp_idx++; @@ -4517,16 +4487,13 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - int ret; /* to avoid failing a new interface creation due to pending removal */ cfg80211_destroy_ifaces(rdev); - wiphy_lock(&rdev->wiphy); - ret = _nl80211_new_interface(skb, info); - wiphy_unlock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); - return ret; + return _nl80211_new_interface(skb, info); } static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) @@ -10098,7 +10065,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; unsigned int cac_time_ms; - int err = -EINVAL; + int err; flush_delayed_work(&rdev->dfs_update_channels_wk); @@ -10113,35 +10080,29 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, return -EINVAL; } - wiphy_lock(wiphy); + guard(wiphy)(wiphy); dfs_region = reg_get_dfs_region(wiphy); if (dfs_region == NL80211_DFS_UNSET) - goto unlock; + return -EINVAL; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) - goto unlock; + return err; err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) - goto unlock; + return err; - if (err == 0) { - err = -EINVAL; - goto unlock; - } + if (err == 0) + return -EINVAL; - if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) { - err = -EINVAL; - goto unlock; - } + if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) + return -EINVAL; - if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) { - err = cfg80211_start_background_radar_detection(rdev, wdev, - &chandef); - goto unlock; - } + if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) + return cfg80211_start_background_radar_detection(rdev, wdev, + &chandef); if (cfg80211_beaconing_iface_active(wdev)) { /* During MLO other link(s) can beacon, only the current link @@ -10151,26 +10112,19 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, !wdev->links[link_id].ap.beacon_interval) { /* nothing */ } else { - err = -EBUSY; - goto unlock; + return -EBUSY; } } - if (wdev->links[link_id].cac_started) { - err = -EBUSY; - goto unlock; - } + if (wdev->links[link_id].cac_started) + return -EBUSY; /* CAC start is offloaded to HW and can't be started manually */ - if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) { - err = -EOPNOTSUPP; - goto unlock; - } + if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) + return -EOPNOTSUPP; - if (!rdev->ops->start_radar_detection) { - err = -EOPNOTSUPP; - goto unlock; - } + if (!rdev->ops->start_radar_detection) + return -EOPNOTSUPP; cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) @@ -10197,10 +10151,8 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->links[link_id].cac_start_time = jiffies; wdev->links[link_id].cac_time_ms = cac_time_ms; } -unlock: - wiphy_unlock(wiphy); - return err; + return 0; } static int nl80211_notify_radar_detection(struct sk_buff *skb, diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 0396fa19bdf1..a117f5093ca2 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -630,9 +630,9 @@ void cfg80211_pmsr_free_wk(struct work_struct *work) struct wireless_dev *wdev = container_of(work, struct wireless_dev, pmsr_free_wk); - wiphy_lock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); + cfg80211_pmsr_process_abort(wdev); - wiphy_unlock(wdev->wiphy); } void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1df65a5a44f7..2dd0533e7660 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2465,11 +2465,11 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy) struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - wiphy_lock(wiphy); + guard(wiphy)(wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) if (!reg_wdev_chan_valid(wiphy, wdev)) cfg80211_leave(rdev, wdev); - wiphy_unlock(wiphy); } static void reg_check_chans_work(struct work_struct *work) @@ -2649,13 +2649,11 @@ void wiphy_apply_custom_regulatory(struct wiphy *wiphy, return; rtnl_lock(); - wiphy_lock(wiphy); - - tmp = get_wiphy_regdom(wiphy); - rcu_assign_pointer(wiphy->regd, new_regd); - rcu_free_regdom(tmp); - - wiphy_unlock(wiphy); + scoped_guard(wiphy, wiphy) { + tmp = get_wiphy_regdom(wiphy); + rcu_assign_pointer(wiphy->regd, new_regd); + rcu_free_regdom(tmp); + } rtnl_unlock(); } EXPORT_SYMBOL(wiphy_apply_custom_regulatory); @@ -2825,9 +2823,9 @@ reg_process_hint_driver(struct wiphy *wiphy, tmp = get_wiphy_regdom(wiphy); ASSERT_RTNL(); - wiphy_lock(wiphy); - rcu_assign_pointer(wiphy->regd, regd); - wiphy_unlock(wiphy); + scoped_guard(wiphy, wiphy) { + rcu_assign_pointer(wiphy->regd, regd); + } rcu_free_regdom(tmp); } @@ -3205,9 +3203,9 @@ static void reg_process_self_managed_hints(void) ASSERT_RTNL(); for_each_rdev(rdev) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + reg_process_self_managed_hint(&rdev->wiphy); - wiphy_unlock(&rdev->wiphy); } reg_check_channels(); @@ -3600,14 +3598,12 @@ static bool is_wiphy_all_set_reg_flag(enum ieee80211_regulatory_flags flag) struct wireless_dev *wdev; for_each_rdev(rdev) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - if (!(wdev->wiphy->regulatory_flags & flag)) { - wiphy_unlock(&rdev->wiphy); + if (!(wdev->wiphy->regulatory_flags & flag)) return false; - } } - wiphy_unlock(&rdev->wiphy); } return true; @@ -3883,19 +3879,18 @@ static int reg_set_rd_driver(const struct ieee80211_regdomain *rd, if (!driver_request->intersect) { ASSERT_RTNL(); - wiphy_lock(request_wiphy); - if (request_wiphy->regd) - tmp = get_wiphy_regdom(request_wiphy); - - regd = reg_copy_regd(rd); - if (IS_ERR(regd)) { - wiphy_unlock(request_wiphy); - return PTR_ERR(regd); + scoped_guard(wiphy, request_wiphy) { + if (request_wiphy->regd) + tmp = get_wiphy_regdom(request_wiphy); + + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); + + rcu_assign_pointer(request_wiphy->regd, regd); + rcu_free_regdom(tmp); } - rcu_assign_pointer(request_wiphy->regd, regd); - rcu_free_regdom(tmp); - wiphy_unlock(request_wiphy); reset_regdomains(false, rd); return 0; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 1c6fd45aa809..d056248c43d2 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1238,7 +1238,8 @@ void cfg80211_sched_scan_results_wk(struct work_struct *work) rdev = container_of(work, struct cfg80211_registered_device, sched_scan_res_wk); - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) { if (req->report_results) { req->report_results = false; @@ -1253,7 +1254,6 @@ void cfg80211_sched_scan_results_wk(struct work_struct *work) NL80211_CMD_SCHED_SCAN_RESULTS); } } - wiphy_unlock(&rdev->wiphy); } void cfg80211_sched_scan_results(struct wiphy *wiphy, u64 reqid) @@ -1288,9 +1288,9 @@ EXPORT_SYMBOL(cfg80211_sched_scan_stopped_locked); void cfg80211_sched_scan_stopped(struct wiphy *wiphy, u64 reqid) { - wiphy_lock(wiphy); + guard(wiphy)(wiphy); + cfg80211_sched_scan_stopped_locked(wiphy, reqid); - wiphy_unlock(wiphy); } EXPORT_SYMBOL(cfg80211_sched_scan_stopped); @@ -3565,10 +3565,8 @@ int cfg80211_wext_siwscan(struct net_device *dev, /* translate "Scan for SSID" request */ if (wreq) { if (wrqu->data.flags & IW_SCAN_THIS_ESSID) { - if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) { - err = -EINVAL; - goto out; - } + if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); creq->ssids[0].ssid_len = wreq->essid_len; } @@ -3584,20 +3582,20 @@ int cfg80211_wext_siwscan(struct net_device *dev, eth_broadcast_addr(creq->bssid); - wiphy_lock(&rdev->wiphy); - - rdev->scan_req = creq; - err = rdev_scan(rdev, creq); - if (err) { - rdev->scan_req = NULL; - /* creq will be freed below */ - } else { - nl80211_send_scan_start(rdev, dev->ieee80211_ptr); - /* creq now owned by driver */ - creq = NULL; - dev_hold(dev); + scoped_guard(wiphy, &rdev->wiphy) { + rdev->scan_req = creq; + err = rdev_scan(rdev, creq); + if (err) { + rdev->scan_req = NULL; + /* creq will be freed below */ + } else { + nl80211_send_scan_start(rdev, dev->ieee80211_ptr); + /* creq now owned by driver */ + creq = NULL; + dev_hold(dev); + } } - wiphy_unlock(&rdev->wiphy); + out: kfree(creq); return err; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 431da30817a6..7a734c8085af 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -251,7 +251,7 @@ void cfg80211_conn_work(struct work_struct *work) u8 bssid_buf[ETH_ALEN], *bssid = NULL; enum nl80211_timeout_reason treason; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (!wdev->netdev) @@ -279,8 +279,6 @@ void cfg80211_conn_work(struct work_struct *work) __cfg80211_connect_result(wdev->netdev, &cr, false); } } - - wiphy_unlock(&rdev->wiphy); } static void cfg80211_step_auth_next(struct cfg80211_conn *conn, @@ -692,13 +690,13 @@ static bool cfg80211_is_all_idle(void) * as chan dfs state, etc. */ for_each_rdev(rdev) { - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->conn || wdev->connected || cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; } - wiphy_unlock(&rdev->wiphy); } return is_all_idle; @@ -1582,7 +1580,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) container_of(work, struct wireless_dev, disconnect_wk); struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - wiphy_lock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); if (wdev->conn_owner_nlportid) { switch (wdev->iftype) { @@ -1618,6 +1616,4 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) break; } } - - wiphy_unlock(wdev->wiphy); } diff --git a/net/wireless/util.c b/net/wireless/util.c index 040d62051eb9..c7c6261c9146 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2572,7 +2572,6 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; - int ret; wdev = dev->ieee80211_ptr; if (!wdev) @@ -2584,11 +2583,9 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, memset(sinfo, 0, sizeof(*sinfo)); - wiphy_lock(&rdev->wiphy); - ret = rdev_get_station(rdev, dev, mac_addr, sinfo); - wiphy_unlock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); - return ret; + return rdev_get_station(rdev, dev, mac_addr, sinfo); } EXPORT_SYMBOL(cfg80211_get_station); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 90d5c0592667..687f93664d1f 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -39,7 +39,6 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; - int ret; rdev = wiphy_to_rdev(wdev->wiphy); @@ -62,11 +61,9 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, memset(&vifparams, 0, sizeof(vifparams)); - wiphy_lock(wdev->wiphy); - ret = cfg80211_change_iface(rdev, dev, type, &vifparams); - wiphy_unlock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); - return ret; + return cfg80211_change_iface(rdev, dev, type, &vifparams); } int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, @@ -258,23 +255,17 @@ int cfg80211_wext_siwrts(struct net_device *dev, u32 orts = wdev->wiphy->rts_threshold; int err; - wiphy_lock(&rdev->wiphy); - if (rts->disabled || !rts->fixed) { + guard(wiphy)(&rdev->wiphy); + if (rts->disabled || !rts->fixed) wdev->wiphy->rts_threshold = (u32) -1; - } else if (rts->value < 0) { - err = -EINVAL; - goto out; - } else { + else if (rts->value < 0) + return -EINVAL; + else wdev->wiphy->rts_threshold = rts->value; - } err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_RTS_THRESHOLD); - if (err) wdev->wiphy->rts_threshold = orts; - -out: - wiphy_unlock(&rdev->wiphy); return err; } @@ -302,12 +293,12 @@ int cfg80211_wext_siwfrag(struct net_device *dev, u32 ofrag = wdev->wiphy->frag_threshold; int err; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + if (frag->disabled || !frag->fixed) { wdev->wiphy->frag_threshold = (u32) -1; } else if (frag->value < 256) { - err = -EINVAL; - goto out; + return -EINVAL; } else { /* Fragment length must be even, so strip LSB. */ wdev->wiphy->frag_threshold = frag->value & ~0x1; @@ -316,9 +307,6 @@ int cfg80211_wext_siwfrag(struct net_device *dev, err = rdev_set_wiphy_params(rdev, WIPHY_PARAM_FRAG_THRESHOLD); if (err) wdev->wiphy->frag_threshold = ofrag; -out: - wiphy_unlock(&rdev->wiphy); - return err; } @@ -352,7 +340,8 @@ static int cfg80211_wext_siwretry(struct net_device *dev, (retry->flags & IW_RETRY_TYPE) != IW_RETRY_LIMIT) return -EINVAL; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + if (retry->flags & IW_RETRY_LONG) { wdev->wiphy->retry_long = retry->value; changed |= WIPHY_PARAM_RETRY_LONG; @@ -371,7 +360,6 @@ static int cfg80211_wext_siwretry(struct net_device *dev, wdev->wiphy->retry_short = oshort; wdev->wiphy->retry_long = olong; } - wiphy_unlock(&rdev->wiphy); return err; } @@ -578,9 +566,9 @@ static int cfg80211_wext_siwencode(struct net_device *dev, struct iw_point *erq = &wrqu->encoding; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int idx, err; - bool remove = false; struct key_params params; + bool remove = false; + int idx; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) @@ -592,11 +580,9 @@ static int cfg80211_wext_siwencode(struct net_device *dev, !rdev->ops->set_default_key) return -EOPNOTSUPP; - wiphy_lock(&rdev->wiphy); - if (wdev->valid_links) { - err = -EOPNOTSUPP; - goto out; - } + guard(wiphy)(&rdev->wiphy); + if (wdev->valid_links) + return -EOPNOTSUPP; idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { @@ -604,8 +590,7 @@ static int cfg80211_wext_siwencode(struct net_device *dev, if (idx < 0) idx = 0; } else if (idx < 1 || idx > 4) { - err = -EINVAL; - goto out; + return -EINVAL; } else { idx--; } @@ -614,7 +599,8 @@ static int cfg80211_wext_siwencode(struct net_device *dev, remove = true; else if (erq->length == 0) { /* No key data - just set the default TX key index */ - err = 0; + int err = 0; + if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) @@ -622,28 +608,22 @@ static int cfg80211_wext_siwencode(struct net_device *dev, true); if (!err) wdev->wext.default_key = idx; - goto out; + return err; } memset(¶ms, 0, sizeof(params)); params.key = keybuf; params.key_len = erq->length; - if (erq->length == 5) { + if (erq->length == 5) params.cipher = WLAN_CIPHER_SUITE_WEP40; - } else if (erq->length == 13) { + else if (erq->length == 13) params.cipher = WLAN_CIPHER_SUITE_WEP104; - } else if (!remove) { - err = -EINVAL; - goto out; - } - - err = cfg80211_set_encryption(rdev, dev, false, NULL, remove, - wdev->wext.default_key == -1, - idx, ¶ms); -out: - wiphy_unlock(&rdev->wiphy); + else if (!remove) + return -EINVAL; - return err; + return cfg80211_set_encryption(rdev, dev, false, NULL, remove, + wdev->wext.default_key == -1, + idx, ¶ms); } static int cfg80211_wext_siwencodeext(struct net_device *dev, @@ -659,7 +639,6 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, bool remove = false; struct key_params params; u32 cipher; - int ret; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_ADHOC) @@ -734,16 +713,13 @@ static int cfg80211_wext_siwencodeext(struct net_device *dev, params.seq_len = 6; } - wiphy_lock(wdev->wiphy); - ret = cfg80211_set_encryption( - rdev, dev, - !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY), - addr, remove, - ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, - idx, ¶ms); - wiphy_unlock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); - return ret; + return cfg80211_set_encryption(rdev, dev, + !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY), + addr, remove, + ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, + idx, ¶ms); } static int cfg80211_wext_giwencode(struct net_device *dev, @@ -794,61 +770,41 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = { .width = NL80211_CHAN_WIDTH_20_NOHT, }; - int freq, ret; + int freq; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); - break; + return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); - break; + return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra); case NL80211_IFTYPE_MONITOR: freq = cfg80211_wext_freq(wextfreq); - if (freq < 0) { - ret = freq; - break; - } - if (freq == 0) { - ret = -EINVAL; - break; - } + if (freq < 0) + return freq; + if (freq == 0) + return -EINVAL; + chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); - if (!chandef.chan) { - ret = -EINVAL; - break; - } - ret = cfg80211_set_monitor_channel(rdev, dev, &chandef); - break; + if (!chandef.chan) + return -EINVAL; + return cfg80211_set_monitor_channel(rdev, dev, &chandef); case NL80211_IFTYPE_MESH_POINT: freq = cfg80211_wext_freq(wextfreq); - if (freq < 0) { - ret = freq; - break; - } - if (freq == 0) { - ret = -EINVAL; - break; - } + if (freq < 0) + return freq; + if (freq == 0) + return -EINVAL; chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); - if (!chandef.chan) { - ret = -EINVAL; - break; - } - ret = cfg80211_set_mesh_channel(rdev, wdev, &chandef); - break; + if (!chandef.chan) + return -EINVAL; + return cfg80211_set_mesh_channel(rdev, wdev, &chandef); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_giwfreq(struct net_device *dev, @@ -861,35 +817,26 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, struct cfg80211_chan_def chandef = {}; int ret; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); - break; + return cfg80211_mgd_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); - break; + return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra); case NL80211_IFTYPE_MONITOR: - if (!rdev->ops->get_channel) { - ret = -EINVAL; - break; - } + if (!rdev->ops->get_channel) + return -EINVAL; ret = rdev_get_channel(rdev, wdev, 0, &chandef); if (ret) - break; + return ret; freq->m = chandef.chan->center_freq; freq->e = 6; - ret = 0; - break; + return ret; default: - ret = -EINVAL; - break; + return -EINVAL; } - - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_siwtxpower(struct net_device *dev, @@ -900,7 +847,6 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); enum nl80211_tx_power_setting type; int dbm = 0; - int ret; if ((data->txpower.flags & IW_TXPOW_TYPE) != IW_TXPOW_DBM) return -EINVAL; @@ -942,11 +888,9 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, return 0; } - wiphy_lock(&rdev->wiphy); - ret = rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); - wiphy_unlock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); - return ret; + return rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); } static int cfg80211_wext_giwtxpower(struct net_device *dev, @@ -965,9 +909,9 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, if (!rdev->ops->get_tx_power) return -EOPNOTSUPP; - wiphy_lock(&rdev->wiphy); - err = rdev_get_tx_power(rdev, wdev, &val); - wiphy_unlock(&rdev->wiphy); + scoped_guard(wiphy, &rdev->wiphy) { + err = rdev_get_tx_power(rdev, wdev, &val); + } if (err) return err; @@ -1209,9 +1153,9 @@ static int cfg80211_wext_siwpower(struct net_device *dev, timeout = wrq->value / 1000; } - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + err = rdev_set_power_mgmt(rdev, dev, ps, timeout); - wiphy_unlock(&rdev->wiphy); if (err) return err; @@ -1244,8 +1188,8 @@ static int cfg80211_wext_siwrate(struct net_device *dev, struct cfg80211_bitrate_mask mask; u32 fixed, maxrate; struct ieee80211_supported_band *sband; - int band, ridx, ret; bool match = false; + int band, ridx; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; @@ -1283,14 +1227,12 @@ static int cfg80211_wext_siwrate(struct net_device *dev, if (!match) return -EINVAL; - wiphy_lock(&rdev->wiphy); - if (dev->ieee80211_ptr->valid_links) - ret = -EOPNOTSUPP; - else - ret = rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask); - wiphy_unlock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); - return ret; + if (dev->ieee80211_ptr->valid_links) + return -EOPNOTSUPP; + + return rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask); } static int cfg80211_wext_giwrate(struct net_device *dev, @@ -1319,9 +1261,9 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - wiphy_lock(&rdev->wiphy); - err = rdev_get_station(rdev, dev, addr, &sinfo); - wiphy_unlock(&rdev->wiphy); + scoped_guard(wiphy, &rdev->wiphy) { + err = rdev_get_station(rdev, dev, addr, &sinfo); + } if (err) return err; @@ -1420,23 +1362,17 @@ static int cfg80211_wext_siwap(struct net_device *dev, struct sockaddr *ap_addr = &wrqu->ap_addr; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int ret; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); - break; + return cfg80211_ibss_wext_siwap(dev, info, ap_addr, extra); case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); - break; + return cfg80211_mgd_wext_siwap(dev, info, ap_addr, extra); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_giwap(struct net_device *dev, @@ -1446,23 +1382,17 @@ static int cfg80211_wext_giwap(struct net_device *dev, struct sockaddr *ap_addr = &wrqu->ap_addr; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int ret; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); - break; + return cfg80211_ibss_wext_giwap(dev, info, ap_addr, extra); case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); - break; + return cfg80211_mgd_wext_giwap(dev, info, ap_addr, extra); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_siwessid(struct net_device *dev, @@ -1472,23 +1402,17 @@ static int cfg80211_wext_siwessid(struct net_device *dev, struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int ret; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_siwessid(dev, info, data, ssid); - break; + return cfg80211_ibss_wext_siwessid(dev, info, data, ssid); case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_siwessid(dev, info, data, ssid); - break; + return cfg80211_mgd_wext_siwessid(dev, info, data, ssid); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_giwessid(struct net_device *dev, @@ -1498,26 +1422,20 @@ static int cfg80211_wext_giwessid(struct net_device *dev, struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); - int ret; data->flags = 0; data->length = 0; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: - ret = cfg80211_ibss_wext_giwessid(dev, info, data, ssid); - break; + return cfg80211_ibss_wext_giwessid(dev, info, data, ssid); case NL80211_IFTYPE_STATION: - ret = cfg80211_mgd_wext_giwessid(dev, info, data, ssid); - break; + return cfg80211_mgd_wext_giwessid(dev, info, data, ssid); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static int cfg80211_wext_siwpmksa(struct net_device *dev, @@ -1528,7 +1446,6 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_pmksa cfg_pmksa; struct iw_pmksa *pmksa = (struct iw_pmksa *)extra; - int ret; memset(&cfg_pmksa, 0, sizeof(struct cfg80211_pmksa)); @@ -1538,39 +1455,27 @@ static int cfg80211_wext_siwpmksa(struct net_device *dev, cfg_pmksa.bssid = pmksa->bssid.sa_data; cfg_pmksa.pmkid = pmksa->pmkid; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (pmksa->cmd) { case IW_PMKSA_ADD: - if (!rdev->ops->set_pmksa) { - ret = -EOPNOTSUPP; - break; - } + if (!rdev->ops->set_pmksa) + return -EOPNOTSUPP; - ret = rdev_set_pmksa(rdev, dev, &cfg_pmksa); - break; + return rdev_set_pmksa(rdev, dev, &cfg_pmksa); case IW_PMKSA_REMOVE: - if (!rdev->ops->del_pmksa) { - ret = -EOPNOTSUPP; - break; - } + if (!rdev->ops->del_pmksa) + return -EOPNOTSUPP; - ret = rdev_del_pmksa(rdev, dev, &cfg_pmksa); - break; + return rdev_del_pmksa(rdev, dev, &cfg_pmksa); case IW_PMKSA_FLUSH: - if (!rdev->ops->flush_pmksa) { - ret = -EOPNOTSUPP; - break; - } + if (!rdev->ops->flush_pmksa) + return -EOPNOTSUPP; - ret = rdev_flush_pmksa(rdev, dev); - break; + return rdev_flush_pmksa(rdev, dev); default: - ret = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return ret; } static const iw_handler cfg80211_handlers[] = { diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 8edd9ada69d0..573b6b15a446 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -302,8 +302,8 @@ int cfg80211_wext_siwgenie(struct net_device *dev, struct iw_point *data = &wrqu->data; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + int ie_len = data->length; u8 *ie = extra; - int ie_len = data->length, err; if (wdev->iftype != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; @@ -311,39 +311,31 @@ int cfg80211_wext_siwgenie(struct net_device *dev, if (!ie_len) ie = NULL; - wiphy_lock(wdev->wiphy); + guard(wiphy)(wdev->wiphy); /* no change */ - err = 0; if (wdev->wext.ie_len == ie_len && memcmp(wdev->wext.ie, ie, ie_len) == 0) - goto out; + return 0; if (ie_len) { ie = kmemdup(extra, ie_len, GFP_KERNEL); - if (!ie) { - err = -ENOMEM; - goto out; - } - } else + if (!ie) + return -ENOMEM; + } else { ie = NULL; + } kfree(wdev->wext.ie); wdev->wext.ie = ie; wdev->wext.ie_len = ie_len; - if (wdev->conn) { - err = cfg80211_disconnect(rdev, dev, - WLAN_REASON_DEAUTH_LEAVING, false); - if (err) - goto out; - } + if (wdev->conn) + return cfg80211_disconnect(rdev, dev, + WLAN_REASON_DEAUTH_LEAVING, false); /* userspace better not think we'll reconnect */ - err = 0; - out: - wiphy_unlock(wdev->wiphy); - return err; + return 0; } int cfg80211_wext_siwmlme(struct net_device *dev, @@ -353,7 +345,6 @@ int cfg80211_wext_siwmlme(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct iw_mlme *mlme = (struct iw_mlme *)extra; struct cfg80211_registered_device *rdev; - int err; if (!wdev) return -EOPNOTSUPP; @@ -366,17 +357,13 @@ int cfg80211_wext_siwmlme(struct net_device *dev, if (mlme->addr.sa_family != ARPHRD_ETHER) return -EINVAL; - wiphy_lock(&rdev->wiphy); + guard(wiphy)(&rdev->wiphy); + switch (mlme->cmd) { case IW_MLME_DEAUTH: case IW_MLME_DISASSOC: - err = cfg80211_disconnect(rdev, dev, mlme->reason_code, true); - break; + return cfg80211_disconnect(rdev, dev, mlme->reason_code, true); default: - err = -EOPNOTSUPP; - break; + return -EOPNOTSUPP; } - wiphy_unlock(&rdev->wiphy); - - return err; } -- cgit v1.2.3 From d9b4067aef50aa7a13d1a9fbb4e35bdea6804ff3 Mon Sep 17 00:00:00 2001 From: Duan Chenghao Date: Thu, 24 Oct 2024 10:40:38 +0800 Subject: USB: Fix the issue of task recovery failure caused by USB status when S4 wakes up When a device is inserted into the USB port and an S4 wakeup is initiated, after the USB-hub initialization is completed, it will automatically enter suspend mode. Upon detecting a device on the USB port, it will proceed with resume and set the hcd to the HCD_FLAG_WAKEUP_PENDING state. During the S4 wakeup process, peripherals are put into suspend mode, followed by task recovery. However, upon detecting that the hcd is in the HCD_FLAG_WAKEUP_PENDING state, it will return an EBUSY status, causing the S4 suspend to fail and subsequent task recovery to not proceed. - [ 27.594598][ 1] PM: pci_pm_freeze(): hcd_pci_suspend+0x0/0x28 returns -16 [ 27.594601][ 1] PM: dpm_run_callback(): pci_pm_freeze+0x0/0x100 returns -16 [ 27.603420][ 1] ehci-pci 0000:00:04.1: pci_pm_freeze+0x0/0x100 returned 0 after 3 usecs [ 27.612233][ 1] ehci-pci 0000:00:05.1: pci_pm_freeze+0x0/0x100 returned -16 after 17223 usecs [ 27.810067][ 1] PM: Device 0000:00:05.1 failed to quiesce async: error -16 [ 27.816988][ 1] PM: quiesce of devices aborted after 1833.282 msecs [ 27.823302][ 1] PM: start quiesce of devices aborted after 1839.975 msecs ...... [ 31.303172][ 1] PM: recover of devices complete after 3473.039 msecs [ 31.309818][ 1] PM: Failed to load hibernation image, recovering. [ 31.348188][ 1] PM: Basic memory bitmaps freed [ 31.352686][ 1] OOM killer enabled. [ 31.356232][ 1] Restarting tasks ... done. [ 31.360609][ 1] PM: resume from hibernation failed (0) [ 31.365800][ 1] PM: Hibernation image not present or could not be loaded. The "do_wakeup" is determined based on whether the controller's power/wakeup attribute is set. The current issue necessitates considering the type of suspend that is occurring. If the suspend type is either PM_EVENT_FREEZE or PM_EVENT_QUIESCE, then "do_wakeup" should be set to false. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202410151722.rfjtknRz-lkp@intel.com/ Signed-off-by: Alan Stern Signed-off-by: Duan Chenghao Link: https://lore.kernel.org/r/20241024024038.26157-1-duanchenghao@kylinos.cn Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd-pci.c | 15 +++++++++++++-- include/linux/pm.h | 3 ++- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index a08f3f228e6d..56b534f59907 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -422,7 +422,12 @@ static int suspend_common(struct device *dev, pm_message_t msg) bool do_wakeup; int retval; - do_wakeup = PMSG_IS_AUTO(msg) ? true : device_may_wakeup(dev); + if (PMSG_IS_AUTO(msg)) + do_wakeup = true; + else if (PMSG_NO_WAKEUP(msg)) + do_wakeup = false; + else + do_wakeup = device_may_wakeup(dev); /* Root hub suspend should have stopped all downstream traffic, * and all bus master traffic. And done so for both the interface @@ -521,6 +526,11 @@ static int hcd_pci_suspend(struct device *dev) return suspend_common(dev, PMSG_SUSPEND); } +static int hcd_pci_freeze(struct device *dev) +{ + return suspend_common(dev, PMSG_FREEZE); +} + static int hcd_pci_suspend_noirq(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); @@ -590,6 +600,7 @@ static int hcd_pci_restore(struct device *dev) #else #define hcd_pci_suspend NULL +#define hcd_pci_freeze NULL #define hcd_pci_suspend_noirq NULL #define hcd_pci_poweroff_late NULL #define hcd_pci_resume_noirq NULL @@ -624,7 +635,7 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = { .suspend_noirq = hcd_pci_suspend_noirq, .resume_noirq = hcd_pci_resume_noirq, .resume = hcd_pci_resume, - .freeze = hcd_pci_suspend, + .freeze = hcd_pci_freeze, .freeze_noirq = check_root_hub_suspended, .thaw_noirq = NULL, .thaw = hcd_pci_resume, diff --git a/include/linux/pm.h b/include/linux/pm.h index e7f0260f15ad..08c37b83fea8 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -570,7 +570,8 @@ const struct dev_pm_ops name = { \ { .event = PM_EVENT_AUTO_RESUME, }) #define PMSG_IS_AUTO(msg) (((msg).event & PM_EVENT_AUTO) != 0) - +#define PMSG_NO_WAKEUP(msg) (((msg).event & \ + (PM_EVENT_FREEZE | PM_EVENT_QUIESCE)) != 0) /* * Device run-time power management status. * -- cgit v1.2.3 From 7a53af85d3bbdbe06cd47b81a6d99a04dc0a3963 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Mon, 25 Nov 2024 14:02:16 +0530 Subject: wifi: cfg80211: send MLO links tx power info in GET_INTERFACE Currently, TX power is reported on interface/wdev level as part of NL80211_CMD_GET_INTERFACE. With MLO, Multiple links can be part of an interface/wdev and hence its necessary to report the TX power of each link. Add support to send tx power for all valid links of an MLD as part of NL80211_CMD_GET_INTERFACE request. As far as userspace is concerned, there is no behavioral change for Non-ML Interfaces. For ML interfaces, userspace should fetch TX power that is nested inside NL80211_ATTR_MLO_LINKS, similar to how channel info(NL80211_ATTR_WIPHY_FREQ) is fetched. Co-developed-by: Aaradhana Sahu Signed-off-by: Aaradhana Sahu Signed-off-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20241125083217.216095-2-quic_ramess@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 1 + .../broadcom/brcm80211/brcmfmac/cfg80211.c | 2 +- drivers/net/wireless/marvell/mwifiex/cfg80211.c | 2 +- drivers/net/wireless/microchip/wilc1000/cfg80211.c | 2 +- drivers/net/wireless/quantenna/qtnfmac/cfg80211.c | 2 +- drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 3 +- include/net/cfg80211.h | 2 +- net/mac80211/cfg.c | 1 + net/wireless/nl80211.c | 13 ++++++- net/wireless/rdev-ops.h | 7 ++-- net/wireless/trace.h | 44 +++++++++++----------- net/wireless/wext-compat.c | 2 +- 12 files changed, 47 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 61b2e3f15f0e..72ce321f2a77 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1441,6 +1441,7 @@ static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy, static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, int *dbm) { struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 297a7c738c01..689e779fe00f 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -2676,7 +2676,7 @@ done: static s32 brcmf_cfg80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, - s32 *dbm) + unsigned int link_id, s32 *dbm) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_cfg80211_vif *vif = wdev_to_vif(wdev); diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index fca3eea7ee84..a099fdaafa45 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -410,7 +410,7 @@ mwifiex_cfg80211_set_tx_power(struct wiphy *wiphy, static int mwifiex_cfg80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, - int *dbm) + unsigned int link_id, int *dbm) { struct mwifiex_adapter *adapter = mwifiex_cfg80211_get_adapter(wiphy); struct mwifiex_private *priv = mwifiex_get_priv(adapter, diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index e96736cc7259..e7aa0f991923 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1669,7 +1669,7 @@ static int set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, } static int get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, - int *dbm) + unsigned int link_id, int *dbm) { int ret; struct wilc_vif *vif = netdev_priv(wdev->netdev); diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 8b97accf6638..0b2282528342 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -881,7 +881,7 @@ static int qtnf_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, } static int qtnf_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, - int *dbm) + unsigned int link_id, int *dbm) { struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev); int ret; diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index c053ee9c1361..7fcc46a0bb48 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -1802,7 +1802,8 @@ static int cfg80211_rtw_set_txpower(struct wiphy *wiphy, } static int cfg80211_rtw_get_txpower(struct wiphy *wiphy, - struct wireless_dev *wdev, int *dbm) + struct wireless_dev *wdev, + unsigned int link_id, int *dbm) { *dbm = (12); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 63e79a22a214..0a48f47a77dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4733,7 +4733,7 @@ struct cfg80211_ops { int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm); int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, - int *dbm); + unsigned int link_id, int *dbm); void (*rfkill_poll)(struct wiphy *wiphy); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 61a824ec33da..b2410a913556 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3190,6 +3190,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, static int ieee80211_get_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, int *dbm) { struct ieee80211_local *local = wiphy_priv(wiphy); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9590f9bd2ec0..793d910347e3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3980,10 +3980,10 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - if (rdev->ops->get_tx_power) { + if (rdev->ops->get_tx_power && !wdev->valid_links) { int dbm, ret; - ret = rdev_get_tx_power(rdev, wdev, &dbm); + ret = rdev_get_tx_power(rdev, wdev, 0, &dbm); if (ret == 0 && nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, DBM_TO_MBM(dbm))) @@ -4052,6 +4052,15 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag if (ret == 0 && nl80211_send_chandef(msg, &chandef)) goto nla_put_failure; + if (rdev->ops->get_tx_power) { + int dbm, ret; + + ret = rdev_get_tx_power(rdev, wdev, link_id, &dbm); + if (ret == 0 && + nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + DBM_TO_MBM(dbm))) + goto nla_put_failure; + } nla_nest_end(msg, link); } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index adb6105bbb7d..8f2aa7e76c0a 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -600,11 +600,12 @@ static inline int rdev_set_tx_power(struct cfg80211_registered_device *rdev, } static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, int *dbm) + struct wireless_dev *wdev, unsigned int link_id, + int *dbm) { int ret; - trace_rdev_get_tx_power(&rdev->wiphy, wdev); - ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, dbm); + trace_rdev_get_tx_power(&rdev->wiphy, wdev, link_id); + ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, link_id, dbm); trace_rdev_return_int_int(&rdev->wiphy, ret, *dbm); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index d5c9bb614fa6..a57210c8087c 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1690,9 +1690,28 @@ TRACE_EVENT(rdev_set_wiphy_params, WIPHY_PR_ARG, __entry->changed) ); -DEFINE_EVENT(wiphy_wdev_evt, rdev_get_tx_power, - TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), - TP_ARGS(wiphy, wdev) +DECLARE_EVENT_CLASS(wiphy_wdev_link_evt, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id), + TP_ARGS(wiphy, wdev, link_id), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(unsigned int, link_id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->link_id = link_id; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) +); + +DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_tx_power, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id), + TP_ARGS(wiphy, wdev, link_id) ); TRACE_EVENT(rdev_set_tx_power, @@ -2192,25 +2211,6 @@ TRACE_EVENT(rdev_set_noack_map, TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", noack_map: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map) ); - -DECLARE_EVENT_CLASS(wiphy_wdev_link_evt, - TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, - unsigned int link_id), - TP_ARGS(wiphy, wdev, link_id), - TP_STRUCT__entry( - WIPHY_ENTRY - WDEV_ENTRY - __field(unsigned int, link_id) - ), - TP_fast_assign( - WIPHY_ASSIGN; - WDEV_ASSIGN; - __entry->link_id = link_id; - ), - TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u", - WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) -); - DEFINE_EVENT(wiphy_wdev_link_evt, rdev_get_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, unsigned int link_id), diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 687f93664d1f..a74b1afc594e 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -910,7 +910,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, return -EOPNOTSUPP; scoped_guard(wiphy, &rdev->wiphy) { - err = rdev_get_tx_power(rdev, wdev, &val); + err = rdev_get_tx_power(rdev, wdev, 0, &val); } if (err) return err; -- cgit v1.2.3 From 24dab555ad5951824e3fb6b665aaca84ac69dd12 Mon Sep 17 00:00:00 2001 From: Rameshkumar Sundaram Date: Mon, 25 Nov 2024 14:02:17 +0530 Subject: wifi: mac80211: get tx power per link ML interfaces can have multiple affiliated links to it and hence there is a need to report tx power of specified link rather deflink. Add changes to report tx power of requested link from mac80211, also pass link id as an argument in get_tx_power op so that supported drivers can use it to report link's tx power. Co-developed-by: Aaradhana Sahu Signed-off-by: Aaradhana Sahu Signed-off-by: Rameshkumar Sundaram Link: https://patch.msgid.link/20241125083217.216095-3-quic_ramess@quicinc.com Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/mac.c | 1 + drivers/net/wireless/ath/ath9k/main.c | 2 +- drivers/net/wireless/mediatek/mt76/mac80211.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76.h | 2 +- include/net/mac80211.h | 2 +- net/mac80211/cfg.c | 15 +++++++++++---- net/mac80211/driver-ops.h | 7 ++++--- net/mac80211/trace.h | 10 ++++++---- 8 files changed, 26 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index e6acbff06749..7e75a9b13ef9 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -9356,6 +9356,7 @@ static int ath11k_fw_stats_request(struct ath11k *ar, static int ath11k_mac_op_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + unsigned int link_id, int *dbm) { struct ath11k *ar = hw->priv; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index b92c89dad8de..2f137856a823 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2767,7 +2767,7 @@ void ath9k_fill_chanctx_ops(void) #endif static int ath9k_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - int *dbm) + unsigned int link_id, int *dbm) { struct ath_softc *sc = hw->priv; struct ath_vif *avp = (void *)vif->drv_priv; diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 9d5561f44134..7fbce5e757df 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -1596,7 +1596,7 @@ void mt76_wcid_cleanup(struct mt76_dev *dev, struct mt76_wcid *wcid) EXPORT_SYMBOL_GPL(mt76_wcid_cleanup); int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - int *dbm) + unsigned int link_id, int *dbm) { struct mt76_phy *phy = hw->priv; int n_chains = hweight16(phy->chainmask); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 0b75a45ad2e8..ca2dba3ac65d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -1431,7 +1431,7 @@ void mt76_sta_pre_rcu_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int mt76_get_min_avg_rssi(struct mt76_dev *dev, bool ext_phy); int mt76_get_txpower(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - int *dbm); + unsigned int link_id, int *dbm); int mt76_init_sar_power(struct ieee80211_hw *hw, const struct cfg80211_sar_specs *sar); int mt76_get_sar_power(struct mt76_phy *phy, diff --git a/include/net/mac80211.h b/include/net/mac80211.h index a97c9f85ae9a..5ce4dfa3fba5 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4759,7 +4759,7 @@ struct ieee80211_ops { u32 (*get_expected_throughput)(struct ieee80211_hw *hw, struct ieee80211_sta *sta); int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - int *dbm); + unsigned int link_id, int *dbm); int (*tdls_channel_switch)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b2410a913556..2fa594fb6c1a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3195,15 +3195,22 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct ieee80211_link_data *link_data; if (local->ops->get_txpower && (sdata->flags & IEEE80211_SDATA_IN_DRIVER)) - return drv_get_txpower(local, sdata, dbm); + return drv_get_txpower(local, sdata, link_id, dbm); - if (local->emulate_chanctx) + if (local->emulate_chanctx) { *dbm = local->hw.conf.power_level; - else - *dbm = sdata->vif.bss_conf.txpower; + } else { + link_data = wiphy_dereference(wiphy, sdata->link[link_id]); + + if (link_data) + *dbm = link_data->conf->txpower; + else + return -ENOLINK; + } /* INT_MIN indicates no power level was set yet */ if (*dbm == INT_MIN) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index edd1e4d4ad9d..c64531e0a60e 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1273,7 +1273,8 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, } static inline int drv_get_txpower(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, int *dbm) + struct ieee80211_sub_if_data *sdata, + unsigned int link_id, int *dbm) { int ret; @@ -1283,8 +1284,8 @@ static inline int drv_get_txpower(struct ieee80211_local *local, if (!local->ops->get_txpower) return -EOPNOTSUPP; - ret = local->ops->get_txpower(&local->hw, &sdata->vif, dbm); - trace_drv_get_txpower(local, sdata, *dbm, ret); + ret = local->ops->get_txpower(&local->hw, &sdata->vif, link_id, dbm); + trace_drv_get_txpower(local, sdata, link_id, *dbm, ret); return ret; } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 7a4985fc2b16..dc35fed7e9b0 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2173,13 +2173,14 @@ DEFINE_EVENT(chanswitch_evt, drv_channel_switch_rx_beacon, TRACE_EVENT(drv_get_txpower, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - int dbm, int ret), + unsigned int link_id, int dbm, int ret), - TP_ARGS(local, sdata, dbm, ret), + TP_ARGS(local, sdata, link_id, dbm, ret), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY + __field(unsigned int, link_id) __field(int, dbm) __field(int, ret) ), @@ -2187,13 +2188,14 @@ TRACE_EVENT(drv_get_txpower, TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; + __entry->link_id = link_id; __entry->dbm = dbm; __entry->ret = ret; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " dbm:%d ret:%d", - LOCAL_PR_ARG, VIF_PR_ARG, __entry->dbm, __entry->ret + LOCAL_PR_FMT VIF_PR_FMT " link_id:%d dbm:%d ret:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->link_id, __entry->dbm, __entry->ret ) ); -- cgit v1.2.3 From d8d936c51388442f769a81e512b505dcf87c6a51 Mon Sep 17 00:00:00 2001 From: Dingyan Li <18500469033@163.com> Date: Wed, 30 Oct 2024 16:38:58 +0800 Subject: usb: storage: add a macro for the upper limit of max LUN The meaning of this value is already used in several places, but with constant values and comments to explain it separately. It's better to have a central place to do this then use the macro in those places for better readability. Signed-off-by: Dingyan Li <18500469033@163.com> Reviewed-by: Alan Stern Link: https://lore.kernel.org/r/20241030083858.46907-1-18500469033@163.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/function/f_tcm.c | 8 ++------ drivers/usb/gadget/function/storage_common.h | 2 +- drivers/usb/storage/transport.c | 8 ++------ include/linux/usb/storage.h | 8 ++++++++ 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c index 15bb3aa12aa8..e1914b20f816 100644 --- a/drivers/usb/gadget/function/f_tcm.c +++ b/drivers/usb/gadget/function/f_tcm.c @@ -441,14 +441,10 @@ static int usbg_bot_setup(struct usb_function *f, pr_err("No LUNs configured?\n"); return -EINVAL; } - /* - * If 4 LUNs are present we return 3 i.e. LUN 0..3 can be - * accessed. The upper limit is 0xf - */ luns--; - if (luns > 0xf) { + if (luns > US_BULK_MAX_LUN_LIMIT) { pr_info_once("Limiting the number of luns to 16\n"); - luns = 0xf; + luns = US_BULK_MAX_LUN_LIMIT; } ret_lun = cdev->req->buf; *ret_lun = luns; diff --git a/drivers/usb/gadget/function/storage_common.h b/drivers/usb/gadget/function/storage_common.h index ced5d2b09234..11ac785d5eee 100644 --- a/drivers/usb/gadget/function/storage_common.h +++ b/drivers/usb/gadget/function/storage_common.h @@ -131,7 +131,7 @@ static inline bool fsg_lun_is_open(struct fsg_lun *curlun) #define FSG_BUFLEN ((u32)16384) /* Maximal number of LUNs supported in mass storage function */ -#define FSG_MAX_LUNS 16 +#define FSG_MAX_LUNS (US_BULK_MAX_LUN_LIMIT + 1) enum fsg_buffer_state { BUF_STATE_SENDING = -2, diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 9d767f6bf722..e6bc8ecaecbb 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -1087,13 +1087,9 @@ int usb_stor_Bulk_max_lun(struct us_data *us) usb_stor_dbg(us, "GetMaxLUN command result is %d, data is %d\n", result, us->iobuf[0]); - /* - * If we have a successful request, return the result if valid. The - * CBW LUN field is 4 bits wide, so the value reported by the device - * should fit into that. - */ + /* If we have a successful request, return the result if valid. */ if (result > 0) { - if (us->iobuf[0] < 16) { + if (us->iobuf[0] <= US_BULK_MAX_LUN_LIMIT) { return us->iobuf[0]; } else { dev_info(&us->pusb_intf->dev, diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h index 8539956bc2be..51be3bb8fccb 100644 --- a/include/linux/usb/storage.h +++ b/include/linux/usb/storage.h @@ -82,4 +82,12 @@ struct bulk_cs_wrap { #define US_BULK_RESET_REQUEST 0xff #define US_BULK_GET_MAX_LUN 0xfe +/* + * If 4 LUNs are supported then the LUNs would be + * numbered from 0 to 3, and the return value for + * US_BULK_GET_MAX_LUN request would be 3. The valid + * LUN field is 4 bits wide, the upper limit is 0x0f. + */ +#define US_BULK_MAX_LUN_LIMIT 0x0f + #endif -- cgit v1.2.3 From 535a07698b8b3e6f305673102d297262cae2360a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 4 Dec 2024 05:09:22 +0200 Subject: serial: 8250_pci: Share WCH IDs with parport_serial driver parport_serial driver uses subset of WCH IDs that are present in 8250_pci. Share them via pci_ids.h and switch parport_serial to use defined constants. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20241204031114.1029882-3-andriy.shevchenko@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- drivers/parport/parport_serial.c | 12 ++++++++---- drivers/tty/serial/8250/8250_pci.c | 10 ++-------- include/linux/pci_ids.h | 11 +++++++++++ 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c index 3644997a8342..24d4f3a3ec3d 100644 --- a/drivers/parport/parport_serial.c +++ b/drivers/parport/parport_serial.c @@ -266,10 +266,14 @@ static struct pci_device_id parport_serial_pci_tbl[] = { { 0x1409, 0x7168, 0x1409, 0xd079, 0, 0, timedia_9079c }, /* WCH CARDS */ - { 0x4348, 0x5053, PCI_ANY_ID, PCI_ANY_ID, 0, 0, wch_ch353_1s1p}, - { 0x4348, 0x7053, 0x4348, 0x3253, 0, 0, wch_ch353_2s1p}, - { 0x1c00, 0x3050, 0x1c00, 0x3050, 0, 0, wch_ch382_0s1p}, - { 0x1c00, 0x3250, 0x1c00, 0x3250, 0, 0, wch_ch382_2s1p}, + { PCI_VENDOR_ID_WCHCN, PCI_DEVICE_ID_WCHCN_CH353_1S1P, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, wch_ch353_1s1p }, + { PCI_VENDOR_ID_WCHCN, PCI_DEVICE_ID_WCHCN_CH353_2S1P, + 0x4348, 0x3253, 0, 0, wch_ch353_2s1p }, + { PCI_VENDOR_ID_WCHIC, PCI_DEVICE_ID_WCHIC_CH382_0S1P, + 0x1c00, 0x3050, 0, 0, wch_ch382_0s1p }, + { PCI_VENDOR_ID_WCHIC, PCI_DEVICE_ID_WCHIC_CH382_2S1P, + 0x1c00, 0x3250, 0, 0, wch_ch382_2s1p }, /* BrainBoxes PX272/PX306 MIO card */ { PCI_VENDOR_ID_INTASHIELD, 0x4100, diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index dfac79744d37..df4d0d832e54 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -64,23 +64,17 @@ #define PCIE_DEVICE_ID_NEO_2_OX_IBM 0x00F6 #define PCI_DEVICE_ID_PLX_CRONYX_OMEGA 0xc001 #define PCI_DEVICE_ID_INTEL_PATSBURG_KT 0x1d3d -#define PCI_VENDOR_ID_WCHCN 0x4348 + #define PCI_DEVICE_ID_WCHCN_CH352_2S 0x3253 -#define PCI_DEVICE_ID_WCHCN_CH353_4S 0x3453 -#define PCI_DEVICE_ID_WCHCN_CH353_2S1PF 0x5046 -#define PCI_DEVICE_ID_WCHCN_CH353_1S1P 0x5053 -#define PCI_DEVICE_ID_WCHCN_CH353_2S1P 0x7053 #define PCI_DEVICE_ID_WCHCN_CH355_4S 0x7173 + #define PCI_VENDOR_ID_AGESTAR 0x5372 #define PCI_DEVICE_ID_AGESTAR_9375 0x6872 #define PCI_DEVICE_ID_BROADCOM_TRUMANAGE 0x160a #define PCI_DEVICE_ID_AMCC_ADDIDATA_APCI7800 0x818e -#define PCI_VENDOR_ID_WCHIC 0x1c00 -#define PCI_DEVICE_ID_WCHIC_CH382_2S1P 0x3250 #define PCI_DEVICE_ID_WCHIC_CH384_4S 0x3470 #define PCI_DEVICE_ID_WCHIC_CH384_8S 0x3853 -#define PCI_DEVICE_ID_WCHIC_CH382_2S 0x3253 #define PCI_DEVICE_ID_MOXA_CP102E 0x1024 #define PCI_DEVICE_ID_MOXA_CP102EL 0x1025 diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d2402bf4aea2..de5deb1a0118 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2593,6 +2593,11 @@ #define PCI_VENDOR_ID_REDHAT 0x1b36 +#define PCI_VENDOR_ID_WCHIC 0x1c00 +#define PCI_DEVICE_ID_WCHIC_CH382_0S1P 0x3050 +#define PCI_DEVICE_ID_WCHIC_CH382_2S1P 0x3250 +#define PCI_DEVICE_ID_WCHIC_CH382_2S 0x3253 + #define PCI_VENDOR_ID_SILICOM_DENMARK 0x1c2c #define PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS 0x1c36 @@ -2647,6 +2652,12 @@ #define PCI_VENDOR_ID_AKS 0x416c #define PCI_DEVICE_ID_AKS_ALADDINCARD 0x0100 +#define PCI_VENDOR_ID_WCHCN 0x4348 +#define PCI_DEVICE_ID_WCHCN_CH353_4S 0x3453 +#define PCI_DEVICE_ID_WCHCN_CH353_2S1PF 0x5046 +#define PCI_DEVICE_ID_WCHCN_CH353_1S1P 0x5053 +#define PCI_DEVICE_ID_WCHCN_CH353_2S1P 0x7053 + #define PCI_VENDOR_ID_ACCESSIO 0x494f #define PCI_DEVICE_ID_ACCESSIO_WDG_CSM 0x22c0 -- cgit v1.2.3 From 6fba89813ccf333d2bc4d5caea04cd5f3c39eb50 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 23 Oct 2024 14:21:54 -0700 Subject: lsm: ensure the correct LSM context releaser Add a new lsm_context data structure to hold all the information about a "security context", including the string, its size and which LSM allocated the string. The allocation information is necessary because LSMs have different policies regarding the lifecycle of these strings. SELinux allocates and destroys them on each use, whereas Smack provides a pointer to an entry in a list that never goes away. Update security_release_secctx() to use the lsm_context instead of a (char *, len) pair. Change its callers to do likewise. The LSMs supporting this hook have had comments added to remind the developer that there is more work to be done. The BPF security module provides all LSM hooks. While there has yet to be a known instance of a BPF configuration that uses security contexts, the possibility is real. In the existing implementation there is potential for multiple frees in that case. Cc: linux-integrity@vger.kernel.org Cc: netdev@vger.kernel.org Cc: audit@vger.kernel.org Cc: netfilter-devel@vger.kernel.org To: Pablo Neira Ayuso Cc: linux-nfs@vger.kernel.org Cc: Todd Kjos Signed-off-by: Casey Schaufler [PM: subject tweak] Signed-off-by: Paul Moore --- drivers/android/binder.c | 24 ++++++++++---------- fs/ceph/xattr.c | 6 ++++- fs/nfs/nfs4proc.c | 8 +++++-- fs/nfsd/nfs4xdr.c | 8 +++++-- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 35 +++++++++++++++++++++++++++-- include/net/scm.h | 11 ++++----- kernel/audit.c | 30 ++++++++++++------------- kernel/auditsc.c | 23 ++++++++++--------- net/ipv4/ip_sockglue.c | 10 ++++----- net/netfilter/nf_conntrack_netlink.c | 10 ++++----- net/netfilter/nf_conntrack_standalone.c | 9 ++++---- net/netfilter/nfnetlink_queue.c | 13 +++++++---- net/netlabel/netlabel_unlabeled.c | 40 +++++++++++++++------------------ net/netlabel/netlabel_user.c | 11 +++++---- security/apparmor/include/secid.h | 2 +- security/apparmor/secid.c | 11 +++++++-- security/security.c | 8 +++---- security/selinux/hooks.c | 11 +++++++-- 19 files changed, 165 insertions(+), 107 deletions(-) (limited to 'include') diff --git a/drivers/android/binder.c b/drivers/android/binder.c index ef353ca13c35..e8245df63289 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3017,8 +3017,7 @@ static void binder_transaction(struct binder_proc *proc, struct binder_context *context = proc->context; int t_debug_id = atomic_inc_return(&binder_last_id); ktime_t t_start_time = ktime_get(); - char *secctx = NULL; - u32 secctx_sz = 0; + struct lsm_context lsmctx; struct list_head sgc_head; struct list_head pf_head; const void __user *user_buffer = (const void __user *) @@ -3297,7 +3296,8 @@ static void binder_transaction(struct binder_proc *proc, size_t added_size; security_cred_getsecid(proc->cred, &secid); - ret = security_secid_to_secctx(secid, &secctx, &secctx_sz); + ret = security_secid_to_secctx(secid, &lsmctx.context, + &lsmctx.len); if (ret) { binder_txn_error("%d:%d failed to get security context\n", thread->pid, proc->pid); @@ -3306,7 +3306,7 @@ static void binder_transaction(struct binder_proc *proc, return_error_line = __LINE__; goto err_get_secctx_failed; } - added_size = ALIGN(secctx_sz, sizeof(u64)); + added_size = ALIGN(lsmctx.len, sizeof(u64)); extra_buffers_size += added_size; if (extra_buffers_size < added_size) { binder_txn_error("%d:%d integer overflow of extra_buffers_size\n", @@ -3340,23 +3340,23 @@ static void binder_transaction(struct binder_proc *proc, t->buffer = NULL; goto err_binder_alloc_buf_failed; } - if (secctx) { + if (lsmctx.context) { int err; size_t buf_offset = ALIGN(tr->data_size, sizeof(void *)) + ALIGN(tr->offsets_size, sizeof(void *)) + ALIGN(extra_buffers_size, sizeof(void *)) - - ALIGN(secctx_sz, sizeof(u64)); + ALIGN(lsmctx.len, sizeof(u64)); t->security_ctx = t->buffer->user_data + buf_offset; err = binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, buf_offset, - secctx, secctx_sz); + lsmctx.context, lsmctx.len); if (err) { t->security_ctx = 0; WARN_ON(1); } - security_release_secctx(secctx, secctx_sz); - secctx = NULL; + security_release_secctx(&lsmctx); + lsmctx.context = NULL; } t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; @@ -3400,7 +3400,7 @@ static void binder_transaction(struct binder_proc *proc, off_end_offset = off_start_offset + tr->offsets_size; sg_buf_offset = ALIGN(off_end_offset, sizeof(void *)); sg_buf_end_offset = sg_buf_offset + extra_buffers_size - - ALIGN(secctx_sz, sizeof(u64)); + ALIGN(lsmctx.len, sizeof(u64)); off_min = 0; for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; buffer_offset += sizeof(binder_size_t)) { @@ -3779,8 +3779,8 @@ err_copy_data_failed: binder_alloc_free_buf(&target_proc->alloc, t->buffer); err_binder_alloc_buf_failed: err_bad_extra_size: - if (secctx) - security_release_secctx(secctx, secctx_sz); + if (lsmctx.context) + security_release_secctx(&lsmctx); err_get_secctx_failed: kfree(tcomplete); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 1a9f12204666..2a14335a4bb7 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1446,12 +1446,16 @@ out: void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx) { +#ifdef CONFIG_CEPH_FS_SECURITY_LABEL + struct lsm_context scaff; /* scaffolding */ +#endif #ifdef CONFIG_CEPH_FS_POSIX_ACL posix_acl_release(as_ctx->acl); posix_acl_release(as_ctx->default_acl); #endif #ifdef CONFIG_CEPH_FS_SECURITY_LABEL - security_release_secctx(as_ctx->sec_ctx, as_ctx->sec_ctxlen); + lsmcontext_init(&scaff, as_ctx->sec_ctx, as_ctx->sec_ctxlen, 0); + security_release_secctx(&scaff); #endif #ifdef CONFIG_FS_ENCRYPTION kfree(as_ctx->fscrypt_auth); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 405f17e6e0b4..2daeb6f663d9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -138,8 +138,12 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, static inline void nfs4_label_release_security(struct nfs4_label *label) { - if (label) - security_release_secctx(label->label, label->len); + struct lsm_context scaff; /* scaffolding */ + + if (label) { + lsmcontext_init(&scaff, label->label, label->len, 0); + security_release_secctx(&scaff); + } } static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 53fac037611c..95ec6a4b4da3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3644,8 +3644,12 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, out: #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if (args.context) - security_release_secctx(args.context, args.contextlen); + if (args.context) { + struct lsm_context scaff; /* scaffolding */ + + lsmcontext_init(&scaff, args.context, args.contextlen, 0); + security_release_secctx(&scaff); + } #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(args.acl); if (tempfh) { diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index eb2937599cb0..c13df23132eb 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -300,7 +300,7 @@ LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, char **secdata, u32 *seclen) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) -LSM_HOOK(void, LSM_RET_VOID, release_secctx, char *secdata, u32 seclen) +LSM_HOOK(void, LSM_RET_VOID, release_secctx, struct lsm_context *cp) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) diff --git a/include/linux/security.h b/include/linux/security.h index cbdba435b798..68e56935716b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -225,6 +225,37 @@ extern unsigned long dac_mmap_min_addr; #define dac_mmap_min_addr 0UL #endif +/* + * A "security context" is the text representation of + * the information used by LSMs. + * This structure contains the string, its length, and which LSM + * it is useful for. + */ +struct lsm_context { + char *context; /* Provided by the module */ + u32 len; + int id; /* Identifies the module */ +}; + +/** + * lsmcontext_init - initialize an lsmcontext structure. + * @cp: Pointer to the context to initialize + * @context: Initial context, or NULL + * @size: Size of context, or 0 + * @id: Which LSM provided the context + * + * Fill in the lsmcontext from the provided information. + * This is a scaffolding function that will be removed when + * lsm_context integration is complete. + */ +static inline void lsmcontext_init(struct lsm_context *cp, char *context, + u32 size, int id) +{ + cp->id = id; + cp->context = context; + cp->len = size; +} + /* * Values used in the task_security_ops calls */ @@ -556,7 +587,7 @@ int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); -void security_release_secctx(char *secdata, u32 seclen); +void security_release_secctx(struct lsm_context *cp); void security_inode_invalidate_secctx(struct inode *inode); int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); @@ -1545,7 +1576,7 @@ static inline int security_secctx_to_secid(const char *secdata, return -EOPNOTSUPP; } -static inline void security_release_secctx(char *secdata, u32 seclen) +static inline void security_release_secctx(struct lsm_context *cp) { } diff --git a/include/net/scm.h b/include/net/scm.h index 0d35c7c77a74..f75449e1d876 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -105,16 +105,17 @@ static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, #ifdef CONFIG_SECURITY_NETWORK static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { - char *secdata; - u32 seclen; + struct lsm_context ctx; int err; if (test_bit(SOCK_PASSSEC, &sock->flags)) { - err = security_secid_to_secctx(scm->secid, &secdata, &seclen); + err = security_secid_to_secctx(scm->secid, &ctx.context, + &ctx.len); if (!err) { - put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, seclen, secdata); - security_release_secctx(secdata, seclen); + put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, ctx.len, + ctx.context); + security_release_secctx(&ctx); } } } diff --git a/kernel/audit.c b/kernel/audit.c index 6a95a6077953..1d48d0654a46 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1221,8 +1221,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct audit_buffer *ab; u16 msg_type = nlh->nlmsg_type; struct audit_sig_info *sig_data; - char *ctx = NULL; - u32 len; + struct lsm_context lsmctx; err = audit_netlink_ok(skb, msg_type); if (err) @@ -1472,27 +1471,29 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, break; } case AUDIT_SIGNAL_INFO: - len = 0; if (lsmprop_is_set(&audit_sig_lsm)) { - err = security_lsmprop_to_secctx(&audit_sig_lsm, &ctx, - &len); + err = security_lsmprop_to_secctx(&audit_sig_lsm, + &lsmctx.context, + &lsmctx.len); if (err) return err; } - sig_data = kmalloc(struct_size(sig_data, ctx, len), GFP_KERNEL); + sig_data = kmalloc(struct_size(sig_data, ctx, lsmctx.len), + GFP_KERNEL); if (!sig_data) { if (lsmprop_is_set(&audit_sig_lsm)) - security_release_secctx(ctx, len); + security_release_secctx(&lsmctx); return -ENOMEM; } sig_data->uid = from_kuid(&init_user_ns, audit_sig_uid); sig_data->pid = audit_sig_pid; if (lsmprop_is_set(&audit_sig_lsm)) { - memcpy(sig_data->ctx, ctx, len); - security_release_secctx(ctx, len); + memcpy(sig_data->ctx, lsmctx.context, lsmctx.len); + security_release_secctx(&lsmctx); } audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0, - sig_data, struct_size(sig_data, ctx, len)); + sig_data, struct_size(sig_data, ctx, + lsmctx.len)); kfree(sig_data); break; case AUDIT_TTY_GET: { @@ -2180,23 +2181,22 @@ void audit_log_key(struct audit_buffer *ab, char *key) int audit_log_task_context(struct audit_buffer *ab) { struct lsm_prop prop; - char *ctx = NULL; - unsigned len; + struct lsm_context ctx; int error; security_current_getlsmprop_subj(&prop); if (!lsmprop_is_set(&prop)) return 0; - error = security_lsmprop_to_secctx(&prop, &ctx, &len); + error = security_lsmprop_to_secctx(&prop, &ctx.context, &ctx.len); if (error) { if (error != -EINVAL) goto error_path; return 0; } - audit_log_format(ab, " subj=%s", ctx); - security_release_secctx(ctx, len); + audit_log_format(ab, " subj=%s", ctx.context); + security_release_secctx(&ctx); return 0; error_path: diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 279ba5c420a4..de8fac6c5bd3 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1098,8 +1098,7 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, char *comm) { struct audit_buffer *ab; - char *ctx = NULL; - u32 len; + struct lsm_context ctx; int rc = 0; ab = audit_log_start(context, GFP_KERNEL, AUDIT_OBJ_PID); @@ -1110,12 +1109,12 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, from_kuid(&init_user_ns, auid), from_kuid(&init_user_ns, uid), sessionid); if (lsmprop_is_set(prop)) { - if (security_lsmprop_to_secctx(prop, &ctx, &len)) { + if (security_lsmprop_to_secctx(prop, &ctx.context, &ctx.len)) { audit_log_format(ab, " obj=(none)"); rc = 1; } else { - audit_log_format(ab, " obj=%s", ctx); - security_release_secctx(ctx, len); + audit_log_format(ab, " obj=%s", ctx.context); + security_release_secctx(&ctx); } } audit_log_format(ab, " ocomm="); @@ -1371,6 +1370,7 @@ static void audit_log_time(struct audit_context *context, struct audit_buffer ** static void show_special(struct audit_context *context, int *call_panic) { + struct lsm_context lsmcxt; struct audit_buffer *ab; int i; @@ -1401,7 +1401,8 @@ static void show_special(struct audit_context *context, int *call_panic) *call_panic = 1; } else { audit_log_format(ab, " obj=%s", ctx); - security_release_secctx(ctx, len); + lsmcontext_init(&lsmcxt, ctx, len, 0); + security_release_secctx(&lsmcxt); } } if (context->ipc.has_perm) { @@ -1560,15 +1561,15 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, MAJOR(n->rdev), MINOR(n->rdev)); if (lsmprop_is_set(&n->oprop)) { - char *ctx = NULL; - u32 len; + struct lsm_context ctx; - if (security_lsmprop_to_secctx(&n->oprop, &ctx, &len)) { + if (security_lsmprop_to_secctx(&n->oprop, &ctx.context, + &ctx.len)) { if (call_panic) *call_panic = 2; } else { - audit_log_format(ab, " obj=%s", ctx); - security_release_secctx(ctx, len); + audit_log_format(ab, " obj=%s", ctx.context); + security_release_secctx(&ctx); } } diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cf377377b52d..a8180dcc2a32 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -128,20 +128,20 @@ static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { - char *secdata; - u32 seclen, secid; + struct lsm_context ctx; + u32 secid; int err; err = security_socket_getpeersec_dgram(NULL, skb, &secid); if (err) return; - err = security_secid_to_secctx(secid, &secdata, &seclen); + err = security_secid_to_secctx(secid, &ctx.context, &ctx.len); if (err) return; - put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata); - security_release_secctx(secdata, seclen); + put_cmsg(msg, SOL_IP, SCM_SECURITY, ctx.len, ctx.context); + security_release_secctx(&ctx); } static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 36168f8b6efa..22f46be33f1e 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -357,10 +357,10 @@ nla_put_failure: static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_secctx; - int len, ret; - char *secctx; + struct lsm_context ctx; + int ret; - ret = security_secid_to_secctx(ct->secmark, &secctx, &len); + ret = security_secid_to_secctx(ct->secmark, &ctx.context, &ctx.len); if (ret) return 0; @@ -369,13 +369,13 @@ static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) if (!nest_secctx) goto nla_put_failure; - if (nla_put_string(skb, CTA_SECCTX_NAME, secctx)) + if (nla_put_string(skb, CTA_SECCTX_NAME, ctx.context)) goto nla_put_failure; nla_nest_end(skb, nest_secctx); ret = 0; nla_put_failure: - security_release_secctx(secctx, len); + security_release_secctx(&ctx); return ret; } #else diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 7d4f0fa8b609..5f7fd23b7afe 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -172,17 +172,16 @@ static void ct_seq_stop(struct seq_file *s, void *v) #ifdef CONFIG_NF_CONNTRACK_SECMARK static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) { + struct lsm_context ctx; int ret; - u32 len; - char *secctx; - ret = security_secid_to_secctx(ct->secmark, &secctx, &len); + ret = security_secid_to_secctx(ct->secmark, &ctx.context, &ctx.len); if (ret) return; - seq_printf(s, "secctx=%s ", secctx); + seq_printf(s, "secctx=%s ", ctx.context); - security_release_secctx(secctx, len); + security_release_secctx(&ctx); } #else static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index d2773ce9b585..37757cd77cf1 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -567,6 +567,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, enum ip_conntrack_info ctinfo = 0; const struct nfnl_ct_hook *nfnl_ct; bool csum_verify; + struct lsm_context scaff; /* scaffolding */ char *secdata = NULL; u32 seclen = 0; ktime_t tstamp; @@ -810,8 +811,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; - if (seclen) - security_release_secctx(secdata, seclen); + if (seclen) { + lsmcontext_init(&scaff, secdata, seclen, 0); + security_release_secctx(&scaff); + } return skb; nla_put_failure: @@ -819,8 +822,10 @@ nla_put_failure: kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); nlmsg_failure: - if (seclen) - security_release_secctx(secdata, seclen); + if (seclen) { + lsmcontext_init(&scaff, secdata, seclen, 0); + security_release_secctx(&scaff); + } return NULL; } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 1bc2d0890a9f..921fa8eeb451 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -374,8 +374,7 @@ int netlbl_unlhsh_add(struct net *net, struct net_device *dev; struct netlbl_unlhsh_iface *iface; struct audit_buffer *audit_buf = NULL; - char *secctx = NULL; - u32 secctx_len; + struct lsm_context ctx; if (addr_len != sizeof(struct in_addr) && addr_len != sizeof(struct in6_addr)) @@ -439,10 +438,10 @@ unlhsh_add_return: rcu_read_unlock(); if (audit_buf != NULL) { if (security_secid_to_secctx(secid, - &secctx, - &secctx_len) == 0) { - audit_log_format(audit_buf, " sec_obj=%s", secctx); - security_release_secctx(secctx, secctx_len); + &ctx.context, + &ctx.len) == 0) { + audit_log_format(audit_buf, " sec_obj=%s", ctx.context); + security_release_secctx(&ctx); } audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); audit_log_end(audit_buf); @@ -473,8 +472,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, struct netlbl_unlhsh_addr4 *entry; struct audit_buffer *audit_buf; struct net_device *dev; - char *secctx; - u32 secctx_len; + struct lsm_context ctx; spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, @@ -495,9 +493,9 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, - &secctx, &secctx_len) == 0) { - audit_log_format(audit_buf, " sec_obj=%s", secctx); - security_release_secctx(secctx, secctx_len); + &ctx.context, &ctx.len) == 0) { + audit_log_format(audit_buf, " sec_obj=%s", ctx.context); + security_release_secctx(&ctx); } audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); @@ -534,8 +532,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, struct netlbl_unlhsh_addr6 *entry; struct audit_buffer *audit_buf; struct net_device *dev; - char *secctx; - u32 secctx_len; + struct lsm_context ctx; spin_lock(&netlbl_unlhsh_lock); list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); @@ -555,9 +552,9 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, dev_put(dev); if (entry != NULL && security_secid_to_secctx(entry->secid, - &secctx, &secctx_len) == 0) { - audit_log_format(audit_buf, " sec_obj=%s", secctx); - security_release_secctx(secctx, secctx_len); + &ctx.context, &ctx.len) == 0) { + audit_log_format(audit_buf, " sec_obj=%s", ctx.context); + security_release_secctx(&ctx); } audit_log_format(audit_buf, " res=%u", entry != NULL ? 1 : 0); audit_log_end(audit_buf); @@ -1069,10 +1066,9 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, int ret_val = -ENOMEM; struct netlbl_unlhsh_walk_arg *cb_arg = arg; struct net_device *dev; + struct lsm_context ctx; void *data; u32 secid; - char *secctx; - u32 secctx_len; data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, @@ -1127,14 +1123,14 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, secid = addr6->secid; } - ret_val = security_secid_to_secctx(secid, &secctx, &secctx_len); + ret_val = security_secid_to_secctx(secid, &ctx.context, &ctx.len); if (ret_val != 0) goto list_cb_failure; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_SECCTX, - secctx_len, - secctx); - security_release_secctx(secctx, secctx_len); + ctx.len, + ctx.context); + security_release_secctx(&ctx); if (ret_val != 0) goto list_cb_failure; diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 81635a13987b..f5e7a9919df1 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -84,8 +84,7 @@ struct audit_buffer *netlbl_audit_start_common(int type, struct netlbl_audit *audit_info) { struct audit_buffer *audit_buf; - char *secctx; - u32 secctx_len; + struct lsm_context ctx; if (audit_enabled == AUDIT_OFF) return NULL; @@ -99,10 +98,10 @@ struct audit_buffer *netlbl_audit_start_common(int type, audit_info->sessionid); if (lsmprop_is_set(&audit_info->prop) && - security_lsmprop_to_secctx(&audit_info->prop, &secctx, - &secctx_len) == 0) { - audit_log_format(audit_buf, " subj=%s", secctx); - security_release_secctx(secctx, secctx_len); + security_lsmprop_to_secctx(&audit_info->prop, &ctx.context, + &ctx.len) == 0) { + audit_log_format(audit_buf, " subj=%s", ctx.context); + security_release_secctx(&ctx); } return audit_buf; diff --git a/security/apparmor/include/secid.h b/security/apparmor/include/secid.h index f6a515640950..1bcde2f45f24 100644 --- a/security/apparmor/include/secid.h +++ b/security/apparmor/include/secid.h @@ -29,7 +29,7 @@ int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); -void apparmor_release_secctx(char *secdata, u32 seclen); +void apparmor_release_secctx(struct lsm_context *cp); int aa_alloc_secid(struct aa_label *label, gfp_t gfp); diff --git a/security/apparmor/secid.c b/security/apparmor/secid.c index 47dc08fc583e..43e4dab7f6e6 100644 --- a/security/apparmor/secid.c +++ b/security/apparmor/secid.c @@ -106,9 +106,16 @@ int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) return 0; } -void apparmor_release_secctx(char *secdata, u32 seclen) +void apparmor_release_secctx(struct lsm_context *cp) { - kfree(secdata); + /* + * stacking scaffolding: + * When it is possible for more than one LSM to provide a + * release hook, do this check: + * if (cp->id == LSM_ID_APPARMOR || cp->id == LSM_ID_UNDEF) + */ + + kfree(cp->context); } /** diff --git a/security/security.c b/security/security.c index 09664e09fec9..b7f8ec93f6f0 100644 --- a/security/security.c +++ b/security/security.c @@ -4360,14 +4360,14 @@ EXPORT_SYMBOL(security_secctx_to_secid); /** * security_release_secctx() - Free a secctx buffer - * @secdata: secctx - * @seclen: length of secctx + * @cp: the security context * * Release the security context. */ -void security_release_secctx(char *secdata, u32 seclen) +void security_release_secctx(struct lsm_context *cp) { - call_void_hook(release_secctx, secdata, seclen); + call_void_hook(release_secctx, cp); + memset(cp, 0, sizeof(*cp)); } EXPORT_SYMBOL(security_release_secctx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f5a08f94e094..aabc724f4d44 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6657,9 +6657,16 @@ static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) secid, GFP_KERNEL); } -static void selinux_release_secctx(char *secdata, u32 seclen) +static void selinux_release_secctx(struct lsm_context *cp) { - kfree(secdata); + /* + * stacking scaffolding: + * When it is possible for more than one LSM to provide a + * release hook, do this check: + * if (cp->id == LSM_ID_SELINUX || cp->id == LSM_ID_UNDEF) + */ + + kfree(cp->context); } static void selinux_inode_invalidate_secctx(struct inode *inode) -- cgit v1.2.3 From 1995edc5f9089ecb8b77a34f21e4abd8f887b856 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 3 Dec 2024 19:03:54 -0800 Subject: bpf: Consolidate locks and reference state in verifier state Currently, state for RCU read locks and preemption is in bpf_verifier_state, while locks and pointer reference state remains in bpf_func_state. There is no particular reason to keep the latter in bpf_func_state. Additionally, it is copied into a new frame's state and copied back to the caller frame's state everytime the verifier processes a pseudo call instruction. This is a bit wasteful, given this state is global for a given verification state / path. Move all resource and reference related state in bpf_verifier_state structure in this patch, in preparation for introducing new reference state types in the future. Since we switch print_verifier_state and friends to print using vstate, we now need to explicitly pass in the verifier state from the caller along with the bpf_func_state, so modify the prototype and callers to do so. To ensure func state matches the verifier state when we're printing data, take in frame number instead of bpf_func_state pointer instead and avoid inconsistencies induced by the caller. Acked-by: Eduard Zingerman Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204030400.208005-2-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 19 +++--- kernel/bpf/log.c | 20 ++++--- kernel/bpf/verifier.c | 140 ++++++++++++++++++++----------------------- 3 files changed, 88 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f4290c179bee..03e351c43fa8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -315,9 +315,6 @@ struct bpf_func_state { u32 callback_depth; /* The following fields should be last. See copy_func_state() */ - int acquired_refs; - int active_locks; - struct bpf_reference_state *refs; /* The state of the stack. Each element of the array describes BPF_REG_SIZE * (i.e. 8) bytes worth of stack memory. * stack[0] represents bytes [*(r10-8)..*(r10-1)] @@ -370,6 +367,8 @@ struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; struct bpf_verifier_state *parent; + /* Acquired reference states */ + struct bpf_reference_state *refs; /* * 'branches' field is the number of branches left to explore: * 0 - all possible paths from this state reached bpf_exit or @@ -419,9 +418,12 @@ struct bpf_verifier_state { u32 insn_idx; u32 curframe; - bool speculative; + u32 acquired_refs; + u32 active_locks; + u32 active_preempt_locks; bool active_rcu_lock; - u32 active_preempt_lock; + + bool speculative; /* If this state was ever pointed-to by other state's loop_entry field * this flag would be set to true. Used to avoid freeing such states * while they are still in use. @@ -979,8 +981,9 @@ const char *dynptr_type_str(enum bpf_dynptr_type type); const char *iter_type_str(const struct btf *btf, u32 btf_id); const char *iter_state_str(enum bpf_iter_state state); -void print_verifier_state(struct bpf_verifier_env *env, - const struct bpf_func_state *state, bool print_all); -void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state); +void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno, bool print_all); +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno); #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 4a858fdb6476..2d28ce926053 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -753,9 +753,10 @@ static void print_reg_state(struct bpf_verifier_env *env, verbose(env, ")"); } -void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state, - bool print_all) +void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno, bool print_all) { + const struct bpf_func_state *state = vstate->frame[frameno]; const struct bpf_reg_state *reg; int i; @@ -843,11 +844,11 @@ void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_st break; } } - if (state->acquired_refs && state->refs[0].id) { - verbose(env, " refs=%d", state->refs[0].id); - for (i = 1; i < state->acquired_refs; i++) - if (state->refs[i].id) - verbose(env, ",%d", state->refs[i].id); + if (vstate->acquired_refs && vstate->refs[0].id) { + verbose(env, " refs=%d", vstate->refs[0].id); + for (i = 1; i < vstate->acquired_refs; i++) + if (vstate->refs[i].id) + verbose(env, ",%d", vstate->refs[i].id); } if (state->in_callback_fn) verbose(env, " cb"); @@ -864,7 +865,8 @@ static inline u32 vlog_alignment(u32 pos) BPF_LOG_MIN_ALIGNMENT) - pos - 1; } -void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state *state) +void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, + u32 frameno) { if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) { /* remove new line character */ @@ -873,5 +875,5 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_func_state } else { verbose(env, "%d:", env->insn_idx); } - print_verifier_state(env, state, false); + print_verifier_state(env, vstate, frameno, false); } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1c4ebb326785..019c56c782a2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1279,15 +1279,17 @@ out: return arr ? arr : ZERO_SIZE_PTR; } -static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf_verifier_state *src) { dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, sizeof(struct bpf_reference_state), GFP_KERNEL); if (!dst->refs) return -ENOMEM; - dst->active_locks = src->active_locks; dst->acquired_refs = src->acquired_refs; + dst->active_locks = src->active_locks; + dst->active_preempt_locks = src->active_preempt_locks; + dst->active_rcu_lock = src->active_rcu_lock; return 0; } @@ -1304,7 +1306,7 @@ static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_st return 0; } -static int resize_reference_state(struct bpf_func_state *state, size_t n) +static int resize_reference_state(struct bpf_verifier_state *state, size_t n) { state->refs = realloc_array(state->refs, state->acquired_refs, n, sizeof(struct bpf_reference_state)); @@ -1349,7 +1351,7 @@ static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state */ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) { - struct bpf_func_state *state = cur_func(env); + struct bpf_verifier_state *state = env->cur_state; int new_ofs = state->acquired_refs; int id, err; @@ -1367,7 +1369,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum ref_state_type type, int id, void *ptr) { - struct bpf_func_state *state = cur_func(env); + struct bpf_verifier_state *state = env->cur_state; int new_ofs = state->acquired_refs; int err; @@ -1384,7 +1386,7 @@ static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum r } /* release function corresponding to acquire_reference_state(). Idempotent. */ -static int release_reference_state(struct bpf_func_state *state, int ptr_id) +static int release_reference_state(struct bpf_verifier_state *state, int ptr_id) { int i, last_idx; @@ -1404,7 +1406,7 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id) return -EINVAL; } -static int release_lock_state(struct bpf_func_state *state, int type, int id, void *ptr) +static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr) { int i, last_idx; @@ -1425,10 +1427,9 @@ static int release_lock_state(struct bpf_func_state *state, int type, int id, vo return -EINVAL; } -static struct bpf_reference_state *find_lock_state(struct bpf_verifier_env *env, enum ref_state_type type, +static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type, int id, void *ptr) { - struct bpf_func_state *state = cur_func(env); int i; for (i = 0; i < state->acquired_refs; i++) { @@ -1447,7 +1448,6 @@ static void free_func_state(struct bpf_func_state *state) { if (!state) return; - kfree(state->refs); kfree(state->stack); kfree(state); } @@ -1461,6 +1461,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, free_func_state(state->frame[i]); state->frame[i] = NULL; } + kfree(state->refs); if (free_self) kfree(state); } @@ -1471,12 +1472,7 @@ static void free_verifier_state(struct bpf_verifier_state *state, static int copy_func_state(struct bpf_func_state *dst, const struct bpf_func_state *src) { - int err; - - memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); - err = copy_reference_state(dst, src); - if (err) - return err; + memcpy(dst, src, offsetof(struct bpf_func_state, stack)); return copy_stack_state(dst, src); } @@ -1493,9 +1489,10 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, free_func_state(dst_state->frame[i]); dst_state->frame[i] = NULL; } + err = copy_reference_state(dst_state, src); + if (err) + return err; dst_state->speculative = src->speculative; - dst_state->active_rcu_lock = src->active_rcu_lock; - dst_state->active_preempt_lock = src->active_preempt_lock; dst_state->in_sleepable = src->in_sleepable; dst_state->curframe = src->curframe; dst_state->branches = src->branches; @@ -4499,7 +4496,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno) fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_frame_stack_mask(bt, fr)); verbose(env, "stack=%s: ", env->tmp_str_buf); - print_verifier_state(env, func, true); + print_verifier_state(env, st, fr, true); } } @@ -5496,7 +5493,7 @@ static bool in_sleepable(struct bpf_verifier_env *env) static bool in_rcu_cs(struct bpf_verifier_env *env) { return env->cur_state->active_rcu_lock || - cur_func(env)->active_locks || + env->cur_state->active_locks || !in_sleepable(env); } @@ -7850,15 +7847,15 @@ static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg * Since only one bpf_spin_lock is allowed the checks are simpler than * reg_is_refcounted() logic. The verifier needs to remember only * one spin_lock instead of array of acquired_refs. - * cur_func(env)->active_locks remembers which map value element or allocated + * env->cur_state->active_locks remembers which map value element or allocated * object got locked and clears it after bpf_spin_unlock. */ static int process_spin_lock(struct bpf_verifier_env *env, int regno, bool is_lock) { struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + struct bpf_verifier_state *cur = env->cur_state; bool is_const = tnum_is_const(reg->var_off); - struct bpf_func_state *cur = cur_func(env); u64 val = reg->var_off.value; struct bpf_map *map = NULL; struct btf *btf = NULL; @@ -7925,7 +7922,7 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, return -EINVAL; } - if (release_lock_state(cur_func(env), REF_TYPE_LOCK, reg->id, ptr)) { + if (release_lock_state(env->cur_state, REF_TYPE_LOCK, reg->id, ptr)) { verbose(env, "bpf_spin_unlock of different lock\n"); return -EINVAL; } @@ -9679,7 +9676,7 @@ static int release_reference(struct bpf_verifier_env *env, struct bpf_reg_state *reg; int err; - err = release_reference_state(cur_func(env), ref_obj_id); + err = release_reference_state(env->cur_state, ref_obj_id); if (err) return err; @@ -9757,9 +9754,7 @@ static int setup_func_entry(struct bpf_verifier_env *env, int subprog, int calls callsite, state->curframe + 1 /* frameno within this callchain */, subprog /* subprog number within this prog */); - /* Transfer references to the callee */ - err = copy_reference_state(callee, caller); - err = err ?: set_callee_state_cb(env, caller, callee, callsite); + err = set_callee_state_cb(env, caller, callee, callsite); if (err) goto err_out; @@ -9992,14 +9987,14 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, const char *sub_name = subprog_name(env, subprog); /* Only global subprogs cannot be called with a lock held. */ - if (cur_func(env)->active_locks) { + if (env->cur_state->active_locks) { verbose(env, "global function calls are not allowed while holding a lock,\n" "use static function instead\n"); return -EINVAL; } /* Only global subprogs cannot be called with preemption disabled. */ - if (env->cur_state->active_preempt_lock) { + if (env->cur_state->active_preempt_locks) { verbose(env, "global function calls are not allowed with preemption disabled,\n" "use static function instead\n"); return -EINVAL; @@ -10039,9 +10034,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); - print_verifier_state(env, caller, true); + print_verifier_state(env, state, caller->frameno, true); verbose(env, "callee:\n"); - print_verifier_state(env, state->frame[state->curframe], true); + print_verifier_state(env, state, state->curframe, true); } return 0; @@ -10333,11 +10328,6 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) caller->regs[BPF_REG_0] = *r0; } - /* Transfer references to the caller */ - err = copy_reference_state(caller, callee); - if (err) - return err; - /* for callbacks like bpf_loop or bpf_for_each_map_elem go back to callsite, * there function call logic would reschedule callback visit. If iteration * converges is_state_visited() would prune that visit eventually. @@ -10350,9 +10340,9 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); - print_verifier_state(env, callee, true); + print_verifier_state(env, state, callee->frameno, true); verbose(env, "to caller at %d:\n", *insn_idx); - print_verifier_state(env, caller, true); + print_verifier_state(env, state, caller->frameno, true); } /* clear everything in the callee. In case of exceptional exits using * bpf_throw, this will be done by copy_verifier_state for extra frames. */ @@ -10502,11 +10492,11 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit) { - struct bpf_func_state *state = cur_func(env); + struct bpf_verifier_state *state = env->cur_state; bool refs_lingering = false; int i; - if (!exception_exit && state->frameno) + if (!exception_exit && cur_func(env)->frameno) return 0; for (i = 0; i < state->acquired_refs; i++) { @@ -10523,7 +10513,7 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit { int err; - if (check_lock && cur_func(env)->active_locks) { + if (check_lock && env->cur_state->active_locks) { verbose(env, "%s cannot be used inside bpf_spin_lock-ed region\n", prefix); return -EINVAL; } @@ -10539,7 +10529,7 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit return -EINVAL; } - if (check_lock && env->cur_state->active_preempt_lock) { + if (check_lock && env->cur_state->active_preempt_locks) { verbose(env, "%s cannot be used inside bpf_preempt_disable-ed region\n", prefix); return -EINVAL; } @@ -10727,7 +10717,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn env->insn_aux_data[insn_idx].storage_get_func_atomic = true; } - if (env->cur_state->active_preempt_lock) { + if (env->cur_state->active_preempt_locks) { if (fn->might_sleep) { verbose(env, "sleepable helper %s#%d in non-preemptible region\n", func_id_name(func_id), func_id); @@ -10784,7 +10774,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn struct bpf_func_state *state; struct bpf_reg_state *reg; - err = release_reference_state(cur_func(env), ref_obj_id); + err = release_reference_state(env->cur_state, ref_obj_id); if (!err) { bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({ if (reg->ref_obj_id == ref_obj_id) { @@ -11746,7 +11736,7 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state { struct btf_record *rec = reg_btf_record(reg); - if (!cur_func(env)->active_locks) { + if (!env->cur_state->active_locks) { verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n"); return -EFAULT; } @@ -11765,12 +11755,11 @@ static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id) { - struct bpf_func_state *state, *unused; + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *unused; struct bpf_reg_state *reg; int i; - state = cur_func(env); - if (!ref_obj_id) { verbose(env, "verifier internal error: ref_obj_id is zero for " "owning -> non-owning conversion\n"); @@ -11860,9 +11849,9 @@ static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_ } id = reg->id; - if (!cur_func(env)->active_locks) + if (!env->cur_state->active_locks) return -EINVAL; - s = find_lock_state(env, REF_TYPE_LOCK, id, ptr); + s = find_lock_state(env->cur_state, REF_TYPE_LOCK, id, ptr); if (!s) { verbose(env, "held lock and object are not in the same allocation\n"); return -EINVAL; @@ -12789,17 +12778,17 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } - if (env->cur_state->active_preempt_lock) { + if (env->cur_state->active_preempt_locks) { if (preempt_disable) { - env->cur_state->active_preempt_lock++; + env->cur_state->active_preempt_locks++; } else if (preempt_enable) { - env->cur_state->active_preempt_lock--; + env->cur_state->active_preempt_locks--; } else if (sleepable) { verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name); return -EACCES; } } else if (preempt_disable) { - env->cur_state->active_preempt_lock++; + env->cur_state->active_preempt_locks++; } else if (preempt_enable) { verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name); return -EINVAL; @@ -14495,12 +14484,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(env, state, true); + print_verifier_state(env, vstate, vstate->curframe, true); verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(env, state, true); + print_verifier_state(env, vstate, vstate->curframe, true); verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } @@ -15398,7 +15387,7 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, * No one could have freed the reference state before * doing the NULL check. */ - WARN_ON_ONCE(release_reference_state(state, id)); + WARN_ON_ONCE(release_reference_state(vstate, id)); bpf_for_each_reg_in_vstate(vstate, state, reg, ({ mark_ptr_or_null_reg(state, reg, id, is_null); @@ -15708,7 +15697,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx)) return -EFAULT; if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); *insn_idx += insn->off; return 0; } else if (pred == 0) { @@ -15722,7 +15711,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, *insn_idx)) return -EFAULT; if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); return 0; } @@ -15839,7 +15828,7 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, return -EACCES; } if (env->log.level & BPF_LOG_LEVEL) - print_insn_state(env, this_branch->frame[this_branch->curframe]); + print_insn_state(env, this_branch, this_branch->curframe); return 0; } @@ -17750,7 +17739,7 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, return true; } -static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, +static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *cur, struct bpf_idmap *idmap) { int i; @@ -17758,6 +17747,15 @@ static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur, if (old->acquired_refs != cur->acquired_refs) return false; + if (old->active_locks != cur->active_locks) + return false; + + if (old->active_preempt_locks != cur->active_preempt_locks) + return false; + + if (old->active_rcu_lock != cur->active_rcu_lock) + return false; + for (i = 0; i < old->acquired_refs; i++) { if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || old->refs[i].type != cur->refs[i].type) @@ -17820,9 +17818,6 @@ static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_stat if (!stacksafe(env, old, cur, &env->idmap_scratch, exact)) return false; - if (!refsafe(old, cur, &env->idmap_scratch)) - return false; - return true; } @@ -17850,13 +17845,10 @@ static bool states_equal(struct bpf_verifier_env *env, if (old->speculative && !cur->speculative) return false; - if (old->active_rcu_lock != cur->active_rcu_lock) - return false; - - if (old->active_preempt_lock != cur->active_preempt_lock) + if (old->in_sleepable != cur->in_sleepable) return false; - if (old->in_sleepable != cur->in_sleepable) + if (!refsafe(old, cur, &env->idmap_scratch)) return false; /* for states to be equal callsites have to be the same @@ -18249,9 +18241,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) verbose_linfo(env, insn_idx, "; "); verbose(env, "infinite loop detected at insn %d\n", insn_idx); verbose(env, "cur state:"); - print_verifier_state(env, cur->frame[cur->curframe], true); + print_verifier_state(env, cur, cur->curframe, true); verbose(env, "old state:"); - print_verifier_state(env, sl->state.frame[cur->curframe], true); + print_verifier_state(env, &sl->state, cur->curframe, true); return -EINVAL; } /* if the verifier is processing a loop, avoid adding new state @@ -18607,7 +18599,7 @@ static int do_check(struct bpf_verifier_env *env) env->prev_insn_idx, env->insn_idx, env->cur_state->speculative ? " (speculative execution)" : ""); - print_verifier_state(env, state->frame[state->curframe], true); + print_verifier_state(env, state, state->curframe, true); do_print_state = false; } @@ -18619,7 +18611,7 @@ static int do_check(struct bpf_verifier_env *env) }; if (verifier_state_scratched(env)) - print_insn_state(env, state->frame[state->curframe]); + print_insn_state(env, state, state->curframe); verbose_linfo(env, env->insn_idx, "; "); env->prev_log_pos = env->log.end_pos; @@ -18751,7 +18743,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - if (cur_func(env)->active_locks) { + if (env->cur_state->active_locks) { if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) || (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) { -- cgit v1.2.3 From c8e2ee1f3df05dc4caa746c062c6b5791c745c79 Mon Sep 17 00:00:00 2001 From: Kumar Kartikeya Dwivedi Date: Tue, 3 Dec 2024 19:03:57 -0800 Subject: bpf: Introduce support for bpf_local_irq_{save,restore} Teach the verifier about IRQ-disabled sections through the introduction of two new kfuncs, bpf_local_irq_save, to save IRQ state and disable them, and bpf_local_irq_restore, to restore IRQ state and enable them back again. For the purposes of tracking the saved IRQ state, the verifier is taught about a new special object on the stack of type STACK_IRQ_FLAG. This is a 8 byte value which saves the IRQ flags which are to be passed back to the IRQ restore kfunc. Renumber the enums for REF_TYPE_* to simplify the check in find_lock_state, filtering out non-lock types as they grow will become cumbersome and is unecessary. To track a dynamic number of IRQ-disabled regions and their associated saved states, a new resource type RES_TYPE_IRQ is introduced, which its state management functions: acquire_irq_state and release_irq_state, taking advantage of the refactoring and clean ups made in earlier commits. One notable requirement of the kernel's IRQ save and restore API is that they cannot happen out of order. For this purpose, when releasing reference we keep track of the prev_id we saw with REF_TYPE_IRQ. Since reference states are inserted in increasing order of the index, this is used to remember the ordering of acquisitions of IRQ saved states, so that we maintain a logical stack in acquisition order of resource identities, and can enforce LIFO ordering when restoring IRQ state. The top of the stack is maintained using bpf_verifier_state's active_irq_id. To maintain the stack property when releasing reference states, we need to modify release_reference_state to instead shift the remaining array left using memmove instead of swapping deleted element with last that might break the ordering. A selftest to test this subtle behavior is added in late patches. The logic to detect initialized and unitialized irq flag slots, marking and unmarking is similar to how it's done for iterators. No additional checks are needed in refsafe for REF_TYPE_IRQ, apart from the usual check_id satisfiability check on the ref[i].id. We have to perform the same check_ids check on state->active_irq_id as well. To ensure we don't get assigned REF_TYPE_PTR by default after acquire_reference_state, if someone forgets to assign the type, let's also renumber the enum ref_state_type. This way any unassigned types get caught by refsafe's default switch statement, don't assume REF_TYPE_PTR by default. The kfuncs themselves are plain wrappers over local_irq_save and local_irq_restore macros. Acked-by: Eduard Zingerman Signed-off-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/r/20241204030400.208005-5-memxor@gmail.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 7 +- kernel/bpf/helpers.c | 17 +++ kernel/bpf/log.c | 1 + kernel/bpf/verifier.c | 299 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 320 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 03e351c43fa8..de09ac3067ae 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -233,6 +233,7 @@ enum bpf_stack_slot_type { */ STACK_DYNPTR, STACK_ITER, + STACK_IRQ_FLAG, }; #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ @@ -254,8 +255,9 @@ struct bpf_reference_state { * default to pointer reference on zero initialization of a state. */ enum ref_state_type { - REF_TYPE_PTR = 0, - REF_TYPE_LOCK, + REF_TYPE_PTR = 1, + REF_TYPE_IRQ = 2, + REF_TYPE_LOCK = 3, } type; /* Track each reference created with a unique id, even if the same * instruction creates the reference multiple times (eg, via CALL). @@ -421,6 +423,7 @@ struct bpf_verifier_state { u32 acquired_refs; u32 active_locks; u32 active_preempt_locks; + u32 active_irq_id; bool active_rcu_lock; bool speculative; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 751c150f9e1c..532ea74d4850 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -3057,6 +3057,21 @@ __bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void __user return ret + 1; } +/* Keep unsinged long in prototype so that kfunc is usable when emitted to + * vmlinux.h in BPF programs directly, but note that while in BPF prog, the + * unsigned long always points to 8-byte region on stack, the kernel may only + * read and write the 4-bytes on 32-bit. + */ +__bpf_kfunc void bpf_local_irq_save(unsigned long *flags__irq_flag) +{ + local_irq_save(*flags__irq_flag); +} + +__bpf_kfunc void bpf_local_irq_restore(unsigned long *flags__irq_flag) +{ + local_irq_restore(*flags__irq_flag); +} + __bpf_kfunc_end_defs(); BTF_KFUNCS_START(generic_btf_ids) @@ -3149,6 +3164,8 @@ BTF_ID_FLAGS(func, bpf_get_kmem_cache) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_new, KF_ITER_NEW | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_next, KF_ITER_NEXT | KF_RET_NULL | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_iter_kmem_cache_destroy, KF_ITER_DESTROY | KF_SLEEPABLE) +BTF_ID_FLAGS(func, bpf_local_irq_save) +BTF_ID_FLAGS(func, bpf_local_irq_restore) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/log.c b/kernel/bpf/log.c index 2d28ce926053..38050f4ee400 100644 --- a/kernel/bpf/log.c +++ b/kernel/bpf/log.c @@ -537,6 +537,7 @@ static char slot_type_char[] = { [STACK_ZERO] = '0', [STACK_DYNPTR] = 'd', [STACK_ITER] = 'i', + [STACK_IRQ_FLAG] = 'f' }; static void print_liveness(struct bpf_verifier_env *env, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b4a486abe134..b23f6fddf3af 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -661,6 +661,11 @@ static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, return stack_slot_obj_get_spi(env, reg, "iter", nr_slots); } +static int irq_flag_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + return stack_slot_obj_get_spi(env, reg, "irq_flag", 1); +} + static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type) { switch (arg_type & DYNPTR_TYPE_FLAG_MASK) { @@ -1156,10 +1161,136 @@ static int is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_s return 0; } +static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx); +static int release_irq_state(struct bpf_verifier_state *state, int id); + +static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env, + struct bpf_kfunc_call_arg_meta *meta, + struct bpf_reg_state *reg, int insn_idx) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i, id; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + + id = acquire_irq_state(env, insn_idx); + if (id < 0) + return id; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + __mark_reg_known_zero(st); + st->type = PTR_TO_STACK; /* we don't have dedicated reg type */ + st->live |= REG_LIVE_WRITTEN; + st->ref_obj_id = id; + + for (i = 0; i < BPF_REG_SIZE; i++) + slot->slot_type[i] = STACK_IRQ_FLAG; + + mark_stack_slot_scratched(env, spi); + return 0; +} + +static int unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i, err; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + err = release_irq_state(env->cur_state, st->ref_obj_id); + WARN_ON_ONCE(err && err != -EACCES); + if (err) { + int insn_idx = 0; + + for (int i = 0; i < env->cur_state->acquired_refs; i++) { + if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) { + insn_idx = env->cur_state->refs[i].insn_idx; + break; + } + } + + verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n", + env->cur_state->active_irq_id, insn_idx); + return err; + } + + __mark_reg_not_init(env, st); + + /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */ + st->live |= REG_LIVE_WRITTEN; + + for (i = 0; i < BPF_REG_SIZE; i++) + slot->slot_type[i] = STACK_INVALID; + + mark_stack_slot_scratched(env, spi); + return 0; +} + +static bool is_irq_flag_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + int spi, i; + + /* For -ERANGE (i.e. spi not falling into allocated stack slots), we + * will do check_mem_access to check and update stack bounds later, so + * return true for that case. + */ + spi = irq_flag_get_spi(env, reg); + if (spi == -ERANGE) + return true; + if (spi < 0) + return false; + + slot = &state->stack[spi]; + + for (i = 0; i < BPF_REG_SIZE; i++) + if (slot->slot_type[i] == STACK_IRQ_FLAG) + return false; + return true; +} + +static int is_irq_flag_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + struct bpf_func_state *state = func(env, reg); + struct bpf_stack_state *slot; + struct bpf_reg_state *st; + int spi, i; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return -EINVAL; + + slot = &state->stack[spi]; + st = &slot->spilled_ptr; + + if (!st->ref_obj_id) + return -EINVAL; + + for (i = 0; i < BPF_REG_SIZE; i++) + if (slot->slot_type[i] != STACK_IRQ_FLAG) + return -EINVAL; + return 0; +} + /* Check if given stack slot is "special": * - spilled register state (STACK_SPILL); * - dynptr state (STACK_DYNPTR); * - iter state (STACK_ITER). + * - irq flag state (STACK_IRQ_FLAG) */ static bool is_stack_slot_special(const struct bpf_stack_state *stack) { @@ -1169,6 +1300,7 @@ static bool is_stack_slot_special(const struct bpf_stack_state *stack) case STACK_SPILL: case STACK_DYNPTR: case STACK_ITER: + case STACK_IRQ_FLAG: return true; case STACK_INVALID: case STACK_MISC: @@ -1291,6 +1423,7 @@ static int copy_reference_state(struct bpf_verifier_state *dst, const struct bpf dst->active_locks = src->active_locks; dst->active_preempt_locks = src->active_preempt_locks; dst->active_rcu_lock = src->active_rcu_lock; + dst->active_irq_id = src->active_irq_id; return 0; } @@ -1391,13 +1524,35 @@ static int acquire_lock_state(struct bpf_verifier_env *env, int insn_idx, enum r return 0; } +static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx) +{ + struct bpf_verifier_state *state = env->cur_state; + struct bpf_reference_state *s; + + s = acquire_reference_state(env, insn_idx); + if (!s) + return -ENOMEM; + s->type = REF_TYPE_IRQ; + s->id = ++env->id_gen; + + state->active_irq_id = s->id; + return s->id; +} + static void release_reference_state(struct bpf_verifier_state *state, int idx) { int last_idx; + size_t rem; + /* IRQ state requires the relative ordering of elements remaining the + * same, since it relies on the refs array to behave as a stack, so that + * it can detect out-of-order IRQ restore. Hence use memmove to shift + * the array instead of swapping the final element into the deleted idx. + */ last_idx = state->acquired_refs - 1; + rem = state->acquired_refs - idx - 1; if (last_idx && idx != last_idx) - memcpy(&state->refs[idx], &state->refs[last_idx], sizeof(*state->refs)); + memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem); memset(&state->refs[last_idx], 0, sizeof(*state->refs)); state->acquired_refs--; return; @@ -1419,6 +1574,28 @@ static int release_lock_state(struct bpf_verifier_state *state, int type, int id return -EINVAL; } +static int release_irq_state(struct bpf_verifier_state *state, int id) +{ + u32 prev_id = 0; + int i; + + if (id != state->active_irq_id) + return -EACCES; + + for (i = 0; i < state->acquired_refs; i++) { + if (state->refs[i].type != REF_TYPE_IRQ) + continue; + if (state->refs[i].id == id) { + release_reference_state(state, i); + state->active_irq_id = prev_id; + return 0; + } else { + prev_id = state->refs[i].id; + } + } + return -EINVAL; +} + static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type, int id, void *ptr) { @@ -1427,7 +1604,7 @@ static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *st for (i = 0; i < state->acquired_refs; i++) { struct bpf_reference_state *s = &state->refs[i]; - if (s->type == REF_TYPE_PTR || s->type != type) + if (s->type != type) continue; if (s->id == id && s->ptr == ptr) @@ -3235,6 +3412,16 @@ static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *re return mark_stack_slot_obj_read(env, reg, spi, nr_slots); } +static int mark_irq_flag_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg) +{ + int spi; + + spi = irq_flag_get_spi(env, reg); + if (spi < 0) + return spi; + return mark_stack_slot_obj_read(env, reg, spi, 1); +} + /* This function is supposed to be used by the following 32-bit optimization * code only. It returns TRUE if the source or destination register operates * on 64-bit, otherwise return FALSE. @@ -10008,6 +10195,12 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } + if (env->cur_state->active_irq_id) { + verbose(env, "global function calls are not allowed with IRQs disabled,\n" + "use static function instead\n"); + return -EINVAL; + } + if (err) { verbose(env, "Caller passes invalid args into func#%d ('%s')\n", subprog, sub_name); @@ -10532,6 +10725,11 @@ static int check_resource_leak(struct bpf_verifier_env *env, bool exception_exit return err; } + if (check_lock && env->cur_state->active_irq_id) { + verbose(env, "%s cannot be used inside bpf_local_irq_save-ed region\n", prefix); + return -EINVAL; + } + if (check_lock && env->cur_state->active_rcu_lock) { verbose(env, "%s cannot be used inside bpf_rcu_read_lock-ed region\n", prefix); return -EINVAL; @@ -10736,6 +10934,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn env->insn_aux_data[insn_idx].storage_get_func_atomic = true; } + if (env->cur_state->active_irq_id) { + if (fn->might_sleep) { + verbose(env, "sleepable helper %s#%d in IRQ-disabled region\n", + func_id_name(func_id), func_id); + return -EINVAL; + } + + if (in_sleepable(env) && is_storage_get_function(func_id)) + env->insn_aux_data[insn_idx].storage_get_func_atomic = true; + } + meta.func_id = func_id; /* check args */ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { @@ -11297,6 +11506,11 @@ static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param return btf_param_match_suffix(btf, arg, "__str"); } +static bool is_kfunc_arg_irq_flag(const struct btf *btf, const struct btf_param *arg) +{ + return btf_param_match_suffix(btf, arg, "__irq_flag"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -11450,6 +11664,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_CONST_STR, KF_ARG_PTR_TO_MAP, KF_ARG_PTR_TO_WORKQUEUE, + KF_ARG_PTR_TO_IRQ_FLAG, }; enum special_kfunc_type { @@ -11481,6 +11696,8 @@ enum special_kfunc_type { KF_bpf_iter_css_task_new, KF_bpf_session_cookie, KF_bpf_get_kmem_cache, + KF_bpf_local_irq_save, + KF_bpf_local_irq_restore, }; BTF_SET_START(special_kfunc_set) @@ -11547,6 +11764,8 @@ BTF_ID(func, bpf_session_cookie) BTF_ID_UNUSED #endif BTF_ID(func, bpf_get_kmem_cache) +BTF_ID(func, bpf_local_irq_save) +BTF_ID(func, bpf_local_irq_restore) static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -11637,6 +11856,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_wq(meta->btf, &args[argno])) return KF_ARG_PTR_TO_WORKQUEUE; + if (is_kfunc_arg_irq_flag(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_IRQ_FLAG; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11740,6 +11962,54 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env, return 0; } +static int process_irq_flag(struct bpf_verifier_env *env, int regno, + struct bpf_kfunc_call_arg_meta *meta) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + bool irq_save; + int err; + + if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_save]) { + irq_save = true; + } else if (meta->func_id == special_kfunc_list[KF_bpf_local_irq_restore]) { + irq_save = false; + } else { + verbose(env, "verifier internal error: unknown irq flags kfunc\n"); + return -EFAULT; + } + + if (irq_save) { + if (!is_irq_flag_reg_valid_uninit(env, reg)) { + verbose(env, "expected uninitialized irq flag as arg#%d\n", regno - 1); + return -EINVAL; + } + + err = check_mem_access(env, env->insn_idx, regno, 0, BPF_DW, BPF_WRITE, -1, false, false); + if (err) + return err; + + err = mark_stack_slot_irq_flag(env, meta, reg, env->insn_idx); + if (err) + return err; + } else { + err = is_irq_flag_reg_valid_init(env, reg); + if (err) { + verbose(env, "expected an initialized irq flag as arg#%d\n", regno - 1); + return err; + } + + err = mark_irq_flag_read(env, reg); + if (err) + return err; + + err = unmark_stack_slot_irq_flag(env, reg); + if (err) + return err; + } + return 0; +} + + static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg) { struct btf_record *rec = reg_btf_record(reg); @@ -12328,6 +12598,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_REFCOUNTED_KPTR: case KF_ARG_PTR_TO_CONST_STR: case KF_ARG_PTR_TO_WORKQUEUE: + case KF_ARG_PTR_TO_IRQ_FLAG: break; default: WARN_ON_ONCE(1); @@ -12622,6 +12893,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ if (ret < 0) return ret; break; + case KF_ARG_PTR_TO_IRQ_FLAG: + if (reg->type != PTR_TO_STACK) { + verbose(env, "arg#%d doesn't point to an irq flag on stack\n", i); + return -EINVAL; + } + ret = process_irq_flag(env, regno, meta); + if (ret < 0) + return ret; + break; } } @@ -12802,6 +13082,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn, return -EINVAL; } + if (env->cur_state->active_irq_id && sleepable) { + verbose(env, "kernel func %s is sleepable within IRQ-disabled region\n", func_name); + return -EACCES; + } + /* In case of release function, we get register number of refcounted * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now. */ @@ -17735,6 +18020,12 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) return false; break; + case STACK_IRQ_FLAG: + old_reg = &old->stack[spi].spilled_ptr; + cur_reg = &cur->stack[spi].spilled_ptr; + if (!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap)) + return false; + break; case STACK_MISC: case STACK_ZERO: case STACK_INVALID: @@ -17764,12 +18055,16 @@ static bool refsafe(struct bpf_verifier_state *old, struct bpf_verifier_state *c if (old->active_rcu_lock != cur->active_rcu_lock) return false; + if (!check_ids(old->active_irq_id, cur->active_irq_id, idmap)) + return false; + for (i = 0; i < old->acquired_refs; i++) { if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap) || old->refs[i].type != cur->refs[i].type) return false; switch (old->refs[i].type) { case REF_TYPE_PTR: + case REF_TYPE_IRQ: break; case REF_TYPE_LOCK: if (old->refs[i].ptr != cur->refs[i].ptr) -- cgit v1.2.3 From cdb03e598750e7ebc222571aa96653e9b5a59dbe Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 10 Nov 2024 23:33:24 +0100 Subject: scsi: bsg: Replace zero-length array with flexible array member Replace the deprecated zero-length array with a modern flexible array member in the struct iscsi_bsg_host_vendor_reply. Link: https://github.com/KSPP/linux/issues/78 Signed-off-by: Thorsten Blum Link: https://lore.kernel.org/r/20241110223323.42772-2-thorsten.blum@linux.dev Signed-off-by: Martin K. Petersen --- include/scsi/scsi_bsg_iscsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_bsg_iscsi.h b/include/scsi/scsi_bsg_iscsi.h index 9b1f0f424a79..a569c35b258d 100644 --- a/include/scsi/scsi_bsg_iscsi.h +++ b/include/scsi/scsi_bsg_iscsi.h @@ -59,7 +59,7 @@ struct iscsi_bsg_host_vendor { */ struct iscsi_bsg_host_vendor_reply { /* start of vendor response area */ - uint32_t vendor_rsp[0]; + DECLARE_FLEX_ARRAY(uint32_t, vendor_rsp); }; -- cgit v1.2.3 From 209f4e43b8068c24cde227f464111030430153fa Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 24 Nov 2024 09:08:07 +0200 Subject: scsi: ufs: core: Introduce a new clock_gating lock Introduce a new clock gating lock to serialize access to some of the clock gating members instead of the host_lock. While at it, simplify the code with the guard() macro and co for automatic cleanup of the new lock. There are some explicit spin_lock_irqsave()/spin_unlock_irqrestore() snaking instances I left behind because I couldn't make heads or tails of it. Additionally, move the trace_ufshcd_clk_gating() call from inside the region protected by the lock as it doesn't needs protection. Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20241124070808.194860-4-avri.altman@wdc.com Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 109 ++++++++++++++++++++++------------------------ include/ufs/ufshcd.h | 9 +++- 2 files changed, 59 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 939030672c84..9288575f94a2 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1816,19 +1816,16 @@ static void ufshcd_exit_clk_scaling(struct ufs_hba *hba) static void ufshcd_ungate_work(struct work_struct *work) { int ret; - unsigned long flags; struct ufs_hba *hba = container_of(work, struct ufs_hba, clk_gating.ungate_work); cancel_delayed_work_sync(&hba->clk_gating.gate_work); - spin_lock_irqsave(hba->host->host_lock, flags); - if (hba->clk_gating.state == CLKS_ON) { - spin_unlock_irqrestore(hba->host->host_lock, flags); - return; + scoped_guard(spinlock_irqsave, &hba->clk_gating.lock) { + if (hba->clk_gating.state == CLKS_ON) + return; } - spin_unlock_irqrestore(hba->host->host_lock, flags); ufshcd_hba_vreg_set_hpm(hba); ufshcd_setup_clocks(hba, true); @@ -1863,7 +1860,7 @@ void ufshcd_hold(struct ufs_hba *hba) if (!ufshcd_is_clkgating_allowed(hba) || !hba->clk_gating.is_initialized) return; - spin_lock_irqsave(hba->host->host_lock, flags); + spin_lock_irqsave(&hba->clk_gating.lock, flags); hba->clk_gating.active_reqs++; start: @@ -1879,11 +1876,11 @@ start: */ if (ufshcd_can_hibern8_during_gating(hba) && ufshcd_is_link_hibern8(hba)) { - spin_unlock_irqrestore(hba->host->host_lock, flags); + spin_unlock_irqrestore(&hba->clk_gating.lock, flags); flush_result = flush_work(&hba->clk_gating.ungate_work); if (hba->clk_gating.is_suspended && !flush_result) return; - spin_lock_irqsave(hba->host->host_lock, flags); + spin_lock_irqsave(&hba->clk_gating.lock, flags); goto start; } break; @@ -1912,17 +1909,17 @@ start: */ fallthrough; case REQ_CLKS_ON: - spin_unlock_irqrestore(hba->host->host_lock, flags); + spin_unlock_irqrestore(&hba->clk_gating.lock, flags); flush_work(&hba->clk_gating.ungate_work); /* Make sure state is CLKS_ON before returning */ - spin_lock_irqsave(hba->host->host_lock, flags); + spin_lock_irqsave(&hba->clk_gating.lock, flags); goto start; default: dev_err(hba->dev, "%s: clk gating is in invalid state %d\n", __func__, hba->clk_gating.state); break; } - spin_unlock_irqrestore(hba->host->host_lock, flags); + spin_unlock_irqrestore(&hba->clk_gating.lock, flags); } EXPORT_SYMBOL_GPL(ufshcd_hold); @@ -1930,30 +1927,32 @@ static void ufshcd_gate_work(struct work_struct *work) { struct ufs_hba *hba = container_of(work, struct ufs_hba, clk_gating.gate_work.work); - unsigned long flags; int ret; - spin_lock_irqsave(hba->host->host_lock, flags); - /* - * In case you are here to cancel this work the gating state - * would be marked as REQ_CLKS_ON. In this case save time by - * skipping the gating work and exit after changing the clock - * state to CLKS_ON. - */ - if (hba->clk_gating.is_suspended || - (hba->clk_gating.state != REQ_CLKS_OFF)) { - hba->clk_gating.state = CLKS_ON; - trace_ufshcd_clk_gating(dev_name(hba->dev), - hba->clk_gating.state); - goto rel_lock; - } + scoped_guard(spinlock_irqsave, &hba->clk_gating.lock) { + /* + * In case you are here to cancel this work the gating state + * would be marked as REQ_CLKS_ON. In this case save time by + * skipping the gating work and exit after changing the clock + * state to CLKS_ON. + */ + if (hba->clk_gating.is_suspended || + hba->clk_gating.state != REQ_CLKS_OFF) { + hba->clk_gating.state = CLKS_ON; + trace_ufshcd_clk_gating(dev_name(hba->dev), + hba->clk_gating.state); + return; + } - if (ufshcd_is_ufs_dev_busy(hba) || - hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL || - hba->clk_gating.active_reqs) - goto rel_lock; + if (hba->clk_gating.active_reqs) + return; + } - spin_unlock_irqrestore(hba->host->host_lock, flags); + scoped_guard(spinlock_irqsave, hba->host->host_lock) { + if (ufshcd_is_ufs_dev_busy(hba) || + hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) + return; + } /* put the link into hibern8 mode before turning off clocks */ if (ufshcd_can_hibern8_during_gating(hba)) { @@ -1964,7 +1963,7 @@ static void ufshcd_gate_work(struct work_struct *work) __func__, ret); trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); - goto out; + return; } ufshcd_set_link_hibern8(hba); } @@ -1984,32 +1983,34 @@ static void ufshcd_gate_work(struct work_struct *work) * prevent from doing cancel work multiple times when there are * new requests arriving before the current cancel work is done. */ - spin_lock_irqsave(hba->host->host_lock, flags); + guard(spinlock_irqsave)(&hba->clk_gating.lock); if (hba->clk_gating.state == REQ_CLKS_OFF) { hba->clk_gating.state = CLKS_OFF; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); } -rel_lock: - spin_unlock_irqrestore(hba->host->host_lock, flags); -out: - return; } -/* host lock must be held before calling this variant */ static void __ufshcd_release(struct ufs_hba *hba) { + lockdep_assert_held(&hba->clk_gating.lock); + if (!ufshcd_is_clkgating_allowed(hba)) return; hba->clk_gating.active_reqs--; if (hba->clk_gating.active_reqs || hba->clk_gating.is_suspended || - hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL || - ufshcd_has_pending_tasks(hba) || !hba->clk_gating.is_initialized || + !hba->clk_gating.is_initialized || hba->clk_gating.state == CLKS_OFF) return; + scoped_guard(spinlock_irqsave, hba->host->host_lock) { + if (ufshcd_has_pending_tasks(hba) || + hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) + return; + } + hba->clk_gating.state = REQ_CLKS_OFF; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); queue_delayed_work(hba->clk_gating.clk_gating_workq, @@ -2019,11 +2020,8 @@ static void __ufshcd_release(struct ufs_hba *hba) void ufshcd_release(struct ufs_hba *hba) { - unsigned long flags; - - spin_lock_irqsave(hba->host->host_lock, flags); + guard(spinlock_irqsave)(&hba->clk_gating.lock); __ufshcd_release(hba); - spin_unlock_irqrestore(hba->host->host_lock, flags); } EXPORT_SYMBOL_GPL(ufshcd_release); @@ -2038,11 +2036,9 @@ static ssize_t ufshcd_clkgate_delay_show(struct device *dev, void ufshcd_clkgate_delay_set(struct device *dev, unsigned long value) { struct ufs_hba *hba = dev_get_drvdata(dev); - unsigned long flags; - spin_lock_irqsave(hba->host->host_lock, flags); + guard(spinlock_irqsave)(&hba->clk_gating.lock); hba->clk_gating.delay_ms = value; - spin_unlock_irqrestore(hba->host->host_lock, flags); } EXPORT_SYMBOL_GPL(ufshcd_clkgate_delay_set); @@ -2070,7 +2066,6 @@ static ssize_t ufshcd_clkgate_enable_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct ufs_hba *hba = dev_get_drvdata(dev); - unsigned long flags; u32 value; if (kstrtou32(buf, 0, &value)) @@ -2078,9 +2073,10 @@ static ssize_t ufshcd_clkgate_enable_store(struct device *dev, value = !!value; - spin_lock_irqsave(hba->host->host_lock, flags); + guard(spinlock_irqsave)(&hba->clk_gating.lock); + if (value == hba->clk_gating.is_enabled) - goto out; + return count; if (value) __ufshcd_release(hba); @@ -2088,8 +2084,7 @@ static ssize_t ufshcd_clkgate_enable_store(struct device *dev, hba->clk_gating.active_reqs++; hba->clk_gating.is_enabled = value; -out: - spin_unlock_irqrestore(hba->host->host_lock, flags); + return count; } @@ -2131,6 +2126,8 @@ static void ufshcd_init_clk_gating(struct ufs_hba *hba) INIT_DELAYED_WORK(&hba->clk_gating.gate_work, ufshcd_gate_work); INIT_WORK(&hba->clk_gating.ungate_work, ufshcd_ungate_work); + spin_lock_init(&hba->clk_gating.lock); + hba->clk_gating.clk_gating_workq = alloc_ordered_workqueue( "ufs_clk_gating_%d", WQ_MEM_RECLAIM | WQ_HIGHPRI, hba->host->host_no); @@ -9126,7 +9123,6 @@ static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on) int ret = 0; struct ufs_clk_info *clki; struct list_head *head = &hba->clk_list_head; - unsigned long flags; ktime_t start = ktime_get(); bool clk_state_changed = false; @@ -9177,11 +9173,10 @@ out: clk_disable_unprepare(clki->clk); } } else if (!ret && on) { - spin_lock_irqsave(hba->host->host_lock, flags); - hba->clk_gating.state = CLKS_ON; + scoped_guard(spinlock_irqsave, &hba->clk_gating.lock) + hba->clk_gating.state = CLKS_ON; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); - spin_unlock_irqrestore(hba->host->host_lock, flags); } if (clk_state_changed) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index d7aca9e61684..b6311069d901 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -403,6 +403,9 @@ enum clk_gating_state { * delay_ms * @ungate_work: worker to turn on clocks that will be used in case of * interrupt context + * @clk_gating_workq: workqueue for clock gating work. + * @lock: serialize access to some struct ufs_clk_gating members. An outer lock + * relative to the host lock * @state: the current clocks state * @delay_ms: gating delay in ms * @is_suspended: clk gating is suspended when set to 1 which can be used @@ -413,11 +416,14 @@ enum clk_gating_state { * @is_initialized: Indicates whether clock gating is initialized or not * @active_reqs: number of requests that are pending and should be waited for * completion before gating clocks. - * @clk_gating_workq: workqueue for clock gating work. */ struct ufs_clk_gating { struct delayed_work gate_work; struct work_struct ungate_work; + struct workqueue_struct *clk_gating_workq; + + spinlock_t lock; + enum clk_gating_state state; unsigned long delay_ms; bool is_suspended; @@ -426,7 +432,6 @@ struct ufs_clk_gating { bool is_enabled; bool is_initialized; int active_reqs; - struct workqueue_struct *clk_gating_workq; }; /** -- cgit v1.2.3 From be769e5cf53b8a45eedcc7354bacf939ae16f72c Mon Sep 17 00:00:00 2001 From: Avri Altman Date: Sun, 24 Nov 2024 09:08:08 +0200 Subject: scsi: ufs: core: Introduce a new clock_scaling lock Introduce a new clock scaling lock to serialize access to some of the clock scaling members instead of the host_lock. here also, simplify the code with the guard() macro and co. Reviewed-by: Bart Van Assche Signed-off-by: Avri Altman Link: https://lore.kernel.org/r/20241124070808.194860-5-avri.altman@wdc.com Signed-off-by: Martin K. Petersen --- drivers/ufs/core/ufshcd.c | 132 +++++++++++++++++++++------------------------- include/ufs/ufshcd.h | 16 +++--- 2 files changed, 71 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 9288575f94a2..bd2cc2fdb5ba 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1452,16 +1452,16 @@ static void ufshcd_clk_scaling_suspend_work(struct work_struct *work) { struct ufs_hba *hba = container_of(work, struct ufs_hba, clk_scaling.suspend_work); - unsigned long irq_flags; - spin_lock_irqsave(hba->host->host_lock, irq_flags); - if (hba->clk_scaling.active_reqs || hba->clk_scaling.is_suspended) { - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); - return; + scoped_guard(spinlock_irqsave, &hba->clk_scaling.lock) + { + if (hba->clk_scaling.active_reqs || + hba->clk_scaling.is_suspended) + return; + + hba->clk_scaling.is_suspended = true; + hba->clk_scaling.window_start_t = 0; } - hba->clk_scaling.is_suspended = true; - hba->clk_scaling.window_start_t = 0; - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); devfreq_suspend_device(hba->devfreq); } @@ -1470,15 +1470,13 @@ static void ufshcd_clk_scaling_resume_work(struct work_struct *work) { struct ufs_hba *hba = container_of(work, struct ufs_hba, clk_scaling.resume_work); - unsigned long irq_flags; - spin_lock_irqsave(hba->host->host_lock, irq_flags); - if (!hba->clk_scaling.is_suspended) { - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); - return; + scoped_guard(spinlock_irqsave, &hba->clk_scaling.lock) + { + if (!hba->clk_scaling.is_suspended) + return; + hba->clk_scaling.is_suspended = false; } - hba->clk_scaling.is_suspended = false; - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); devfreq_resume_device(hba->devfreq); } @@ -1492,7 +1490,6 @@ static int ufshcd_devfreq_target(struct device *dev, bool scale_up = false, sched_clk_scaling_suspend_work = false; struct list_head *clk_list = &hba->clk_list_head; struct ufs_clk_info *clki; - unsigned long irq_flags; if (!ufshcd_is_clkscaling_supported(hba)) return -EINVAL; @@ -1513,43 +1510,38 @@ static int ufshcd_devfreq_target(struct device *dev, *freq = (unsigned long) clk_round_rate(clki->clk, *freq); } - spin_lock_irqsave(hba->host->host_lock, irq_flags); - if (ufshcd_eh_in_progress(hba)) { - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); - return 0; - } + scoped_guard(spinlock_irqsave, &hba->clk_scaling.lock) + { + if (ufshcd_eh_in_progress(hba)) + return 0; - /* Skip scaling clock when clock scaling is suspended */ - if (hba->clk_scaling.is_suspended) { - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); - dev_warn(hba->dev, "clock scaling is suspended, skip"); - return 0; - } + /* Skip scaling clock when clock scaling is suspended */ + if (hba->clk_scaling.is_suspended) { + dev_warn(hba->dev, "clock scaling is suspended, skip"); + return 0; + } - if (!hba->clk_scaling.active_reqs) - sched_clk_scaling_suspend_work = true; + if (!hba->clk_scaling.active_reqs) + sched_clk_scaling_suspend_work = true; - if (list_empty(clk_list)) { - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); - goto out; - } + if (list_empty(clk_list)) + goto out; - /* Decide based on the target or rounded-off frequency and update */ - if (hba->use_pm_opp) - scale_up = *freq > hba->clk_scaling.target_freq; - else - scale_up = *freq == clki->max_freq; + /* Decide based on the target or rounded-off frequency and update */ + if (hba->use_pm_opp) + scale_up = *freq > hba->clk_scaling.target_freq; + else + scale_up = *freq == clki->max_freq; - if (!hba->use_pm_opp && !scale_up) - *freq = clki->min_freq; + if (!hba->use_pm_opp && !scale_up) + *freq = clki->min_freq; - /* Update the frequency */ - if (!ufshcd_is_devfreq_scaling_required(hba, *freq, scale_up)) { - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); - ret = 0; - goto out; /* no state change required */ + /* Update the frequency */ + if (!ufshcd_is_devfreq_scaling_required(hba, *freq, scale_up)) { + ret = 0; + goto out; /* no state change required */ + } } - spin_unlock_irqrestore(hba->host->host_lock, irq_flags); start = ktime_get(); ret = ufshcd_devfreq_scale(hba, *freq, scale_up); @@ -1574,7 +1566,6 @@ static int ufshcd_devfreq_get_dev_status(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); struct ufs_clk_scaling *scaling = &hba->clk_scaling; - unsigned long flags; ktime_t curr_t; if (!ufshcd_is_clkscaling_supported(hba)) @@ -1582,7 +1573,8 @@ static int ufshcd_devfreq_get_dev_status(struct device *dev, memset(stat, 0, sizeof(*stat)); - spin_lock_irqsave(hba->host->host_lock, flags); + guard(spinlock_irqsave)(&hba->clk_scaling.lock); + curr_t = ktime_get(); if (!scaling->window_start_t) goto start_window; @@ -1618,7 +1610,7 @@ start_window: scaling->busy_start_t = 0; scaling->is_busy_started = false; } - spin_unlock_irqrestore(hba->host->host_lock, flags); + return 0; } @@ -1682,19 +1674,19 @@ static void ufshcd_devfreq_remove(struct ufs_hba *hba) static void ufshcd_suspend_clkscaling(struct ufs_hba *hba) { - unsigned long flags; bool suspend = false; cancel_work_sync(&hba->clk_scaling.suspend_work); cancel_work_sync(&hba->clk_scaling.resume_work); - spin_lock_irqsave(hba->host->host_lock, flags); - if (!hba->clk_scaling.is_suspended) { - suspend = true; - hba->clk_scaling.is_suspended = true; - hba->clk_scaling.window_start_t = 0; + scoped_guard(spinlock_irqsave, &hba->clk_scaling.lock) + { + if (!hba->clk_scaling.is_suspended) { + suspend = true; + hba->clk_scaling.is_suspended = true; + hba->clk_scaling.window_start_t = 0; + } } - spin_unlock_irqrestore(hba->host->host_lock, flags); if (suspend) devfreq_suspend_device(hba->devfreq); @@ -1702,15 +1694,15 @@ static void ufshcd_suspend_clkscaling(struct ufs_hba *hba) static void ufshcd_resume_clkscaling(struct ufs_hba *hba) { - unsigned long flags; bool resume = false; - spin_lock_irqsave(hba->host->host_lock, flags); - if (hba->clk_scaling.is_suspended) { - resume = true; - hba->clk_scaling.is_suspended = false; + scoped_guard(spinlock_irqsave, &hba->clk_scaling.lock) + { + if (hba->clk_scaling.is_suspended) { + resume = true; + hba->clk_scaling.is_suspended = false; + } } - spin_unlock_irqrestore(hba->host->host_lock, flags); if (resume) devfreq_resume_device(hba->devfreq); @@ -1796,6 +1788,8 @@ static void ufshcd_init_clk_scaling(struct ufs_hba *hba) INIT_WORK(&hba->clk_scaling.resume_work, ufshcd_clk_scaling_resume_work); + spin_lock_init(&hba->clk_scaling.lock); + hba->clk_scaling.workq = alloc_ordered_workqueue( "ufs_clkscaling_%d", WQ_MEM_RECLAIM, hba->host->host_no); @@ -2157,19 +2151,17 @@ static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba) { bool queue_resume_work = false; ktime_t curr_t = ktime_get(); - unsigned long flags; if (!ufshcd_is_clkscaling_supported(hba)) return; - spin_lock_irqsave(hba->host->host_lock, flags); + guard(spinlock_irqsave)(&hba->clk_scaling.lock); + if (!hba->clk_scaling.active_reqs++) queue_resume_work = true; - if (!hba->clk_scaling.is_enabled || hba->pm_op_in_progress) { - spin_unlock_irqrestore(hba->host->host_lock, flags); + if (!hba->clk_scaling.is_enabled || hba->pm_op_in_progress) return; - } if (queue_resume_work) queue_work(hba->clk_scaling.workq, @@ -2185,18 +2177,17 @@ static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba) hba->clk_scaling.busy_start_t = curr_t; hba->clk_scaling.is_busy_started = true; } - spin_unlock_irqrestore(hba->host->host_lock, flags); } static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba) { struct ufs_clk_scaling *scaling = &hba->clk_scaling; - unsigned long flags; if (!ufshcd_is_clkscaling_supported(hba)) return; - spin_lock_irqsave(hba->host->host_lock, flags); + guard(spinlock_irqsave)(&hba->clk_scaling.lock); + hba->clk_scaling.active_reqs--; if (!scaling->active_reqs && scaling->is_busy_started) { scaling->tot_busy_t += ktime_to_us(ktime_sub(ktime_get(), @@ -2204,7 +2195,6 @@ static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba) scaling->busy_start_t = 0; scaling->is_busy_started = false; } - spin_unlock_irqrestore(hba->host->host_lock, flags); } static inline int ufshcd_monitor_opcode2dir(u8 opcode) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index b6311069d901..ce7667b020e2 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -436,6 +436,10 @@ struct ufs_clk_gating { /** * struct ufs_clk_scaling - UFS clock scaling related data + * @workq: workqueue to schedule devfreq suspend/resume work + * @suspend_work: worker to suspend devfreq + * @resume_work: worker to resume devfreq + * @lock: serialize access to some struct ufs_clk_scaling members * @active_reqs: number of requests that are pending. If this is zero when * devfreq ->target() function is called then schedule "suspend_work" to * suspend devfreq. @@ -445,9 +449,6 @@ struct ufs_clk_gating { * @enable_attr: sysfs attribute to enable/disable clock scaling * @saved_pwr_info: UFS power mode may also be changed during scaling and this * one keeps track of previous power mode. - * @workq: workqueue to schedule devfreq suspend/resume work - * @suspend_work: worker to suspend devfreq - * @resume_work: worker to resume devfreq * @target_freq: frequency requested by devfreq framework * @min_gear: lowest HS gear to scale down to * @is_enabled: tracks if scaling is currently enabled or not, controlled by @@ -459,15 +460,18 @@ struct ufs_clk_gating { * @is_suspended: tracks if devfreq is suspended or not */ struct ufs_clk_scaling { + struct workqueue_struct *workq; + struct work_struct suspend_work; + struct work_struct resume_work; + + spinlock_t lock; + int active_reqs; unsigned long tot_busy_t; ktime_t window_start_t; ktime_t busy_start_t; struct device_attribute enable_attr; struct ufs_pa_layer_attr saved_pwr_info; - struct workqueue_struct *workq; - struct work_struct suspend_work; - struct work_struct resume_work; unsigned long target_freq; u32 min_gear; bool is_enabled; -- cgit v1.2.3 From 2d470c778120d3cdb8d8ab250329ca85f49f12b1 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 23 Oct 2024 14:21:55 -0700 Subject: lsm: replace context+len with lsm_context Replace the (secctx,seclen) pointer pair with a single lsm_context pointer to allow return of the LSM identifier along with the context and context length. This allows security_release_secctx() to know how to release the context. Callers have been modified to use or save the returned data from the new structure. security_secid_to_secctx() and security_lsmproc_to_secctx() will now return the length value on success instead of 0. Cc: netdev@vger.kernel.org Cc: audit@vger.kernel.org Cc: netfilter-devel@vger.kernel.org Cc: Todd Kjos Signed-off-by: Casey Schaufler [PM: subject tweak, kdoc fix, signedness fix from Dan Carpenter] Signed-off-by: Paul Moore --- drivers/android/binder.c | 5 ++-- include/linux/lsm_hook_defs.h | 5 ++-- include/linux/security.h | 9 +++---- include/net/scm.h | 5 ++-- kernel/audit.c | 9 +++---- kernel/auditsc.c | 16 +++++------- net/ipv4/ip_sockglue.c | 4 +-- net/netfilter/nf_conntrack_netlink.c | 12 ++++----- net/netfilter/nf_conntrack_standalone.c | 4 +-- net/netfilter/nfnetlink_queue.c | 27 ++++++++------------ net/netlabel/netlabel_unlabeled.c | 14 ++++------ net/netlabel/netlabel_user.c | 3 +-- security/apparmor/include/secid.h | 5 ++-- security/apparmor/secid.c | 26 +++++++++---------- security/security.c | 34 +++++++++++-------------- security/selinux/hooks.c | 23 +++++++++++++---- security/smack/smack_lsm.c | 45 +++++++++++++++++++-------------- 17 files changed, 121 insertions(+), 125 deletions(-) (limited to 'include') diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e8245df63289..919da8e674f5 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -3296,9 +3296,8 @@ static void binder_transaction(struct binder_proc *proc, size_t added_size; security_cred_getsecid(proc->cred, &secid); - ret = security_secid_to_secctx(secid, &lsmctx.context, - &lsmctx.len); - if (ret) { + ret = security_secid_to_secctx(secid, &lsmctx); + if (ret < 0) { binder_txn_error("%d:%d failed to get security context\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index c13df23132eb..01e5a8e09bba 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -295,10 +295,9 @@ LSM_HOOK(int, -EINVAL, getprocattr, struct task_struct *p, const char *name, char **value) LSM_HOOK(int, -EINVAL, setprocattr, const char *name, void *value, size_t size) LSM_HOOK(int, 0, ismaclabel, const char *name) -LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, char **secdata, - u32 *seclen) +LSM_HOOK(int, -EOPNOTSUPP, secid_to_secctx, u32 secid, struct lsm_context *cp) LSM_HOOK(int, -EOPNOTSUPP, lsmprop_to_secctx, struct lsm_prop *prop, - char **secdata, u32 *seclen) + struct lsm_context *cp) LSM_HOOK(int, 0, secctx_to_secid, const char *secdata, u32 seclen, u32 *secid) LSM_HOOK(void, LSM_RET_VOID, release_secctx, struct lsm_context *cp) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) diff --git a/include/linux/security.h b/include/linux/security.h index 68e56935716b..58518bbc00a6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -584,8 +584,8 @@ int security_getprocattr(struct task_struct *p, int lsmid, const char *name, int security_setprocattr(int lsmid, const char *name, void *value, size_t size); int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); -int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); -int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, u32 *seclen); +int security_secid_to_secctx(u32 secid, struct lsm_context *cp); +int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp); int security_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void security_release_secctx(struct lsm_context *cp); void security_inode_invalidate_secctx(struct inode *inode); @@ -1557,14 +1557,13 @@ static inline int security_ismaclabel(const char *name) return 0; } -static inline int security_secid_to_secctx(u32 secid, char **secdata, - u32 *seclen) +static inline int security_secid_to_secctx(u32 secid, struct lsm_context *cp) { return -EOPNOTSUPP; } static inline int security_lsmprop_to_secctx(struct lsm_prop *prop, - char **secdata, u32 *seclen) + struct lsm_context *cp) { return -EOPNOTSUPP; } diff --git a/include/net/scm.h b/include/net/scm.h index f75449e1d876..22bb49589fde 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -109,10 +109,9 @@ static inline void scm_passec(struct socket *sock, struct msghdr *msg, struct sc int err; if (test_bit(SOCK_PASSSEC, &sock->flags)) { - err = security_secid_to_secctx(scm->secid, &ctx.context, - &ctx.len); + err = security_secid_to_secctx(scm->secid, &ctx); - if (!err) { + if (err >= 0) { put_cmsg(msg, SOL_SOCKET, SCM_SECURITY, ctx.len, ctx.context); security_release_secctx(&ctx); diff --git a/kernel/audit.c b/kernel/audit.c index 1d48d0654a46..13d0144efaa3 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1473,9 +1473,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh, case AUDIT_SIGNAL_INFO: if (lsmprop_is_set(&audit_sig_lsm)) { err = security_lsmprop_to_secctx(&audit_sig_lsm, - &lsmctx.context, - &lsmctx.len); - if (err) + &lsmctx); + if (err < 0) return err; } sig_data = kmalloc(struct_size(sig_data, ctx, lsmctx.len), @@ -2188,8 +2187,8 @@ int audit_log_task_context(struct audit_buffer *ab) if (!lsmprop_is_set(&prop)) return 0; - error = security_lsmprop_to_secctx(&prop, &ctx.context, &ctx.len); - if (error) { + error = security_lsmprop_to_secctx(&prop, &ctx); + if (error < 0) { if (error != -EINVAL) goto error_path; return 0; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index de8fac6c5bd3..bb0e7346d916 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1109,7 +1109,7 @@ static int audit_log_pid_context(struct audit_context *context, pid_t pid, from_kuid(&init_user_ns, auid), from_kuid(&init_user_ns, uid), sessionid); if (lsmprop_is_set(prop)) { - if (security_lsmprop_to_secctx(prop, &ctx.context, &ctx.len)) { + if (security_lsmprop_to_secctx(prop, &ctx) < 0) { audit_log_format(ab, " obj=(none)"); rc = 1; } else { @@ -1370,7 +1370,6 @@ static void audit_log_time(struct audit_context *context, struct audit_buffer ** static void show_special(struct audit_context *context, int *call_panic) { - struct lsm_context lsmcxt; struct audit_buffer *ab; int i; @@ -1393,16 +1392,14 @@ static void show_special(struct audit_context *context, int *call_panic) from_kgid(&init_user_ns, context->ipc.gid), context->ipc.mode); if (lsmprop_is_set(&context->ipc.oprop)) { - char *ctx = NULL; - u32 len; + struct lsm_context lsmctx; if (security_lsmprop_to_secctx(&context->ipc.oprop, - &ctx, &len)) { + &lsmctx) < 0) { *call_panic = 1; } else { - audit_log_format(ab, " obj=%s", ctx); - lsmcontext_init(&lsmcxt, ctx, len, 0); - security_release_secctx(&lsmcxt); + audit_log_format(ab, " obj=%s", lsmctx.context); + security_release_secctx(&lsmctx); } } if (context->ipc.has_perm) { @@ -1563,8 +1560,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n, if (lsmprop_is_set(&n->oprop)) { struct lsm_context ctx; - if (security_lsmprop_to_secctx(&n->oprop, &ctx.context, - &ctx.len)) { + if (security_lsmprop_to_secctx(&n->oprop, &ctx) < 0) { if (call_panic) *call_panic = 2; } else { diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index a8180dcc2a32..dadbf619b20f 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -136,8 +136,8 @@ static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) if (err) return; - err = security_secid_to_secctx(secid, &ctx.context, &ctx.len); - if (err) + err = security_secid_to_secctx(secid, &ctx); + if (err < 0) return; put_cmsg(msg, SOL_IP, SCM_SECURITY, ctx.len, ctx.context); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 22f46be33f1e..7b74b24348fc 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -360,8 +360,8 @@ static int ctnetlink_dump_secctx(struct sk_buff *skb, const struct nf_conn *ct) struct lsm_context ctx; int ret; - ret = security_secid_to_secctx(ct->secmark, &ctx.context, &ctx.len); - if (ret) + ret = security_secid_to_secctx(ct->secmark, &ctx); + if (ret < 0) return 0; ret = -1; @@ -663,14 +663,14 @@ static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) static inline int ctnetlink_secctx_size(const struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_SECMARK - int len, ret; + int ret; - ret = security_secid_to_secctx(ct->secmark, NULL, &len); - if (ret) + ret = security_secid_to_secctx(ct->secmark, NULL); + if (ret < 0) return 0; return nla_total_size(0) /* CTA_SECCTX */ - + nla_total_size(sizeof(char) * len); /* CTA_SECCTX_NAME */ + + nla_total_size(sizeof(char) * ret); /* CTA_SECCTX_NAME */ #else return 0; #endif diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 5f7fd23b7afe..502cf10aab41 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -175,8 +175,8 @@ static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) struct lsm_context ctx; int ret; - ret = security_secid_to_secctx(ct->secmark, &ctx.context, &ctx.len); - if (ret) + ret = security_secid_to_secctx(ct->secmark, &ctx); + if (ret < 0) return; seq_printf(s, "secctx=%s ", ctx.context); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 37757cd77cf1..5110f29b2f40 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -470,18 +470,18 @@ static int nfqnl_put_sk_classid(struct sk_buff *skb, struct sock *sk) return 0; } -static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata) +static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, struct lsm_context *ctx) { u32 seclen = 0; #if IS_ENABLED(CONFIG_NETWORK_SECMARK) + if (!skb || !sk_fullsock(skb->sk)) return 0; read_lock_bh(&skb->sk->sk_callback_lock); if (skb->secmark) - security_secid_to_secctx(skb->secmark, secdata, &seclen); - + seclen = security_secid_to_secctx(skb->secmark, ctx); read_unlock_bh(&skb->sk->sk_callback_lock); #endif return seclen; @@ -567,8 +567,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, enum ip_conntrack_info ctinfo = 0; const struct nfnl_ct_hook *nfnl_ct; bool csum_verify; - struct lsm_context scaff; /* scaffolding */ - char *secdata = NULL; + struct lsm_context ctx; u32 seclen = 0; ktime_t tstamp; @@ -643,8 +642,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) { - seclen = nfqnl_get_sk_secctx(entskb, &secdata); - if (seclen) + seclen = nfqnl_get_sk_secctx(entskb, &ctx); + if (seclen >= 0) size += nla_total_size(seclen); } @@ -783,7 +782,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (nfqnl_put_sk_classid(skb, entskb->sk) < 0) goto nla_put_failure; - if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata)) + if (seclen && nla_put(skb, NFQA_SECCTX, ctx.len, ctx.context)) goto nla_put_failure; if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0) @@ -811,10 +810,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } nlh->nlmsg_len = skb->len; - if (seclen) { - lsmcontext_init(&scaff, secdata, seclen, 0); - security_release_secctx(&scaff); - } + if (seclen >= 0) + security_release_secctx(&ctx); return skb; nla_put_failure: @@ -822,10 +819,8 @@ nla_put_failure: kfree_skb(skb); net_err_ratelimited("nf_queue: error creating packet message\n"); nlmsg_failure: - if (seclen) { - lsmcontext_init(&scaff, secdata, seclen, 0); - security_release_secctx(&scaff); - } + if (seclen >= 0) + security_release_secctx(&ctx); return NULL; } diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 921fa8eeb451..bd7094f225d1 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -437,9 +437,7 @@ int netlbl_unlhsh_add(struct net *net, unlhsh_add_return: rcu_read_unlock(); if (audit_buf != NULL) { - if (security_secid_to_secctx(secid, - &ctx.context, - &ctx.len) == 0) { + if (security_secid_to_secctx(secid, &ctx) == 0) { audit_log_format(audit_buf, " sec_obj=%s", ctx.context); security_release_secctx(&ctx); } @@ -492,8 +490,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, addr->s_addr, mask->s_addr); dev_put(dev); if (entry != NULL && - security_secid_to_secctx(entry->secid, - &ctx.context, &ctx.len) == 0) { + security_secid_to_secctx(entry->secid, &ctx) == 0) { audit_log_format(audit_buf, " sec_obj=%s", ctx.context); security_release_secctx(&ctx); } @@ -551,8 +548,7 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, addr, mask); dev_put(dev); if (entry != NULL && - security_secid_to_secctx(entry->secid, - &ctx.context, &ctx.len) == 0) { + security_secid_to_secctx(entry->secid, &ctx) == 0) { audit_log_format(audit_buf, " sec_obj=%s", ctx.context); security_release_secctx(&ctx); } @@ -1123,8 +1119,8 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, secid = addr6->secid; } - ret_val = security_secid_to_secctx(secid, &ctx.context, &ctx.len); - if (ret_val != 0) + ret_val = security_secid_to_secctx(secid, &ctx); + if (ret_val < 0) goto list_cb_failure; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_SECCTX, diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index f5e7a9919df1..0d04d23aafe7 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -98,8 +98,7 @@ struct audit_buffer *netlbl_audit_start_common(int type, audit_info->sessionid); if (lsmprop_is_set(&audit_info->prop) && - security_lsmprop_to_secctx(&audit_info->prop, &ctx.context, - &ctx.len) == 0) { + security_lsmprop_to_secctx(&audit_info->prop, &ctx) > 0) { audit_log_format(audit_buf, " subj=%s", ctx.context); security_release_secctx(&ctx); } diff --git a/security/apparmor/include/secid.h b/security/apparmor/include/secid.h index 1bcde2f45f24..6025d3849cf8 100644 --- a/security/apparmor/include/secid.h +++ b/security/apparmor/include/secid.h @@ -25,9 +25,8 @@ struct aa_label; extern int apparmor_display_secid_mode; struct aa_label *aa_secid_to_label(u32 secid); -int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen); -int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, - u32 *seclen); +int apparmor_secid_to_secctx(u32 secid, struct lsm_context *cp); +int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp); int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid); void apparmor_release_secctx(struct lsm_context *cp); diff --git a/security/apparmor/secid.c b/security/apparmor/secid.c index 43e4dab7f6e6..b08969ec34b7 100644 --- a/security/apparmor/secid.c +++ b/security/apparmor/secid.c @@ -47,23 +47,21 @@ struct aa_label *aa_secid_to_label(u32 secid) return xa_load(&aa_secids, secid); } -static int apparmor_label_to_secctx(struct aa_label *label, char **secdata, - u32 *seclen) +static int apparmor_label_to_secctx(struct aa_label *label, + struct lsm_context *cp) { /* TODO: cache secctx and ref count so we don't have to recreate */ int flags = FLAG_VIEW_SUBNS | FLAG_HIDDEN_UNCONFINED | FLAG_ABS_ROOT; int len; - AA_BUG(!seclen); - if (!label) return -EINVAL; if (apparmor_display_secid_mode) flags |= FLAG_SHOW_MODE; - if (secdata) - len = aa_label_asxprint(secdata, root_ns, label, + if (cp) + len = aa_label_asxprint(&cp->context, root_ns, label, flags, GFP_ATOMIC); else len = aa_label_snxprint(NULL, 0, root_ns, label, flags); @@ -71,26 +69,28 @@ static int apparmor_label_to_secctx(struct aa_label *label, char **secdata, if (len < 0) return -ENOMEM; - *seclen = len; + if (cp) { + cp->len = len; + cp->id = LSM_ID_APPARMOR; + } - return 0; + return len; } -int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +int apparmor_secid_to_secctx(u32 secid, struct lsm_context *cp) { struct aa_label *label = aa_secid_to_label(secid); - return apparmor_label_to_secctx(label, secdata, seclen); + return apparmor_label_to_secctx(label, cp); } -int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, - u32 *seclen) +int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp) { struct aa_label *label; label = prop->apparmor.label; - return apparmor_label_to_secctx(label, secdata, seclen); + return apparmor_label_to_secctx(label, cp); } int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) diff --git a/security/security.c b/security/security.c index b7f8ec93f6f0..1edf8fb2046c 100644 --- a/security/security.c +++ b/security/security.c @@ -4304,40 +4304,36 @@ EXPORT_SYMBOL(security_ismaclabel); /** * security_secid_to_secctx() - Convert a secid to a secctx * @secid: secid - * @secdata: secctx - * @seclen: secctx length + * @cp: the LSM context * - * Convert secid to security context. If @secdata is NULL the length of the - * result will be returned in @seclen, but no @secdata will be returned. This + * Convert secid to security context. If @cp is NULL the length of the + * result will be returned, but no data will be returned. This * does mean that the length could change between calls to check the length and - * the next call which actually allocates and returns the @secdata. + * the next call which actually allocates and returns the data. * - * Return: Return 0 on success, error on failure. + * Return: Return length of data on success, error on failure. */ -int security_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +int security_secid_to_secctx(u32 secid, struct lsm_context *cp) { - return call_int_hook(secid_to_secctx, secid, secdata, seclen); + return call_int_hook(secid_to_secctx, secid, cp); } EXPORT_SYMBOL(security_secid_to_secctx); /** * security_lsmprop_to_secctx() - Convert a lsm_prop to a secctx * @prop: lsm specific information - * @secdata: secctx - * @seclen: secctx length + * @cp: the LSM context * - * Convert a @prop entry to security context. If @secdata is NULL the - * length of the result will be returned in @seclen, but no @secdata - * will be returned. This does mean that the length could change between - * calls to check the length and the next call which actually allocates - * and returns the @secdata. + * Convert a @prop entry to security context. If @cp is NULL the + * length of the result will be returned. This does mean that the + * length could change between calls to check the length and the + * next call which actually allocates and returns the @cp. * - * Return: Return 0 on success, error on failure. + * Return: Return length of data on success, error on failure. */ -int security_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, - u32 *seclen) +int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp) { - return call_int_hook(lsmprop_to_secctx, prop, secdata, seclen); + return call_int_hook(lsmprop_to_secctx, prop, cp); } EXPORT_SYMBOL(security_lsmprop_to_secctx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index aabc724f4d44..ddc24db7c0b2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6640,15 +6640,28 @@ static int selinux_ismaclabel(const char *name) return (strcmp(name, XATTR_SELINUX_SUFFIX) == 0); } -static int selinux_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +static int selinux_secid_to_secctx(u32 secid, struct lsm_context *cp) { - return security_sid_to_context(secid, secdata, seclen); + u32 seclen; + int ret; + + if (cp) { + cp->id = LSM_ID_SELINUX; + ret = security_sid_to_context(secid, &cp->context, &cp->len); + if (ret < 0) + return ret; + return cp->len; + } + ret = security_sid_to_context(secid, NULL, &seclen); + if (ret < 0) + return ret; + return seclen; } -static int selinux_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, - u32 *seclen) +static int selinux_lsmprop_to_secctx(struct lsm_prop *prop, + struct lsm_context *cp) { - return selinux_secid_to_secctx(prop->selinux.secid, secdata, seclen); + return selinux_secid_to_secctx(prop->selinux.secid, cp); } static int selinux_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 0c476282e279..4084f99c493c 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4817,41 +4817,48 @@ static int smack_ismaclabel(const char *name) return (strcmp(name, XATTR_SMACK_SUFFIX) == 0); } +/** + * smack_to_secctx - fill a lsm_context + * @skp: Smack label + * @cp: destination + * + * Fill the passed @cp and return the length of the string + */ +static int smack_to_secctx(struct smack_known *skp, struct lsm_context *cp) +{ + int len = strlen(skp->smk_known); + + if (cp) { + cp->context = skp->smk_known; + cp->len = len; + cp->id = LSM_ID_SMACK; + } + return len; +} + /** * smack_secid_to_secctx - return the smack label for a secid * @secid: incoming integer - * @secdata: destination - * @seclen: how long it is + * @cp: destination * * Exists for networking code. */ -static int smack_secid_to_secctx(u32 secid, char **secdata, u32 *seclen) +static int smack_secid_to_secctx(u32 secid, struct lsm_context *cp) { - struct smack_known *skp = smack_from_secid(secid); - - if (secdata) - *secdata = skp->smk_known; - *seclen = strlen(skp->smk_known); - return 0; + return smack_to_secctx(smack_from_secid(secid), cp); } /** * smack_lsmprop_to_secctx - return the smack label * @prop: includes incoming Smack data - * @secdata: destination - * @seclen: how long it is + * @cp: destination * * Exists for audit code. */ -static int smack_lsmprop_to_secctx(struct lsm_prop *prop, char **secdata, - u32 *seclen) +static int smack_lsmprop_to_secctx(struct lsm_prop *prop, + struct lsm_context *cp) { - struct smack_known *skp = prop->smack.skp; - - if (secdata) - *secdata = skp->smk_known; - *seclen = strlen(skp->smk_known); - return 0; + return smack_to_secctx(prop->smack.skp, cp); } /** -- cgit v1.2.3 From 76ecf306ae5da84ef8f48c7a2608736e6866440c Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 23 Oct 2024 14:21:56 -0700 Subject: lsm: use lsm_context in security_inode_getsecctx Change the security_inode_getsecctx() interface to fill a lsm_context structure instead of data and length pointers. This provides the information about which LSM created the context so that security_release_secctx() can use the correct hook. Cc: linux-nfs@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subject tweak] Signed-off-by: Paul Moore --- fs/nfsd/nfs4xdr.c | 26 ++++++++++---------------- include/linux/lsm_hook_defs.h | 4 ++-- include/linux/security.h | 5 +++-- security/security.c | 12 ++++++------ security/selinux/hooks.c | 10 ++++++---- security/smack/smack_lsm.c | 7 ++++--- 6 files changed, 31 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 95ec6a4b4da3..8dd2e2ada474 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2818,11 +2818,11 @@ static __be32 nfsd4_encode_nfsace4(struct xdr_stream *xdr, struct svc_rqst *rqst #ifdef CONFIG_NFSD_V4_SECURITY_LABEL static inline __be32 nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, - void *context, int len) + const struct lsm_context *context) { __be32 *p; - p = xdr_reserve_space(xdr, len + 4 + 4 + 4); + p = xdr_reserve_space(xdr, context->len + 4 + 4 + 4); if (!p) return nfserr_resource; @@ -2832,13 +2832,13 @@ nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, */ *p++ = cpu_to_be32(0); /* lfs */ *p++ = cpu_to_be32(0); /* pi */ - p = xdr_encode_opaque(p, context, len); + p = xdr_encode_opaque(p, context->context, context->len); return 0; } #else static inline __be32 nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp, - void *context, int len) + struct lsm_context *context) { return 0; } #endif @@ -2920,8 +2920,7 @@ struct nfsd4_fattr_args { struct kstatfs statfs; struct nfs4_acl *acl; #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - void *context; - int contextlen; + struct lsm_context context; #endif u32 rdattr_err; bool contextsupport; @@ -3376,8 +3375,7 @@ static __be32 nfsd4_encode_fattr4_suppattr_exclcreat(struct xdr_stream *xdr, static __be32 nfsd4_encode_fattr4_sec_label(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { - return nfsd4_encode_security_label(xdr, args->rqstp, - args->context, args->contextlen); + return nfsd4_encode_security_label(xdr, args->rqstp, &args->context); } #endif @@ -3527,7 +3525,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.ignore_crossmnt = (ignore_crossmnt != 0); args.acl = NULL; #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - args.context = NULL; + args.context.context = NULL; #endif /* @@ -3607,7 +3605,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) { if (exp->ex_flags & NFSEXP_SECURITY_LABEL) err = security_inode_getsecctx(d_inode(dentry), - &args.context, &args.contextlen); + &args.context); else err = -EOPNOTSUPP; args.contextsupport = (err == 0); @@ -3644,12 +3642,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, out: #ifdef CONFIG_NFSD_V4_SECURITY_LABEL - if (args.context) { - struct lsm_context scaff; /* scaffolding */ - - lsmcontext_init(&scaff, args.context, args.contextlen, 0); - security_release_secctx(&scaff); - } + if (args.context.context) + security_release_secctx(&args.context); #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */ kfree(args.acl); if (tempfh) { diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 01e5a8e09bba..69e1076448c6 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -303,8 +303,8 @@ LSM_HOOK(void, LSM_RET_VOID, release_secctx, struct lsm_context *cp) LSM_HOOK(void, LSM_RET_VOID, inode_invalidate_secctx, struct inode *inode) LSM_HOOK(int, 0, inode_notifysecctx, struct inode *inode, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) -LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, void **ctx, - u32 *ctxlen) +LSM_HOOK(int, -EOPNOTSUPP, inode_getsecctx, struct inode *inode, + struct lsm_context *cp) #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, diff --git a/include/linux/security.h b/include/linux/security.h index 58518bbc00a6..29f8100bc7c8 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -591,7 +591,7 @@ void security_release_secctx(struct lsm_context *cp); void security_inode_invalidate_secctx(struct inode *inode); int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); -int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); +int security_inode_getsecctx(struct inode *inode, struct lsm_context *cp); int security_locked_down(enum lockdown_reason what); int lsm_fill_user_ctx(struct lsm_ctx __user *uctx, u32 *uctx_len, void *val, size_t val_len, u64 id, u64 flags); @@ -1591,7 +1591,8 @@ static inline int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 { return -EOPNOTSUPP; } -static inline int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) +static inline int security_inode_getsecctx(struct inode *inode, + struct lsm_context *cp) { return -EOPNOTSUPP; } diff --git a/security/security.c b/security/security.c index 1edf8fb2046c..1aecf1696c50 100644 --- a/security/security.c +++ b/security/security.c @@ -4426,17 +4426,17 @@ EXPORT_SYMBOL(security_inode_setsecctx); /** * security_inode_getsecctx() - Get the security label of an inode * @inode: inode - * @ctx: secctx - * @ctxlen: length of secctx + * @cp: security context * - * On success, returns 0 and fills out @ctx and @ctxlen with the security - * context for the given @inode. + * On success, returns 0 and fills out @cp with the security context + * for the given @inode. * * Return: Returns 0 on success, error on failure. */ -int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) +int security_inode_getsecctx(struct inode *inode, struct lsm_context *cp) { - return call_int_hook(inode_getsecctx, inode, ctx, ctxlen); + memset(cp, 0, sizeof(*cp)); + return call_int_hook(inode_getsecctx, inode, cp); } EXPORT_SYMBOL(security_inode_getsecctx); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index ddc24db7c0b2..9254570de103 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6711,14 +6711,16 @@ static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) ctx, ctxlen, 0, NULL); } -static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) +static int selinux_inode_getsecctx(struct inode *inode, struct lsm_context *cp) { - int len = 0; + int len; len = selinux_inode_getsecurity(&nop_mnt_idmap, inode, - XATTR_SELINUX_SUFFIX, ctx, true); + XATTR_SELINUX_SUFFIX, + (void **)&cp->context, true); if (len < 0) return len; - *ctxlen = len; + cp->len = len; + cp->id = LSM_ID_SELINUX; return 0; } #ifdef CONFIG_KEYS diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 4084f99c493c..55a556f17ade 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4898,12 +4898,13 @@ static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen) ctx, ctxlen, 0, NULL); } -static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) +static int smack_inode_getsecctx(struct inode *inode, struct lsm_context *cp) { struct smack_known *skp = smk_of_inode(inode); - *ctx = skp->smk_known; - *ctxlen = strlen(skp->smk_known); + cp->context = skp->smk_known; + cp->len = strlen(skp->smk_known); + cp->id = LSM_ID_SMACK; return 0; } -- cgit v1.2.3 From b530104f50e86db6f187d39fed5821b3cca755ee Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Wed, 23 Oct 2024 14:21:57 -0700 Subject: lsm: lsm_context in security_dentry_init_security Replace the (secctx,seclen) pointer pair with a single lsm_context pointer to allow return of the LSM identifier along with the context and context length. This allows security_release_secctx() to know how to release the context. Callers have been modified to use or save the returned data from the new structure. Cc: ceph-devel@vger.kernel.org Cc: linux-nfs@vger.kernel.org Signed-off-by: Casey Schaufler [PM: subject tweak] Signed-off-by: Paul Moore --- fs/ceph/super.h | 3 +-- fs/ceph/xattr.c | 16 ++++++---------- fs/fuse/dir.c | 35 ++++++++++++++++++----------------- fs/nfs/nfs4proc.c | 20 ++++++++++++-------- include/linux/lsm_hook_defs.h | 2 +- include/linux/security.h | 26 +++----------------------- security/security.c | 9 ++++----- security/selinux/hooks.c | 8 ++++---- 8 files changed, 49 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/fs/ceph/super.h b/fs/ceph/super.h index af14ec382246..7fa1e7be50e4 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1132,8 +1132,7 @@ struct ceph_acl_sec_ctx { void *acl; #endif #ifdef CONFIG_CEPH_FS_SECURITY_LABEL - void *sec_ctx; - u32 sec_ctxlen; + struct lsm_context lsmctx; #endif #ifdef CONFIG_FS_ENCRYPTION struct ceph_fscrypt_auth *fscrypt_auth; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2a14335a4bb7..537165db4519 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -1383,8 +1383,7 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, int err; err = security_dentry_init_security(dentry, mode, &dentry->d_name, - &name, &as_ctx->sec_ctx, - &as_ctx->sec_ctxlen); + &name, &as_ctx->lsmctx); if (err < 0) { WARN_ON_ONCE(err != -EOPNOTSUPP); err = 0; /* do nothing */ @@ -1409,7 +1408,7 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, */ name_len = strlen(name); err = ceph_pagelist_reserve(pagelist, - 4 * 2 + name_len + as_ctx->sec_ctxlen); + 4 * 2 + name_len + as_ctx->lsmctx.len); if (err) goto out; @@ -1432,8 +1431,9 @@ int ceph_security_init_secctx(struct dentry *dentry, umode_t mode, ceph_pagelist_encode_32(pagelist, name_len); ceph_pagelist_append(pagelist, name, name_len); - ceph_pagelist_encode_32(pagelist, as_ctx->sec_ctxlen); - ceph_pagelist_append(pagelist, as_ctx->sec_ctx, as_ctx->sec_ctxlen); + ceph_pagelist_encode_32(pagelist, as_ctx->lsmctx.len); + ceph_pagelist_append(pagelist, as_ctx->lsmctx.context, + as_ctx->lsmctx.len); err = 0; out: @@ -1446,16 +1446,12 @@ out: void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx) { -#ifdef CONFIG_CEPH_FS_SECURITY_LABEL - struct lsm_context scaff; /* scaffolding */ -#endif #ifdef CONFIG_CEPH_FS_POSIX_ACL posix_acl_release(as_ctx->acl); posix_acl_release(as_ctx->default_acl); #endif #ifdef CONFIG_CEPH_FS_SECURITY_LABEL - lsmcontext_init(&scaff, as_ctx->sec_ctx, as_ctx->sec_ctxlen, 0); - security_release_secctx(&scaff); + security_release_secctx(&as_ctx->lsmctx); #endif #ifdef CONFIG_FS_ENCRYPTION kfree(as_ctx->fscrypt_auth); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 494ac372ace0..0b2f8567ca30 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -467,29 +467,29 @@ static int get_security_context(struct dentry *entry, umode_t mode, { struct fuse_secctx *fctx; struct fuse_secctx_header *header; - void *ctx = NULL, *ptr; - u32 ctxlen, total_len = sizeof(*header); + struct lsm_context lsmctx = { }; + void *ptr; + u32 total_len = sizeof(*header); int err, nr_ctx = 0; - const char *name; + const char *name = NULL; size_t namelen; err = security_dentry_init_security(entry, mode, &entry->d_name, - &name, &ctx, &ctxlen); - if (err) { - if (err != -EOPNOTSUPP) - goto out_err; - /* No LSM is supporting this security hook. Ignore error */ - ctxlen = 0; - ctx = NULL; - } + &name, &lsmctx); + + /* If no LSM is supporting this security hook ignore error */ + if (err && err != -EOPNOTSUPP) + goto out_err; - if (ctxlen) { + if (lsmctx.len) { nr_ctx = 1; namelen = strlen(name) + 1; err = -EIO; - if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || ctxlen > S32_MAX)) + if (WARN_ON(namelen > XATTR_NAME_MAX + 1 || + lsmctx.len > S32_MAX)) goto out_err; - total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + ctxlen); + total_len += FUSE_REC_ALIGN(sizeof(*fctx) + namelen + + lsmctx.len); } err = -ENOMEM; @@ -502,19 +502,20 @@ static int get_security_context(struct dentry *entry, umode_t mode, ptr += sizeof(*header); if (nr_ctx) { fctx = ptr; - fctx->size = ctxlen; + fctx->size = lsmctx.len; ptr += sizeof(*fctx); strcpy(ptr, name); ptr += namelen; - memcpy(ptr, ctx, ctxlen); + memcpy(ptr, lsmctx.context, lsmctx.len); } ext->size = total_len; ext->value = header; err = 0; out_err: - kfree(ctx); + if (nr_ctx) + security_release_secctx(&lsmctx); return err; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2daeb6f663d9..d615d520f8cf 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -114,6 +114,7 @@ static inline struct nfs4_label * nfs4_label_init_security(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs4_label *label) { + struct lsm_context shim; int err; if (label == NULL) @@ -128,21 +129,24 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, label->label = NULL; err = security_dentry_init_security(dentry, sattr->ia_mode, - &dentry->d_name, NULL, - (void **)&label->label, &label->len); - if (err == 0) - return label; + &dentry->d_name, NULL, &shim); + if (err) + return NULL; - return NULL; + label->label = shim.context; + label->len = shim.len; + return label; } static inline void nfs4_label_release_security(struct nfs4_label *label) { - struct lsm_context scaff; /* scaffolding */ + struct lsm_context shim; if (label) { - lsmcontext_init(&scaff, label->label, label->len, 0); - security_release_secctx(&scaff); + shim.context = label->label; + shim.len = label->len; + shim.id = LSM_ID_UNDEF; + security_release_secctx(&shim); } } static inline u32 *nfs4_bitmask(struct nfs_server *server, struct nfs4_label *label) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 69e1076448c6..e2f1ce37c41e 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -83,7 +83,7 @@ LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, - void **ctx, u32 *ctxlen) + struct lsm_context *cp) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, struct cred *new) diff --git a/include/linux/security.h b/include/linux/security.h index 29f8100bc7c8..980b6c207cad 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -237,25 +237,6 @@ struct lsm_context { int id; /* Identifies the module */ }; -/** - * lsmcontext_init - initialize an lsmcontext structure. - * @cp: Pointer to the context to initialize - * @context: Initial context, or NULL - * @size: Size of context, or 0 - * @id: Which LSM provided the context - * - * Fill in the lsmcontext from the provided information. - * This is a scaffolding function that will be removed when - * lsm_context integration is complete. - */ -static inline void lsmcontext_init(struct lsm_context *cp, char *context, - u32 size, int id) -{ - cp->id = id; - cp->context = context; - cp->len = size; -} - /* * Values used in the task_security_ops calls */ @@ -409,8 +390,8 @@ int security_sb_clone_mnt_opts(const struct super_block *oldsb, int security_move_mount(const struct path *from_path, const struct path *to_path); int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, - const char **xattr_name, void **ctx, - u32 *ctxlen); + const char **xattr_name, + struct lsm_context *lsmcxt); int security_dentry_create_files_as(struct dentry *dentry, int mode, struct qstr *name, const struct cred *old, @@ -883,8 +864,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, - void **ctx, - u32 *ctxlen) + struct lsm_context *lsmcxt) { return -EOPNOTSUPP; } diff --git a/security/security.c b/security/security.c index 1aecf1696c50..7523d14f31fb 100644 --- a/security/security.c +++ b/security/security.c @@ -1735,8 +1735,7 @@ void security_inode_free(struct inode *inode) * @mode: mode used to determine resource type * @name: name of the last path component * @xattr_name: name of the security/LSM xattr - * @ctx: pointer to the resulting LSM context - * @ctxlen: length of @ctx + * @lsmctx: pointer to the resulting LSM context * * Compute a context for a dentry as the inode is not yet available since NFSv4 * has no label backed by an EA anyway. It is important to note that @@ -1746,11 +1745,11 @@ void security_inode_free(struct inode *inode) */ int security_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, - const char **xattr_name, void **ctx, - u32 *ctxlen) + const char **xattr_name, + struct lsm_context *lsmctx) { return call_int_hook(dentry_init_security, dentry, mode, name, - xattr_name, ctx, ctxlen); + xattr_name, lsmctx); } EXPORT_SYMBOL(security_dentry_init_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9254570de103..4f8d37ae769a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2869,8 +2869,8 @@ static void selinux_inode_free_security(struct inode *inode) static int selinux_dentry_init_security(struct dentry *dentry, int mode, const struct qstr *name, - const char **xattr_name, void **ctx, - u32 *ctxlen) + const char **xattr_name, + struct lsm_context *cp) { u32 newsid; int rc; @@ -2885,8 +2885,8 @@ static int selinux_dentry_init_security(struct dentry *dentry, int mode, if (xattr_name) *xattr_name = XATTR_NAME_SELINUX; - return security_sid_to_context(newsid, (char **)ctx, - ctxlen); + cp->id = LSM_ID_SELINUX; + return security_sid_to_context(newsid, &cp->context, &cp->len); } static int selinux_dentry_create_files_as(struct dentry *dentry, int mode, -- cgit v1.2.3 From ed638918f4df39daa458435f0825b487c1f192c8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Oct 2024 11:07:53 -0700 Subject: scsi: Rename .slave_alloc() and .slave_destroy() Rename .slave_alloc() into .sdev_init() and .slave_destroy() into .sdev_destroy(). The new names make it clear that these are actions on SCSI devices. Make this change in the SCSI core, SCSI drivers and also in the ATA drivers. No functionality has been changed. This patch has been created as follows: * Change the text "slave_alloc" into "sdev_init" in all source files except those in drivers/net/ and Documentation/. * Change the text "slave_destroy" into "sdev_destroy" in all source files except those in drivers/net/ and Documentation/. * Rename lpfc_no_slave() into lpfc_no_sdev(). * Manually adjust whitespace where necessary to restore vertical alignment (dc395x driver and include/linux/libata.h). Acked-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241022180839.2712439-2-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ata/libata-scsi.c | 12 ++++++------ drivers/firewire/sbp2.c | 4 ++-- drivers/message/fusion/mptfc.c | 12 ++++++------ drivers/message/fusion/mptsas.c | 8 ++++---- drivers/message/fusion/mptscsih.c | 6 +++--- drivers/message/fusion/mptscsih.h | 2 +- drivers/message/fusion/mptspi.c | 12 ++++++------ drivers/s390/scsi/zfcp_scsi.c | 10 +++++----- drivers/s390/scsi/zfcp_sysfs.c | 2 +- drivers/s390/scsi/zfcp_unit.c | 2 +- drivers/scsi/53c700.c | 12 ++++++------ drivers/scsi/aic7xxx/aic79xx_osm.c | 4 ++-- drivers/scsi/aic7xxx/aic7xxx_osm.c | 4 ++-- drivers/scsi/bfa/bfad_im.c | 20 ++++++++++---------- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 2 +- drivers/scsi/csiostor/csio_scsi.c | 12 ++++++------ drivers/scsi/dc395x.c | 12 ++++++------ drivers/scsi/esp_scsi.c | 8 ++++---- drivers/scsi/fcoe/fcoe.c | 2 +- drivers/scsi/fnic/fnic_main.c | 4 ++-- drivers/scsi/hisi_sas/hisi_sas.h | 2 +- drivers/scsi/hisi_sas/hisi_sas_main.c | 6 +++--- drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 2 +- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 2 +- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 2 +- drivers/scsi/hpsa.c | 12 ++++++------ drivers/scsi/ibmvscsi/ibmvfc.c | 6 +++--- drivers/scsi/ipr.c | 12 ++++++------ drivers/scsi/libfc/fc_fcp.c | 6 +++--- drivers/scsi/libsas/sas_scsi_host.c | 4 ++-- drivers/scsi/lpfc/lpfc_scsi.c | 22 +++++++++++----------- drivers/scsi/megaraid/megaraid_sas_base.c | 8 ++++---- drivers/scsi/mpi3mr/mpi3mr_os.c | 12 ++++++------ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 16 ++++++++-------- drivers/scsi/myrb.c | 16 ++++++++-------- drivers/scsi/myrs.c | 8 ++++---- drivers/scsi/ncr53c8xx.c | 4 ++-- drivers/scsi/pmcraid.c | 14 +++++++------- drivers/scsi/qla2xxx/qla_os.c | 8 ++++---- drivers/scsi/qla4xxx/ql4_os.c | 6 +++--- drivers/scsi/scsi_debug.c | 12 ++++++------ drivers/scsi/scsi_scan.c | 8 ++++---- drivers/scsi/scsi_sysfs.c | 4 ++-- drivers/scsi/smartpqi/smartpqi_init.c | 8 ++++---- drivers/scsi/snic/snic_main.c | 6 +++--- drivers/scsi/storvsc_drv.c | 2 +- drivers/scsi/sym53c8xx_2/sym_glue.c | 10 +++++----- drivers/scsi/virtio_scsi.c | 2 +- drivers/scsi/xen-scsifront.c | 4 ++-- drivers/ufs/core/ufshcd.c | 12 ++++++------ drivers/usb/image/microtek.c | 4 ++-- drivers/usb/storage/scsiglue.c | 6 +++--- drivers/usb/storage/uas.c | 4 ++-- include/linux/libata.h | 8 ++++---- include/scsi/libfc.h | 2 +- include/scsi/libsas.h | 4 ++-- include/scsi/scsi_device.h | 4 ++-- include/scsi/scsi_host.h | 16 ++++++++-------- 58 files changed, 217 insertions(+), 217 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 2ce5befd2242..7bfb57b4e659 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1133,7 +1133,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim, } /** - * ata_scsi_slave_alloc - Early setup of SCSI device + * ata_scsi_sdev_init - Early setup of SCSI device * @sdev: SCSI device to examine * * This is called from scsi_alloc_sdev() when the scsi device @@ -1143,7 +1143,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim, * Defined by SCSI layer. We don't really care. */ -int ata_scsi_slave_alloc(struct scsi_device *sdev) +int ata_scsi_sdev_init(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct device_link *link; @@ -1166,7 +1166,7 @@ int ata_scsi_slave_alloc(struct scsi_device *sdev) return 0; } -EXPORT_SYMBOL_GPL(ata_scsi_slave_alloc); +EXPORT_SYMBOL_GPL(ata_scsi_sdev_init); /** * ata_scsi_device_configure - Set SCSI device attributes @@ -1195,7 +1195,7 @@ int ata_scsi_device_configure(struct scsi_device *sdev, EXPORT_SYMBOL_GPL(ata_scsi_device_configure); /** - * ata_scsi_slave_destroy - SCSI device is about to be destroyed + * ata_scsi_sdev_destroy - SCSI device is about to be destroyed * @sdev: SCSI device to be destroyed * * @sdev is about to be destroyed for hot/warm unplugging. If @@ -1208,7 +1208,7 @@ EXPORT_SYMBOL_GPL(ata_scsi_device_configure); * LOCKING: * Defined by SCSI layer. We don't really care. */ -void ata_scsi_slave_destroy(struct scsi_device *sdev) +void ata_scsi_sdev_destroy(struct scsi_device *sdev) { struct ata_port *ap = ata_shost_to_port(sdev->host); unsigned long flags; @@ -1228,7 +1228,7 @@ void ata_scsi_slave_destroy(struct scsi_device *sdev) kfree(sdev->dma_drain_buf); } -EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy); +EXPORT_SYMBOL_GPL(ata_scsi_sdev_destroy); /** * ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT command diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 827dee0f57dd..4937df12c3fb 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1490,7 +1490,7 @@ static int sbp2_scsi_queuecommand(struct Scsi_Host *shost, return retval; } -static int sbp2_scsi_slave_alloc(struct scsi_device *sdev) +static int sbp2_scsi_sdev_init(struct scsi_device *sdev) { struct sbp2_logical_unit *lu = sdev->hostdata; @@ -1590,7 +1590,7 @@ static const struct scsi_host_template scsi_driver_template = { .name = "SBP-2 IEEE-1394", .proc_name = "sbp2", .queuecommand = sbp2_scsi_queuecommand, - .slave_alloc = sbp2_scsi_slave_alloc, + .sdev_init = sbp2_scsi_sdev_init, .device_configure = sbp2_scsi_device_configure, .eh_abort_handler = sbp2_scsi_abort, .this_id = -1, diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c index 91242f26defb..f034ebacf974 100644 --- a/drivers/message/fusion/mptfc.c +++ b/drivers/message/fusion/mptfc.c @@ -96,7 +96,7 @@ static u8 mptfcTaskCtx = MPT_MAX_PROTOCOL_DRIVERS; static u8 mptfcInternalCtx = MPT_MAX_PROTOCOL_DRIVERS; static int mptfc_target_alloc(struct scsi_target *starget); -static int mptfc_slave_alloc(struct scsi_device *sdev); +static int mptfc_sdev_init(struct scsi_device *sdev); static int mptfc_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *SCpnt); static void mptfc_target_destroy(struct scsi_target *starget); static void mptfc_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout); @@ -113,10 +113,10 @@ static const struct scsi_host_template mptfc_driver_template = { .info = mptscsih_info, .queuecommand = mptfc_qcmd, .target_alloc = mptfc_target_alloc, - .slave_alloc = mptfc_slave_alloc, + .sdev_init = mptfc_sdev_init, .slave_configure = mptscsih_slave_configure, .target_destroy = mptfc_target_destroy, - .slave_destroy = mptscsih_slave_destroy, + .sdev_destroy = mptscsih_sdev_destroy, .change_queue_depth = mptscsih_change_queue_depth, .eh_timed_out = fc_eh_timed_out, .eh_abort_handler = mptfc_abort, @@ -503,7 +503,7 @@ mptfc_register_dev(MPT_ADAPTER *ioc, int channel, FCDevicePage0_t *pg0) /* * if already mapped, remap here. If not mapped, * target_alloc will allocate vtarget and map, - * slave_alloc will fill in vdevice from vtarget. + * sdev_init will fill in vdevice from vtarget. */ if (ri->starget) { vtarget = ri->starget->hostdata; @@ -631,7 +631,7 @@ mptfc_dump_lun_info(MPT_ADAPTER *ioc, struct fc_rport *rport, struct scsi_device * Init memory once per LUN. */ static int -mptfc_slave_alloc(struct scsi_device *sdev) +mptfc_sdev_init(struct scsi_device *sdev) { MPT_SCSI_HOST *hd; VirtTarget *vtarget; @@ -651,7 +651,7 @@ mptfc_slave_alloc(struct scsi_device *sdev) vdevice = kzalloc(sizeof(VirtDevice), GFP_KERNEL); if (!vdevice) { - printk(MYIOC_s_ERR_FMT "slave_alloc kmalloc(%zd) FAILED!\n", + printk(MYIOC_s_ERR_FMT "sdev_init kmalloc(%zd) FAILED!\n", ioc->name, sizeof(VirtDevice)); return -ENOMEM; } diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index a798e26c6402..b884e634acc6 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -1867,7 +1867,7 @@ mptsas_target_destroy(struct scsi_target *starget) static int -mptsas_slave_alloc(struct scsi_device *sdev) +mptsas_sdev_init(struct scsi_device *sdev) { struct Scsi_Host *host = sdev->host; MPT_SCSI_HOST *hd = shost_priv(host); @@ -1880,7 +1880,7 @@ mptsas_slave_alloc(struct scsi_device *sdev) vdevice = kzalloc(sizeof(VirtDevice), GFP_KERNEL); if (!vdevice) { - printk(MYIOC_s_ERR_FMT "slave_alloc kzalloc(%zd) FAILED!\n", + printk(MYIOC_s_ERR_FMT "sdev_init kzalloc(%zd) FAILED!\n", ioc->name, sizeof(VirtDevice)); return -ENOMEM; } @@ -2005,10 +2005,10 @@ static const struct scsi_host_template mptsas_driver_template = { .info = mptscsih_info, .queuecommand = mptsas_qcmd, .target_alloc = mptsas_target_alloc, - .slave_alloc = mptsas_slave_alloc, + .sdev_init = mptsas_sdev_init, .slave_configure = mptsas_slave_configure, .target_destroy = mptsas_target_destroy, - .slave_destroy = mptscsih_slave_destroy, + .sdev_destroy = mptscsih_sdev_destroy, .change_queue_depth = mptscsih_change_queue_depth, .eh_timed_out = mptsas_eh_timed_out, .eh_abort_handler = mptscsih_abort, diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 6c3f25cc33ff..50ea4ee65d5c 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1071,7 +1071,7 @@ EXPORT_SYMBOL(mptscsih_flush_running_cmds); * * Returns: None. * - * Called from slave_destroy. + * Called from sdev_destroy. */ static void mptscsih_search_running_cmds(MPT_SCSI_HOST *hd, VirtDevice *vdevice) @@ -2331,7 +2331,7 @@ EXPORT_SYMBOL(mptscsih_raid_id_to_num); * Called if no device present or device being unloaded */ void -mptscsih_slave_destroy(struct scsi_device *sdev) +mptscsih_sdev_destroy(struct scsi_device *sdev) { struct Scsi_Host *host = sdev->host; MPT_SCSI_HOST *hd = shost_priv(host); @@ -3302,7 +3302,7 @@ EXPORT_SYMBOL(mptscsih_resume); EXPORT_SYMBOL(mptscsih_show_info); EXPORT_SYMBOL(mptscsih_info); EXPORT_SYMBOL(mptscsih_qcmd); -EXPORT_SYMBOL(mptscsih_slave_destroy); +EXPORT_SYMBOL(mptscsih_sdev_destroy); EXPORT_SYMBOL(mptscsih_slave_configure); EXPORT_SYMBOL(mptscsih_abort); EXPORT_SYMBOL(mptscsih_dev_reset); diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h index e3d92c392673..9f1cde8e76a8 100644 --- a/drivers/message/fusion/mptscsih.h +++ b/drivers/message/fusion/mptscsih.h @@ -116,7 +116,7 @@ extern const char * mptscsih_info(struct Scsi_Host *SChost); extern int mptscsih_qcmd(struct scsi_cmnd *SCpnt); extern int mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, u64 lun, int ctx2abort, ulong timeout); -extern void mptscsih_slave_destroy(struct scsi_device *device); +extern void mptscsih_sdev_destroy(struct scsi_device *device); extern int mptscsih_slave_configure(struct scsi_device *device); extern int mptscsih_abort(struct scsi_cmnd * SCpnt); extern int mptscsih_dev_reset(struct scsi_cmnd * SCpnt); diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index 574b882c9a85..09828c34f8fb 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -713,7 +713,7 @@ static void mptspi_dv_device(struct _MPT_SCSI_HOST *hd, mptspi_read_parameters(sdev->sdev_target); } -static int mptspi_slave_alloc(struct scsi_device *sdev) +static int mptspi_sdev_init(struct scsi_device *sdev) { MPT_SCSI_HOST *hd = shost_priv(sdev->host); VirtTarget *vtarget; @@ -727,7 +727,7 @@ static int mptspi_slave_alloc(struct scsi_device *sdev) vdevice = kzalloc(sizeof(VirtDevice), GFP_KERNEL); if (!vdevice) { - printk(MYIOC_s_ERR_FMT "slave_alloc kmalloc(%zd) FAILED!\n", + printk(MYIOC_s_ERR_FMT "sdev_init kmalloc(%zd) FAILED!\n", ioc->name, sizeof(VirtDevice)); return -ENOMEM; } @@ -799,7 +799,7 @@ mptspi_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *SCpnt) return mptscsih_qcmd(SCpnt); } -static void mptspi_slave_destroy(struct scsi_device *sdev) +static void mptspi_sdev_destroy(struct scsi_device *sdev) { struct scsi_target *starget = scsi_target(sdev); VirtTarget *vtarget = starget->hostdata; @@ -817,7 +817,7 @@ static void mptspi_slave_destroy(struct scsi_device *sdev) mptspi_write_spi_device_pg1(starget, &pg1); } - mptscsih_slave_destroy(sdev); + mptscsih_sdev_destroy(sdev); } static const struct scsi_host_template mptspi_driver_template = { @@ -828,10 +828,10 @@ static const struct scsi_host_template mptspi_driver_template = { .info = mptscsih_info, .queuecommand = mptspi_qcmd, .target_alloc = mptspi_target_alloc, - .slave_alloc = mptspi_slave_alloc, + .sdev_init = mptspi_sdev_init, .slave_configure = mptspi_slave_configure, .target_destroy = mptspi_target_destroy, - .slave_destroy = mptspi_slave_destroy, + .sdev_destroy = mptspi_sdev_destroy, .change_queue_depth = mptscsih_change_queue_depth, .eh_abort_handler = mptscsih_abort, .eh_device_reset_handler = mptscsih_dev_reset, diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index b2a8cd792266..c9cb9db8c2ee 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -37,11 +37,11 @@ static bool allow_lun_scan = true; module_param(allow_lun_scan, bool, 0600); MODULE_PARM_DESC(allow_lun_scan, "For NPIV, scan and attach all storage LUNs"); -static void zfcp_scsi_slave_destroy(struct scsi_device *sdev) +static void zfcp_scsi_sdev_destroy(struct scsi_device *sdev) { struct zfcp_scsi_dev *zfcp_sdev = sdev_to_zfcp(sdev); - /* if previous slave_alloc returned early, there is nothing to do */ + /* if previous sdev_init returned early, there is nothing to do */ if (!zfcp_sdev->port) return; @@ -110,7 +110,7 @@ int zfcp_scsi_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scpnt) return ret; } -static int zfcp_scsi_slave_alloc(struct scsi_device *sdev) +static int zfcp_scsi_sdev_init(struct scsi_device *sdev) { struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); struct zfcp_adapter *adapter = @@ -427,9 +427,9 @@ static const struct scsi_host_template zfcp_scsi_host_template = { .eh_device_reset_handler = zfcp_scsi_eh_device_reset_handler, .eh_target_reset_handler = zfcp_scsi_eh_target_reset_handler, .eh_host_reset_handler = zfcp_scsi_eh_host_reset_handler, - .slave_alloc = zfcp_scsi_slave_alloc, + .sdev_init = zfcp_scsi_sdev_init, .slave_configure = zfcp_scsi_slave_configure, - .slave_destroy = zfcp_scsi_slave_destroy, + .sdev_destroy = zfcp_scsi_sdev_destroy, .change_queue_depth = scsi_change_queue_depth, .host_reset = zfcp_scsi_sysfs_host_reset, .proc_name = "zfcp", diff --git a/drivers/s390/scsi/zfcp_sysfs.c b/drivers/s390/scsi/zfcp_sysfs.c index 304b81bb5f90..41e36af35488 100644 --- a/drivers/s390/scsi/zfcp_sysfs.c +++ b/drivers/s390/scsi/zfcp_sysfs.c @@ -284,7 +284,7 @@ static bool zfcp_sysfs_port_in_use(struct zfcp_port *const port) goto unlock_host_lock; } - /* port is about to be removed, so no more unit_add or slave_alloc */ + /* port is about to be removed, so no more unit_add or sdev_init */ zfcp_sysfs_port_set_removing(port); in_use = false; diff --git a/drivers/s390/scsi/zfcp_unit.c b/drivers/s390/scsi/zfcp_unit.c index 60f2a04f0869..4ef2a635d34f 100644 --- a/drivers/s390/scsi/zfcp_unit.c +++ b/drivers/s390/scsi/zfcp_unit.c @@ -170,7 +170,7 @@ int zfcp_unit_add(struct zfcp_port *port, u64 fcp_lun) write_unlock_irq(&port->unit_list_lock); /* * lock order: shost->scan_mutex before zfcp_sysfs_port_units_mutex - * due to zfcp_unit_scsi_scan() => zfcp_scsi_slave_alloc() + * due to zfcp_unit_scsi_scan() => zfcp_scsi_sdev_init() */ mutex_unlock(&zfcp_sysfs_port_units_mutex); diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 85439e976143..71c1d25f93b1 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -158,9 +158,9 @@ STATIC int NCR_700_abort(struct scsi_cmnd * SCpnt); STATIC int NCR_700_host_reset(struct scsi_cmnd * SCpnt); STATIC void NCR_700_chip_setup(struct Scsi_Host *host); STATIC void NCR_700_chip_reset(struct Scsi_Host *host); -STATIC int NCR_700_slave_alloc(struct scsi_device *SDpnt); +STATIC int NCR_700_sdev_init(struct scsi_device *SDpnt); STATIC int NCR_700_slave_configure(struct scsi_device *SDpnt); -STATIC void NCR_700_slave_destroy(struct scsi_device *SDpnt); +STATIC void NCR_700_sdev_destroy(struct scsi_device *SDpnt); static int NCR_700_change_queue_depth(struct scsi_device *SDpnt, int depth); STATIC const struct attribute_group *NCR_700_dev_groups[]; @@ -331,8 +331,8 @@ NCR_700_detect(struct scsi_host_template *tpnt, tpnt->sg_tablesize = NCR_700_SG_SEGMENTS; tpnt->cmd_per_lun = NCR_700_CMD_PER_LUN; tpnt->slave_configure = NCR_700_slave_configure; - tpnt->slave_destroy = NCR_700_slave_destroy; - tpnt->slave_alloc = NCR_700_slave_alloc; + tpnt->sdev_destroy = NCR_700_sdev_destroy; + tpnt->sdev_init = NCR_700_sdev_init; tpnt->change_queue_depth = NCR_700_change_queue_depth; if(tpnt->name == NULL) @@ -2017,7 +2017,7 @@ NCR_700_set_offset(struct scsi_target *STp, int offset) } STATIC int -NCR_700_slave_alloc(struct scsi_device *SDp) +NCR_700_sdev_init(struct scsi_device *SDp) { SDp->hostdata = kzalloc(sizeof(struct NCR_700_Device_Parameters), GFP_KERNEL); @@ -2052,7 +2052,7 @@ NCR_700_slave_configure(struct scsi_device *SDp) } STATIC void -NCR_700_slave_destroy(struct scsi_device *SDp) +NCR_700_sdev_destroy(struct scsi_device *SDp) { kfree(SDp->hostdata); SDp->hostdata = NULL; diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index 4202059815a0..2a137629e48d 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -672,7 +672,7 @@ ahd_linux_target_destroy(struct scsi_target *starget) } static int -ahd_linux_slave_alloc(struct scsi_device *sdev) +ahd_linux_sdev_init(struct scsi_device *sdev) { struct ahd_softc *ahd = *((struct ahd_softc **)sdev->host->hostdata); @@ -906,7 +906,7 @@ struct scsi_host_template aic79xx_driver_template = { .this_id = -1, .max_sectors = 8192, .cmd_per_lun = 2, - .slave_alloc = ahd_linux_slave_alloc, + .sdev_init = ahd_linux_sdev_init, .slave_configure = ahd_linux_slave_configure, .target_alloc = ahd_linux_target_alloc, .target_destroy = ahd_linux_target_destroy, diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index b0c4f2345321..9066ab2ef6e3 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -632,7 +632,7 @@ ahc_linux_target_destroy(struct scsi_target *starget) } static int -ahc_linux_slave_alloc(struct scsi_device *sdev) +ahc_linux_sdev_init(struct scsi_device *sdev) { struct ahc_softc *ahc = *((struct ahc_softc **)sdev->host->hostdata); @@ -791,7 +791,7 @@ struct scsi_host_template aic7xxx_driver_template = { .this_id = -1, .max_sectors = 8192, .cmd_per_lun = 2, - .slave_alloc = ahc_linux_slave_alloc, + .sdev_init = ahc_linux_sdev_init, .slave_configure = ahc_linux_slave_configure, .target_alloc = ahc_linux_target_alloc, .target_destroy = ahc_linux_target_destroy, diff --git a/drivers/scsi/bfa/bfad_im.c b/drivers/scsi/bfa/bfad_im.c index 66fb701401de..5729f429ed8b 100644 --- a/drivers/scsi/bfa/bfad_im.c +++ b/drivers/scsi/bfa/bfad_im.c @@ -25,7 +25,7 @@ struct scsi_transport_template *bfad_im_scsi_transport_template; struct scsi_transport_template *bfad_im_scsi_vport_transport_template; static void bfad_im_itnim_work_handler(struct work_struct *work); static int bfad_im_queuecommand(struct Scsi_Host *h, struct scsi_cmnd *cmnd); -static int bfad_im_slave_alloc(struct scsi_device *sdev); +static int bfad_im_sdev_init(struct scsi_device *sdev); static void bfad_im_fc_rport_add(struct bfad_im_port_s *im_port, struct bfad_itnim_s *itnim); @@ -404,10 +404,10 @@ bfad_im_reset_target_handler(struct scsi_cmnd *cmnd) } /* - * Scsi_Host template entry slave_destroy. + * Scsi_Host template entry sdev_destroy. */ static void -bfad_im_slave_destroy(struct scsi_device *sdev) +bfad_im_sdev_destroy(struct scsi_device *sdev) { sdev->hostdata = NULL; return; @@ -800,9 +800,9 @@ struct scsi_host_template bfad_im_scsi_host_template = { .eh_device_reset_handler = bfad_im_reset_lun_handler, .eh_target_reset_handler = bfad_im_reset_target_handler, - .slave_alloc = bfad_im_slave_alloc, + .sdev_init = bfad_im_sdev_init, .slave_configure = bfad_im_slave_configure, - .slave_destroy = bfad_im_slave_destroy, + .sdev_destroy = bfad_im_sdev_destroy, .this_id = -1, .sg_tablesize = BFAD_IO_MAX_SGE, @@ -823,9 +823,9 @@ struct scsi_host_template bfad_im_vport_template = { .eh_device_reset_handler = bfad_im_reset_lun_handler, .eh_target_reset_handler = bfad_im_reset_target_handler, - .slave_alloc = bfad_im_slave_alloc, + .sdev_init = bfad_im_sdev_init, .slave_configure = bfad_im_slave_configure, - .slave_destroy = bfad_im_slave_destroy, + .sdev_destroy = bfad_im_sdev_destroy, .this_id = -1, .sg_tablesize = BFAD_IO_MAX_SGE, @@ -915,7 +915,7 @@ bfad_get_itnim(struct bfad_im_port_s *im_port, int id) } /* - * Function is invoked from the SCSI Host Template slave_alloc() entry point. + * Function is invoked from the SCSI Host Template sdev_init() entry point. * Has the logic to query the LUN Mask database to check if this LUN needs to * be made visible to the SCSI mid-layer or not. * @@ -946,10 +946,10 @@ bfad_im_check_if_make_lun_visible(struct scsi_device *sdev, } /* - * Scsi_Host template entry slave_alloc + * Scsi_Host template entry sdev_init */ static int -bfad_im_slave_alloc(struct scsi_device *sdev) +bfad_im_sdev_init(struct scsi_device *sdev) { struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); struct bfad_itnim_data_s *itnim_data; diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index f49783b89d04..938e38a01326 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -2951,7 +2951,7 @@ static struct scsi_host_template bnx2fc_shost_template = { .eh_device_reset_handler = bnx2fc_eh_device_reset, /* lun reset */ .eh_target_reset_handler = bnx2fc_eh_target_reset, /* tgt reset */ .eh_host_reset_handler = fc_eh_host_reset, - .slave_alloc = fc_slave_alloc, + .sdev_init = fc_sdev_init, .change_queue_depth = scsi_change_queue_depth, .this_id = -1, .cmd_per_lun = 3, diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c index 8329f0cab4e7..527530b17a51 100644 --- a/drivers/scsi/csiostor/csio_scsi.c +++ b/drivers/scsi/csiostor/csio_scsi.c @@ -2224,7 +2224,7 @@ fail: } static int -csio_slave_alloc(struct scsi_device *sdev) +csio_sdev_init(struct scsi_device *sdev) { struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); @@ -2244,7 +2244,7 @@ csio_slave_configure(struct scsi_device *sdev) } static void -csio_slave_destroy(struct scsi_device *sdev) +csio_sdev_destroy(struct scsi_device *sdev) { sdev->hostdata = NULL; } @@ -2276,9 +2276,9 @@ struct scsi_host_template csio_fcoe_shost_template = { .eh_timed_out = fc_eh_timed_out, .eh_abort_handler = csio_eh_abort_handler, .eh_device_reset_handler = csio_eh_lun_reset_handler, - .slave_alloc = csio_slave_alloc, + .sdev_init = csio_sdev_init, .slave_configure = csio_slave_configure, - .slave_destroy = csio_slave_destroy, + .sdev_destroy = csio_sdev_destroy, .scan_finished = csio_scan_finished, .this_id = -1, .sg_tablesize = CSIO_SCSI_MAX_SGE, @@ -2295,9 +2295,9 @@ struct scsi_host_template csio_fcoe_shost_vport_template = { .eh_timed_out = fc_eh_timed_out, .eh_abort_handler = csio_eh_abort_handler, .eh_device_reset_handler = csio_eh_lun_reset_handler, - .slave_alloc = csio_slave_alloc, + .sdev_init = csio_sdev_init, .slave_configure = csio_slave_configure, - .slave_destroy = csio_slave_destroy, + .sdev_destroy = csio_sdev_destroy, .scan_finished = csio_scan_finished, .this_id = -1, .sg_tablesize = CSIO_SCSI_MAX_SGE, diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index d108a86e196e..235ebc851ca9 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -3715,13 +3715,13 @@ static void adapter_remove_and_free_all_devices(struct AdapterCtlBlk* acb) /** - * dc395x_slave_alloc - Called by the scsi mid layer to tell us about a new + * dc395x_sdev_init - Called by the scsi mid layer to tell us about a new * scsi device that we need to deal with. We allocate a new device and then * insert that device into the adapters device list. * * @scsi_device: The new scsi device that we need to handle. **/ -static int dc395x_slave_alloc(struct scsi_device *scsi_device) +static int dc395x_sdev_init(struct scsi_device *scsi_device) { struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)scsi_device->host->hostdata; struct DeviceCtlBlk *dcb; @@ -3736,12 +3736,12 @@ static int dc395x_slave_alloc(struct scsi_device *scsi_device) /** - * dc395x_slave_destroy - Called by the scsi mid layer to tell us about a + * dc395x_sdev_destroy - Called by the scsi mid layer to tell us about a * device that is going away. * * @scsi_device: The new scsi device that we need to handle. **/ -static void dc395x_slave_destroy(struct scsi_device *scsi_device) +static void dc395x_sdev_destroy(struct scsi_device *scsi_device) { struct AdapterCtlBlk *acb = (struct AdapterCtlBlk *)scsi_device->host->hostdata; struct DeviceCtlBlk *dcb = find_dcb(acb, scsi_device->id, scsi_device->lun); @@ -4547,8 +4547,8 @@ static const struct scsi_host_template dc395x_driver_template = { .show_info = dc395x_show_info, .name = DC395X_BANNER " " DC395X_VERSION, .queuecommand = dc395x_queue_command, - .slave_alloc = dc395x_slave_alloc, - .slave_destroy = dc395x_slave_destroy, + .sdev_init = dc395x_sdev_init, + .sdev_destroy = dc395x_sdev_destroy, .can_queue = DC395x_MAX_CAN_QUEUE, .this_id = 7, .sg_tablesize = DC395x_MAX_SG_TABLESIZE, diff --git a/drivers/scsi/esp_scsi.c b/drivers/scsi/esp_scsi.c index 0175d2282b45..d14a3383abb0 100644 --- a/drivers/scsi/esp_scsi.c +++ b/drivers/scsi/esp_scsi.c @@ -2441,7 +2441,7 @@ static void esp_target_destroy(struct scsi_target *starget) tp->starget = NULL; } -static int esp_slave_alloc(struct scsi_device *dev) +static int esp_sdev_init(struct scsi_device *dev) { struct esp *esp = shost_priv(dev->host); struct esp_target_data *tp = &esp->target[dev->id]; @@ -2479,7 +2479,7 @@ static int esp_slave_configure(struct scsi_device *dev) return 0; } -static void esp_slave_destroy(struct scsi_device *dev) +static void esp_sdev_destroy(struct scsi_device *dev) { struct esp_lun_data *lp = dev->hostdata; @@ -2667,9 +2667,9 @@ const struct scsi_host_template scsi_esp_template = { .queuecommand = esp_queuecommand, .target_alloc = esp_target_alloc, .target_destroy = esp_target_destroy, - .slave_alloc = esp_slave_alloc, + .sdev_init = esp_sdev_init, .slave_configure = esp_slave_configure, - .slave_destroy = esp_slave_destroy, + .sdev_destroy = esp_sdev_destroy, .eh_abort_handler = esp_eh_abort_handler, .eh_bus_reset_handler = esp_eh_bus_reset_handler, .eh_host_reset_handler = esp_eh_host_reset_handler, diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 39aec710660c..038e38578676 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -269,7 +269,7 @@ static const struct scsi_host_template fcoe_shost_template = { .eh_abort_handler = fc_eh_abort, .eh_device_reset_handler = fc_eh_device_reset, .eh_host_reset_handler = fc_eh_host_reset, - .slave_alloc = fc_slave_alloc, + .sdev_init = fc_sdev_init, .change_queue_depth = scsi_change_queue_depth, .this_id = -1, .cmd_per_lun = 3, diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c index adec0df24bc4..a6b667054792 100644 --- a/drivers/scsi/fnic/fnic_main.c +++ b/drivers/scsi/fnic/fnic_main.c @@ -87,7 +87,7 @@ static struct libfc_function_template fnic_transport_template = { .exch_mgr_reset = fnic_exch_mgr_reset }; -static int fnic_slave_alloc(struct scsi_device *sdev) +static int fnic_sdev_init(struct scsi_device *sdev) { struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); @@ -106,7 +106,7 @@ static const struct scsi_host_template fnic_host_template = { .eh_abort_handler = fnic_abort_cmd, .eh_device_reset_handler = fnic_device_reset, .eh_host_reset_handler = fnic_host_reset, - .slave_alloc = fnic_slave_alloc, + .sdev_init = fnic_sdev_init, .change_queue_depth = scsi_change_queue_depth, .this_id = -1, .cmd_per_lun = 3, diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index a44768bceb9a..fee3cadfde1e 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -646,7 +646,7 @@ extern void hisi_sas_remove(struct platform_device *pdev); int hisi_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim); -extern int hisi_sas_slave_alloc(struct scsi_device *sdev); +extern int hisi_sas_sdev_init(struct scsi_device *sdev); extern int hisi_sas_scan_finished(struct Scsi_Host *shost, unsigned long time); extern void hisi_sas_scan_start(struct Scsi_Host *shost); extern int hisi_sas_host_reset(struct Scsi_Host *shost, int reset_type); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 53cb15f6714b..a80d8b2a9941 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -805,13 +805,13 @@ static int hisi_sas_init_device(struct domain_device *device) return rc; } -int hisi_sas_slave_alloc(struct scsi_device *sdev) +int hisi_sas_sdev_init(struct scsi_device *sdev) { struct domain_device *ddev = sdev_to_domain_dev(sdev); struct hisi_sas_device *sas_dev = ddev->lldd_dev; int rc; - rc = sas_slave_alloc(sdev); + rc = sas_sdev_init(sdev); if (rc) return rc; @@ -821,7 +821,7 @@ int hisi_sas_slave_alloc(struct scsi_device *sdev) sas_dev->dev_status = HISI_SAS_DEV_NORMAL; return 0; } -EXPORT_SYMBOL_GPL(hisi_sas_slave_alloc); +EXPORT_SYMBOL_GPL(hisi_sas_sdev_init); static int hisi_sas_dev_found(struct domain_device *device) { diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index c3e571be2222..098a9f07d143 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -1757,7 +1757,7 @@ static const struct scsi_host_template sht_v1_hw = { .scan_finished = hisi_sas_scan_finished, .scan_start = hisi_sas_scan_start, .sg_tablesize = HISI_SAS_SGE_PAGE_CNT, - .slave_alloc = hisi_sas_slave_alloc, + .sdev_init = hisi_sas_sdev_init, .shost_groups = host_v1_hw_groups, .host_reset = hisi_sas_host_reset, }; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 1a62b5d15eca..e16868801159 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -3589,7 +3589,7 @@ static const struct scsi_host_template sht_v2_hw = { .scan_finished = hisi_sas_scan_finished, .scan_start = hisi_sas_scan_start, .sg_tablesize = HISI_SAS_SGE_PAGE_CNT, - .slave_alloc = hisi_sas_slave_alloc, + .sdev_init = hisi_sas_sdev_init, .shost_groups = host_v2_hw_groups, .sdev_groups = sdev_groups_v2_hw, .host_reset = hisi_sas_host_reset, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 5db931663ae4..5758388508ec 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -3342,7 +3342,7 @@ static const struct scsi_host_template sht_v3_hw = { .map_queues = hisi_sas_map_queues, .sg_tablesize = HISI_SAS_SGE_PAGE_CNT, .sg_prot_tablesize = HISI_SAS_SGE_PAGE_CNT, - .slave_alloc = hisi_sas_slave_alloc, + .sdev_init = hisi_sas_sdev_init, .shost_groups = host_v3_hw_groups, .sdev_groups = sdev_groups_v3_hw, .tag_alloc_policy = BLK_TAG_ALLOC_RR, diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 0c49414c1f35..91552254fff4 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -283,9 +283,9 @@ static int hpsa_scan_finished(struct Scsi_Host *sh, static int hpsa_change_queue_depth(struct scsi_device *sdev, int qdepth); static int hpsa_eh_device_reset_handler(struct scsi_cmnd *scsicmd); -static int hpsa_slave_alloc(struct scsi_device *sdev); +static int hpsa_sdev_init(struct scsi_device *sdev); static int hpsa_slave_configure(struct scsi_device *sdev); -static void hpsa_slave_destroy(struct scsi_device *sdev); +static void hpsa_sdev_destroy(struct scsi_device *sdev); static void hpsa_update_scsi_devices(struct ctlr_info *h); static int check_for_unit_attention(struct ctlr_info *h, @@ -978,9 +978,9 @@ static const struct scsi_host_template hpsa_driver_template = { .this_id = -1, .eh_device_reset_handler = hpsa_eh_device_reset_handler, .ioctl = hpsa_ioctl, - .slave_alloc = hpsa_slave_alloc, + .sdev_init = hpsa_sdev_init, .slave_configure = hpsa_slave_configure, - .slave_destroy = hpsa_slave_destroy, + .sdev_destroy = hpsa_sdev_destroy, #ifdef CONFIG_COMPAT .compat_ioctl = hpsa_compat_ioctl, #endif @@ -2107,7 +2107,7 @@ static struct hpsa_scsi_dev_t *lookup_hpsa_scsi_dev(struct ctlr_info *h, return NULL; } -static int hpsa_slave_alloc(struct scsi_device *sdev) +static int hpsa_sdev_init(struct scsi_device *sdev) { struct hpsa_scsi_dev_t *sd = NULL; unsigned long flags; @@ -2173,7 +2173,7 @@ static int hpsa_slave_configure(struct scsi_device *sdev) return 0; } -static void hpsa_slave_destroy(struct scsi_device *sdev) +static void hpsa_sdev_destroy(struct scsi_device *sdev) { struct hpsa_scsi_dev_t *hdev = NULL; diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index e66c3ef74267..48417615bffe 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -3393,7 +3393,7 @@ static int ibmvfc_scan_finished(struct Scsi_Host *shost, unsigned long time) } /** - * ibmvfc_slave_alloc - Setup the device's task set value + * ibmvfc_sdev_init - Setup the device's task set value * @sdev: struct scsi_device device to configure * * Set the device's task set value so that error handling works as @@ -3402,7 +3402,7 @@ static int ibmvfc_scan_finished(struct Scsi_Host *shost, unsigned long time) * Returns: * 0 on success / -ENXIO if device does not exist **/ -static int ibmvfc_slave_alloc(struct scsi_device *sdev) +static int ibmvfc_sdev_init(struct scsi_device *sdev) { struct Scsi_Host *shost = sdev->host; struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); @@ -3696,7 +3696,7 @@ static const struct scsi_host_template driver_template = { .eh_device_reset_handler = ibmvfc_eh_device_reset_handler, .eh_target_reset_handler = ibmvfc_eh_target_reset_handler, .eh_host_reset_handler = ibmvfc_eh_host_reset_handler, - .slave_alloc = ibmvfc_slave_alloc, + .sdev_init = ibmvfc_sdev_init, .slave_configure = ibmvfc_slave_configure, .target_alloc = ibmvfc_target_alloc, .scan_finished = ibmvfc_scan_finished, diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 31cf2d31cceb..41bc54e27896 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4745,13 +4745,13 @@ static struct ipr_resource_entry *ipr_find_sdev(struct scsi_device *sdev) } /** - * ipr_slave_destroy - Unconfigure a SCSI device + * ipr_sdev_destroy - Unconfigure a SCSI device * @sdev: scsi device struct * * Return value: * nothing **/ -static void ipr_slave_destroy(struct scsi_device *sdev) +static void ipr_sdev_destroy(struct scsi_device *sdev) { struct ipr_resource_entry *res; struct ipr_ioa_cfg *ioa_cfg; @@ -4815,7 +4815,7 @@ static int ipr_device_configure(struct scsi_device *sdev, } /** - * ipr_slave_alloc - Prepare for commands to a device. + * ipr_sdev_init - Prepare for commands to a device. * @sdev: scsi device struct * * This function saves a pointer to the resource entry @@ -4826,7 +4826,7 @@ static int ipr_device_configure(struct scsi_device *sdev, * Return value: * 0 on success / -ENXIO if device does not exist **/ -static int ipr_slave_alloc(struct scsi_device *sdev) +static int ipr_sdev_init(struct scsi_device *sdev) { struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *) sdev->host->hostdata; struct ipr_resource_entry *res; @@ -6398,9 +6398,9 @@ static const struct scsi_host_template driver_template = { .eh_abort_handler = ipr_eh_abort, .eh_device_reset_handler = ipr_eh_dev_reset, .eh_host_reset_handler = ipr_eh_host_reset, - .slave_alloc = ipr_slave_alloc, + .sdev_init = ipr_sdev_init, .device_configure = ipr_device_configure, - .slave_destroy = ipr_slave_destroy, + .sdev_destroy = ipr_sdev_destroy, .scan_finished = ipr_scan_finished, .target_destroy = ipr_target_destroy, .change_queue_depth = ipr_change_queue_depth, diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c index 80be3a936d92..fd1ef06655cb 100644 --- a/drivers/scsi/libfc/fc_fcp.c +++ b/drivers/scsi/libfc/fc_fcp.c @@ -2222,13 +2222,13 @@ int fc_eh_host_reset(struct scsi_cmnd *sc_cmd) EXPORT_SYMBOL(fc_eh_host_reset); /** - * fc_slave_alloc() - Configure the queue depth of a Scsi_Host + * fc_sdev_init() - Configure the queue depth of a Scsi_Host * @sdev: The SCSI device that identifies the SCSI host * * Configures queue depth based on host's cmd_per_len. If not set * then we use the libfc default. */ -int fc_slave_alloc(struct scsi_device *sdev) +int fc_sdev_init(struct scsi_device *sdev) { struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); @@ -2238,7 +2238,7 @@ int fc_slave_alloc(struct scsi_device *sdev) scsi_change_queue_depth(sdev, FC_FCP_DFLT_QUEUE_DEPTH); return 0; } -EXPORT_SYMBOL(fc_slave_alloc); +EXPORT_SYMBOL(fc_sdev_init); /** * fc_fcp_destroy() - Tear down the FCP layer for a given local port diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index da11d32840e2..e7d5093f7c5d 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -1194,14 +1194,14 @@ void sas_task_abort(struct sas_task *task) } EXPORT_SYMBOL_GPL(sas_task_abort); -int sas_slave_alloc(struct scsi_device *sdev) +int sas_sdev_init(struct scsi_device *sdev) { if (dev_is_sata(sdev_to_domain_dev(sdev)) && sdev->lun) return -ENXIO; return 0; } -EXPORT_SYMBOL_GPL(sas_slave_alloc); +EXPORT_SYMBOL_GPL(sas_sdev_init); void sas_target_destroy(struct scsi_target *starget) { diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 905026a4782c..10f6e8ff682f 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -6226,7 +6226,7 @@ error: } /** - * lpfc_slave_alloc - scsi_host_template slave_alloc entry point + * lpfc_sdev_init - scsi_host_template sdev_init entry point * @sdev: Pointer to scsi_device. * * This routine populates the cmds_per_lun count + 2 scsi_bufs into this host's @@ -6239,7 +6239,7 @@ error: * 0 - Success **/ static int -lpfc_slave_alloc(struct scsi_device *sdev) +lpfc_sdev_init(struct scsi_device *sdev) { struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata; struct lpfc_hba *phba = vport->phba; @@ -6371,13 +6371,13 @@ lpfc_slave_configure(struct scsi_device *sdev) } /** - * lpfc_slave_destroy - slave_destroy entry point of SHT data structure + * lpfc_sdev_destroy - sdev_destroy entry point of SHT data structure * @sdev: Pointer to scsi_device. * * This routine sets @sdev hostatdata filed to null. **/ static void -lpfc_slave_destroy(struct scsi_device *sdev) +lpfc_sdev_destroy(struct scsi_device *sdev) { struct lpfc_vport *vport = (struct lpfc_vport *) sdev->host->hostdata; struct lpfc_hba *phba = vport->phba; @@ -6737,7 +6737,7 @@ lpfc_no_command(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) } static int -lpfc_no_slave(struct scsi_device *sdev) +lpfc_no_sdev(struct scsi_device *sdev) { return -ENODEV; } @@ -6748,8 +6748,8 @@ struct scsi_host_template lpfc_template_nvme = { .proc_name = LPFC_DRIVER_NAME, .info = lpfc_info, .queuecommand = lpfc_no_command, - .slave_alloc = lpfc_no_slave, - .slave_configure = lpfc_no_slave, + .sdev_init = lpfc_no_sdev, + .slave_configure = lpfc_no_sdev, .scan_finished = lpfc_scan_finished, .this_id = -1, .sg_tablesize = 1, @@ -6772,9 +6772,9 @@ struct scsi_host_template lpfc_template = { .eh_device_reset_handler = lpfc_device_reset_handler, .eh_target_reset_handler = lpfc_target_reset_handler, .eh_host_reset_handler = lpfc_host_reset_handler, - .slave_alloc = lpfc_slave_alloc, + .sdev_init = lpfc_sdev_init, .slave_configure = lpfc_slave_configure, - .slave_destroy = lpfc_slave_destroy, + .sdev_destroy = lpfc_sdev_destroy, .scan_finished = lpfc_scan_finished, .this_id = -1, .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, @@ -6799,9 +6799,9 @@ struct scsi_host_template lpfc_vport_template = { .eh_target_reset_handler = lpfc_target_reset_handler, .eh_bus_reset_handler = NULL, .eh_host_reset_handler = NULL, - .slave_alloc = lpfc_slave_alloc, + .sdev_init = lpfc_sdev_init, .slave_configure = lpfc_slave_configure, - .slave_destroy = lpfc_slave_destroy, + .sdev_destroy = lpfc_sdev_destroy, .scan_finished = lpfc_scan_finished, .this_id = -1, .sg_tablesize = LPFC_DEFAULT_SG_SEG_CNT, diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 8e75e2e279a4..7bdd28af62ea 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2109,7 +2109,7 @@ static int megasas_device_configure(struct scsi_device *sdev, return 0; } -static int megasas_slave_alloc(struct scsi_device *sdev) +static int megasas_sdev_init(struct scsi_device *sdev) { u16 pd_index = 0, ld_tgt_id; struct megasas_instance *instance ; @@ -2154,7 +2154,7 @@ scan_target: return 0; } -static void megasas_slave_destroy(struct scsi_device *sdev) +static void megasas_sdev_destroy(struct scsi_device *sdev) { u16 ld_tgt_id; struct megasas_instance *instance; @@ -3511,8 +3511,8 @@ static const struct scsi_host_template megasas_template = { .name = "Avago SAS based MegaRAID driver", .proc_name = "megaraid_sas", .device_configure = megasas_device_configure, - .slave_alloc = megasas_slave_alloc, - .slave_destroy = megasas_slave_destroy, + .sdev_init = megasas_sdev_init, + .sdev_destroy = megasas_sdev_destroy, .queuecommand = megasas_queue_command, .eh_target_reset_handler = megasas_reset_target, .eh_abort_handler = megasas_task_abort, diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 5f2f67acf8bf..6b9f453565f2 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -4465,14 +4465,14 @@ static int mpi3mr_scan_finished(struct Scsi_Host *shost, } /** - * mpi3mr_slave_destroy - Slave destroy callback handler + * mpi3mr_sdev_destroy - Slave destroy callback handler * @sdev: SCSI device reference * * Cleanup and free per device(lun) private data. * * Return: Nothing. */ -static void mpi3mr_slave_destroy(struct scsi_device *sdev) +static void mpi3mr_sdev_destroy(struct scsi_device *sdev) { struct Scsi_Host *shost; struct mpi3mr_ioc *mrioc; @@ -4599,14 +4599,14 @@ static int mpi3mr_device_configure(struct scsi_device *sdev, } /** - * mpi3mr_slave_alloc -Slave alloc callback handler + * mpi3mr_sdev_init -Slave alloc callback handler * @sdev: SCSI device reference * * Allocate per device(lun) private data and initialize it. * * Return: 0 on success -ENOMEM on memory allocation failure. */ -static int mpi3mr_slave_alloc(struct scsi_device *sdev) +static int mpi3mr_sdev_init(struct scsi_device *sdev) { struct Scsi_Host *shost; struct mpi3mr_ioc *mrioc; @@ -5062,10 +5062,10 @@ static const struct scsi_host_template mpi3mr_driver_template = { .proc_name = MPI3MR_DRIVER_NAME, .queuecommand = mpi3mr_qcmd, .target_alloc = mpi3mr_target_alloc, - .slave_alloc = mpi3mr_slave_alloc, + .sdev_init = mpi3mr_sdev_init, .device_configure = mpi3mr_device_configure, .target_destroy = mpi3mr_target_destroy, - .slave_destroy = mpi3mr_slave_destroy, + .sdev_destroy = mpi3mr_sdev_destroy, .scan_finished = mpi3mr_scan_finished, .scan_start = mpi3mr_scan_start, .change_queue_depth = mpi3mr_change_queue_depth, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index f2a55aa5fe65..f8df6a83c9b7 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2026,14 +2026,14 @@ scsih_target_destroy(struct scsi_target *starget) } /** - * scsih_slave_alloc - device add routine + * scsih_sdev_init - device add routine * @sdev: scsi device struct * * Return: 0 if ok. Any other return is assumed to be an error and * the device is ignored. */ static int -scsih_slave_alloc(struct scsi_device *sdev) +scsih_sdev_init(struct scsi_device *sdev) { struct Scsi_Host *shost; struct MPT3SAS_ADAPTER *ioc; @@ -2108,11 +2108,11 @@ scsih_slave_alloc(struct scsi_device *sdev) } /** - * scsih_slave_destroy - device destroy routine + * scsih_sdev_destroy - device destroy routine * @sdev: scsi device struct */ static void -scsih_slave_destroy(struct scsi_device *sdev) +scsih_sdev_destroy(struct scsi_device *sdev) { struct MPT3SAS_TARGET *sas_target_priv_data; struct scsi_target *starget; @@ -11905,10 +11905,10 @@ static const struct scsi_host_template mpt2sas_driver_template = { .proc_name = MPT2SAS_DRIVER_NAME, .queuecommand = scsih_qcmd, .target_alloc = scsih_target_alloc, - .slave_alloc = scsih_slave_alloc, + .sdev_init = scsih_sdev_init, .device_configure = scsih_device_configure, .target_destroy = scsih_target_destroy, - .slave_destroy = scsih_slave_destroy, + .sdev_destroy = scsih_sdev_destroy, .scan_finished = scsih_scan_finished, .scan_start = scsih_scan_start, .change_queue_depth = scsih_change_queue_depth, @@ -11943,10 +11943,10 @@ static const struct scsi_host_template mpt3sas_driver_template = { .proc_name = MPT3SAS_DRIVER_NAME, .queuecommand = scsih_qcmd, .target_alloc = scsih_target_alloc, - .slave_alloc = scsih_slave_alloc, + .sdev_init = scsih_sdev_init, .device_configure = scsih_device_configure, .target_destroy = scsih_target_destroy, - .slave_destroy = scsih_slave_destroy, + .sdev_destroy = scsih_sdev_destroy, .scan_finished = scsih_scan_finished, .scan_start = scsih_scan_start, .change_queue_depth = scsih_change_queue_depth, diff --git a/drivers/scsi/myrb.c b/drivers/scsi/myrb.c index a7e64b867c8e..154ec80bcd11 100644 --- a/drivers/scsi/myrb.c +++ b/drivers/scsi/myrb.c @@ -1619,7 +1619,7 @@ static int myrb_queuecommand(struct Scsi_Host *shost, return myrb_pthru_queuecommand(shost, scmd); } -static int myrb_ldev_slave_alloc(struct scsi_device *sdev) +static int myrb_ldev_sdev_init(struct scsi_device *sdev) { struct myrb_hba *cb = shost_priv(sdev->host); struct myrb_ldev_info *ldev_info; @@ -1665,7 +1665,7 @@ static int myrb_ldev_slave_alloc(struct scsi_device *sdev) return 0; } -static int myrb_pdev_slave_alloc(struct scsi_device *sdev) +static int myrb_pdev_sdev_init(struct scsi_device *sdev) { struct myrb_hba *cb = shost_priv(sdev->host); struct myrb_pdev_state *pdev_info; @@ -1701,7 +1701,7 @@ static int myrb_pdev_slave_alloc(struct scsi_device *sdev) return 0; } -static int myrb_slave_alloc(struct scsi_device *sdev) +static int myrb_sdev_init(struct scsi_device *sdev) { if (sdev->channel > myrb_logical_channel(sdev->host)) return -ENXIO; @@ -1710,9 +1710,9 @@ static int myrb_slave_alloc(struct scsi_device *sdev) return -ENXIO; if (sdev->channel == myrb_logical_channel(sdev->host)) - return myrb_ldev_slave_alloc(sdev); + return myrb_ldev_sdev_init(sdev); - return myrb_pdev_slave_alloc(sdev); + return myrb_pdev_sdev_init(sdev); } static int myrb_slave_configure(struct scsi_device *sdev) @@ -1741,7 +1741,7 @@ static int myrb_slave_configure(struct scsi_device *sdev) return 0; } -static void myrb_slave_destroy(struct scsi_device *sdev) +static void myrb_sdev_destroy(struct scsi_device *sdev) { kfree(sdev->hostdata); } @@ -2208,9 +2208,9 @@ static const struct scsi_host_template myrb_template = { .proc_name = "myrb", .queuecommand = myrb_queuecommand, .eh_host_reset_handler = myrb_host_reset, - .slave_alloc = myrb_slave_alloc, + .sdev_init = myrb_sdev_init, .slave_configure = myrb_slave_configure, - .slave_destroy = myrb_slave_destroy, + .sdev_destroy = myrb_sdev_destroy, .bios_param = myrb_biosparam, .cmd_size = sizeof(struct myrb_cmdblk), .shost_groups = myrb_shost_groups, diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 1469d0c54e45..a485e95d2d35 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -1786,7 +1786,7 @@ static unsigned short myrs_translate_ldev(struct myrs_hba *cs, return ldev_num; } -static int myrs_slave_alloc(struct scsi_device *sdev) +static int myrs_sdev_init(struct scsi_device *sdev) { struct myrs_hba *cs = shost_priv(sdev->host); unsigned char status; @@ -1910,7 +1910,7 @@ static int myrs_slave_configure(struct scsi_device *sdev) return 0; } -static void myrs_slave_destroy(struct scsi_device *sdev) +static void myrs_sdev_destroy(struct scsi_device *sdev) { kfree(sdev->hostdata); } @@ -1921,9 +1921,9 @@ static const struct scsi_host_template myrs_template = { .proc_name = "myrs", .queuecommand = myrs_queuecommand, .eh_host_reset_handler = myrs_host_reset, - .slave_alloc = myrs_slave_alloc, + .sdev_init = myrs_sdev_init, .slave_configure = myrs_slave_configure, - .slave_destroy = myrs_slave_destroy, + .sdev_destroy = myrs_sdev_destroy, .cmd_size = sizeof(struct myrs_cmdblk), .shost_groups = myrs_shost_groups, .sdev_groups = myrs_sdev_groups, diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c index 35869b4f9329..8753eab526ab 100644 --- a/drivers/scsi/ncr53c8xx.c +++ b/drivers/scsi/ncr53c8xx.c @@ -7786,7 +7786,7 @@ static void __init ncr_getclock (struct ncb *np, int mult) /*===================== LINUX ENTRY POINTS SECTION ==========================*/ -static int ncr53c8xx_slave_alloc(struct scsi_device *device) +static int ncr53c8xx_sdev_init(struct scsi_device *device) { struct Scsi_Host *host = device->host; struct ncb *np = ((struct host_data *) host->hostdata)->ncb; @@ -8094,7 +8094,7 @@ struct Scsi_Host * __init ncr_attach(struct scsi_host_template *tpnt, tpnt->queuecommand = ncr53c8xx_queue_command; tpnt->slave_configure = ncr53c8xx_slave_configure; - tpnt->slave_alloc = ncr53c8xx_slave_alloc; + tpnt->sdev_init = ncr53c8xx_sdev_init; tpnt->eh_bus_reset_handler = ncr53c8xx_bus_reset; tpnt->can_queue = SCSI_NCR_CAN_QUEUE; tpnt->this_id = 7; diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 4c5881917d76..d9e6e75e801c 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -125,7 +125,7 @@ MODULE_DEVICE_TABLE(pci, pmcraid_pci_table); /** - * pmcraid_slave_alloc - Prepare for commands to a device + * pmcraid_sdev_init - Prepare for commands to a device * @scsi_dev: scsi device struct * * This function is called by mid-layer prior to sending any command to the new @@ -136,7 +136,7 @@ MODULE_DEVICE_TABLE(pci, pmcraid_pci_table); * Return value: * 0 on success / -ENXIO if device does not exist */ -static int pmcraid_slave_alloc(struct scsi_device *scsi_dev) +static int pmcraid_sdev_init(struct scsi_device *scsi_dev) { struct pmcraid_resource_entry *temp, *res = NULL; struct pmcraid_instance *pinstance; @@ -248,17 +248,17 @@ static int pmcraid_device_configure(struct scsi_device *scsi_dev, } /** - * pmcraid_slave_destroy - Unconfigure a SCSI device before removing it + * pmcraid_sdev_destroy - Unconfigure a SCSI device before removing it * * @scsi_dev: scsi device struct * * This is called by mid-layer before removing a device. Pointer assignments - * done in pmcraid_slave_alloc will be reset to NULL here. + * done in pmcraid_sdev_init will be reset to NULL here. * * Return value * none */ -static void pmcraid_slave_destroy(struct scsi_device *scsi_dev) +static void pmcraid_sdev_destroy(struct scsi_device *scsi_dev) { struct pmcraid_resource_entry *res; @@ -3668,9 +3668,9 @@ static const struct scsi_host_template pmcraid_host_template = { .eh_device_reset_handler = pmcraid_eh_device_reset_handler, .eh_host_reset_handler = pmcraid_eh_host_reset_handler, - .slave_alloc = pmcraid_slave_alloc, + .sdev_init = pmcraid_sdev_init, .device_configure = pmcraid_device_configure, - .slave_destroy = pmcraid_slave_destroy, + .sdev_destroy = pmcraid_sdev_destroy, .change_queue_depth = pmcraid_change_queue_depth, .can_queue = PMCRAID_MAX_IO_CMD, .this_id = -1, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 7f980e6141c2..686e69d3ec98 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1934,7 +1934,7 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) } static int -qla2xxx_slave_alloc(struct scsi_device *sdev) +qla2xxx_sdev_init(struct scsi_device *sdev) { struct fc_rport *rport = starget_to_rport(scsi_target(sdev)); @@ -1957,7 +1957,7 @@ qla2xxx_slave_configure(struct scsi_device *sdev) } static void -qla2xxx_slave_destroy(struct scsi_device *sdev) +qla2xxx_sdev_destroy(struct scsi_device *sdev) { sdev->hostdata = NULL; } @@ -8088,8 +8088,8 @@ struct scsi_host_template qla2xxx_driver_template = { .slave_configure = qla2xxx_slave_configure, - .slave_alloc = qla2xxx_slave_alloc, - .slave_destroy = qla2xxx_slave_destroy, + .sdev_init = qla2xxx_sdev_init, + .sdev_destroy = qla2xxx_sdev_destroy, .scan_finished = qla2xxx_scan_finished, .scan_start = qla2xxx_scan_start, .change_queue_depth = scsi_change_queue_depth, diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index d91f54a6e752..863071f80d39 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -160,7 +160,7 @@ static int qla4xxx_eh_abort(struct scsi_cmnd *cmd); static int qla4xxx_eh_device_reset(struct scsi_cmnd *cmd); static int qla4xxx_eh_target_reset(struct scsi_cmnd *cmd); static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd); -static int qla4xxx_slave_alloc(struct scsi_device *device); +static int qla4xxx_sdev_init(struct scsi_device *device); static umode_t qla4_attr_is_visible(int param_type, int param); static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type); @@ -234,7 +234,7 @@ static struct scsi_host_template qla4xxx_driver_template = { .eh_host_reset_handler = qla4xxx_eh_host_reset, .eh_timed_out = qla4xxx_eh_cmd_timed_out, - .slave_alloc = qla4xxx_slave_alloc, + .sdev_init = qla4xxx_sdev_init, .change_queue_depth = scsi_change_queue_depth, .this_id = -1, @@ -9052,7 +9052,7 @@ static void qla4xxx_config_dma_addressing(struct scsi_qla_host *ha) } } -static int qla4xxx_slave_alloc(struct scsi_device *sdev) +static int qla4xxx_sdev_init(struct scsi_device *sdev) { struct iscsi_cls_session *cls_sess; struct iscsi_session *sess; diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index b52513eeeafa..8d7b6773b3a5 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -5879,10 +5879,10 @@ static struct sdebug_dev_info *find_build_dev_info(struct scsi_device *sdev) return open_devip; } -static int scsi_debug_slave_alloc(struct scsi_device *sdp) +static int scsi_debug_sdev_init(struct scsi_device *sdp) { if (sdebug_verbose) - pr_info("slave_alloc <%u %u %u %llu>\n", + pr_info("sdev_init <%u %u %u %llu>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); return 0; @@ -5927,14 +5927,14 @@ static int scsi_debug_slave_configure(struct scsi_device *sdp) return 0; } -static void scsi_debug_slave_destroy(struct scsi_device *sdp) +static void scsi_debug_sdev_destroy(struct scsi_device *sdp) { struct sdebug_dev_info *devip = (struct sdebug_dev_info *)sdp->hostdata; struct sdebug_err_inject *err; if (sdebug_verbose) - pr_info("slave_destroy <%u %u %u %llu>\n", + pr_info("sdev_destroy <%u %u %u %llu>\n", sdp->host->host_no, sdp->channel, sdp->id, sdp->lun); if (!devip) @@ -8712,9 +8712,9 @@ static struct scsi_host_template sdebug_driver_template = { .proc_name = sdebug_proc_name, .name = "SCSI DEBUG", .info = scsi_debug_info, - .slave_alloc = scsi_debug_slave_alloc, + .sdev_init = scsi_debug_sdev_init, .slave_configure = scsi_debug_slave_configure, - .slave_destroy = scsi_debug_slave_destroy, + .sdev_destroy = scsi_debug_sdev_destroy, .ioctl = scsi_debug_ioctl, .queuecommand = scsi_debug_queuecommand, .change_queue_depth = sdebug_change_qdepth, diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 042329b74c6e..f981098ad6bc 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -265,7 +265,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, * scsi_alloc_sdev - allocate and setup a scsi_Device * @starget: which target to allocate a &scsi_device for * @lun: which lun - * @hostdata: usually NULL and set by ->slave_alloc instead + * @hostdata: usually NULL and set by ->sdev_init instead * * Description: * Allocate, initialize for io, and return a pointer to a scsi_Device. @@ -312,7 +312,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, sdev->sdev_gendev.parent = get_device(&starget->dev); sdev->sdev_target = starget; - /* usually NULL and set by ->slave_alloc instead */ + /* usually NULL and set by ->sdev_init instead */ sdev->hostdata = hostdata; /* if the device needs this changing, it may do so in the @@ -363,8 +363,8 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, scsi_sysfs_device_initialize(sdev); - if (shost->hostt->slave_alloc) { - ret = shost->hostt->slave_alloc(sdev); + if (shost->hostt->sdev_init) { + ret = shost->hostt->sdev_init(sdev); if (ret) { /* * if LLDD reports slave not present, don't clutter diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index f3a1ecb42128..5f164a4a0317 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -1513,8 +1513,8 @@ void __scsi_remove_device(struct scsi_device *sdev) kref_put(&sdev->host->tagset_refcnt, scsi_mq_free_tags); cancel_work_sync(&sdev->requeue_work); - if (sdev->host->hostt->slave_destroy) - sdev->host->hostt->slave_destroy(sdev); + if (sdev->host->hostt->sdev_destroy) + sdev->host->hostt->sdev_destroy(sdev); transport_destroy_device(dev); /* diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 870f37b70546..0a739c0cf832 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -6490,7 +6490,7 @@ out: return SUCCESS; } -static int pqi_slave_alloc(struct scsi_device *sdev) +static int pqi_sdev_init(struct scsi_device *sdev) { struct pqi_scsi_dev *device; unsigned long flags; @@ -6574,7 +6574,7 @@ static int pqi_slave_configure(struct scsi_device *sdev) return rc; } -static void pqi_slave_destroy(struct scsi_device *sdev) +static void pqi_sdev_destroy(struct scsi_device *sdev) { struct pqi_ctrl_info *ctrl_info; struct pqi_scsi_dev *device; @@ -7549,9 +7549,9 @@ static const struct scsi_host_template pqi_driver_template = { .eh_device_reset_handler = pqi_eh_device_reset_handler, .eh_abort_handler = pqi_eh_abort_handler, .ioctl = pqi_ioctl, - .slave_alloc = pqi_slave_alloc, + .sdev_init = pqi_sdev_init, .slave_configure = pqi_slave_configure, - .slave_destroy = pqi_slave_destroy, + .sdev_destroy = pqi_sdev_destroy, .map_queues = pqi_map_queues, .sdev_groups = pqi_sdev_groups, .shost_groups = pqi_shost_groups, diff --git a/drivers/scsi/snic/snic_main.c b/drivers/scsi/snic/snic_main.c index 9be3f0193145..14830b6ab94b 100644 --- a/drivers/scsi/snic/snic_main.c +++ b/drivers/scsi/snic/snic_main.c @@ -42,11 +42,11 @@ module_param(snic_max_qdepth, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(snic_max_qdepth, "Queue depth to report for each LUN"); /* - * snic_slave_alloc : callback function to SCSI Mid Layer, called on + * snic_sdev_init : callback function to SCSI Mid Layer, called on * scsi device initialization. */ static int -snic_slave_alloc(struct scsi_device *sdev) +snic_sdev_init(struct scsi_device *sdev) { struct snic_tgt *tgt = starget_to_tgt(scsi_target(sdev)); @@ -107,7 +107,7 @@ static const struct scsi_host_template snic_host_template = { .eh_abort_handler = snic_abort_cmd, .eh_device_reset_handler = snic_device_reset, .eh_host_reset_handler = snic_host_reset, - .slave_alloc = snic_slave_alloc, + .sdev_init = snic_sdev_init, .slave_configure = snic_slave_configure, .change_queue_depth = snic_change_queue_depth, .this_id = -1, diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c index 7ceb982040a5..0f18591539e7 100644 --- a/drivers/scsi/storvsc_drv.c +++ b/drivers/scsi/storvsc_drv.c @@ -1875,7 +1875,7 @@ static struct scsi_host_template scsi_driver = { .eh_host_reset_handler = storvsc_host_reset_handler, .proc_name = "storvsc_host", .eh_timed_out = storvsc_eh_timed_out, - .slave_alloc = storvsc_device_alloc, + .sdev_init = storvsc_device_alloc, .slave_configure = storvsc_device_configure, .cmd_per_lun = 2048, .this_id = -1, diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index a2560cc807d3..e7a17f87027d 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c @@ -765,7 +765,7 @@ static void sym_tune_dev_queuing(struct sym_tcb *tp, int lun, u_short reqtags) } } -static int sym53c8xx_slave_alloc(struct scsi_device *sdev) +static int sym53c8xx_sdev_init(struct scsi_device *sdev) { struct sym_hcb *np = sym_get_hcb(sdev->host); struct sym_tcb *tp = &np->target[sdev->id]; @@ -861,14 +861,14 @@ static int sym53c8xx_slave_configure(struct scsi_device *sdev) return 0; } -static void sym53c8xx_slave_destroy(struct scsi_device *sdev) +static void sym53c8xx_sdev_destroy(struct scsi_device *sdev) { struct sym_hcb *np = sym_get_hcb(sdev->host); struct sym_tcb *tp = &np->target[sdev->id]; struct sym_lcb *lp = sym_lp(tp, sdev->lun); unsigned long flags; - /* if slave_alloc returned before allocating a sym_lcb, return */ + /* if sdev_init returned before allocating a sym_lcb, return */ if (!lp) return; @@ -1684,9 +1684,9 @@ static const struct scsi_host_template sym2_template = { .info = sym53c8xx_info, .cmd_size = sizeof(struct sym_ucmd), .queuecommand = sym53c8xx_queue_command, - .slave_alloc = sym53c8xx_slave_alloc, + .sdev_init = sym53c8xx_sdev_init, .slave_configure = sym53c8xx_slave_configure, - .slave_destroy = sym53c8xx_slave_destroy, + .sdev_destroy = sym53c8xx_sdev_destroy, .eh_abort_handler = sym53c8xx_eh_abort_handler, .eh_target_reset_handler = sym53c8xx_eh_target_reset_handler, .eh_bus_reset_handler = sym53c8xx_eh_bus_reset_handler, diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index 8471f38b730e..4dab566c913b 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -801,7 +801,7 @@ static const struct scsi_host_template virtscsi_host_template = { .eh_abort_handler = virtscsi_abort, .eh_device_reset_handler = virtscsi_device_reset, .eh_timed_out = virtscsi_eh_timed_out, - .slave_alloc = virtscsi_device_alloc, + .sdev_init = virtscsi_device_alloc, .dma_boundary = UINT_MAX, .map_queues = virtscsi_map_queues, diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 9ec55ddc1204..8ac3b284c2ef 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -777,7 +777,7 @@ static const struct scsi_host_template scsifront_sht = { .eh_abort_handler = scsifront_eh_abort_handler, .eh_device_reset_handler = scsifront_dev_reset_handler, .slave_configure = scsifront_sdev_configure, - .slave_destroy = scsifront_sdev_destroy, + .sdev_destroy = scsifront_sdev_destroy, .cmd_per_lun = VSCSIIF_DEFAULT_CMD_PER_LUN, .can_queue = VSCSIIF_MAX_REQS, .this_id = -1, @@ -1075,7 +1075,7 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op) /* * Front device state path, used in slave_configure called - * on successfull scsi_add_device, and in slave_destroy called + * on successfull scsi_add_device, and in sdev_destroy called * on remove of a device. */ snprintf(info->dev_state_path, sizeof(info->dev_state_path), diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 6a2685333076..92b3abd6a963 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5158,12 +5158,12 @@ set_qdepth: } /** - * ufshcd_slave_alloc - handle initial SCSI device configurations + * ufshcd_sdev_init - handle initial SCSI device configurations * @sdev: pointer to SCSI device * * Return: success. */ -static int ufshcd_slave_alloc(struct scsi_device *sdev) +static int ufshcd_sdev_init(struct scsi_device *sdev) { struct ufs_hba *hba; @@ -5244,10 +5244,10 @@ static int ufshcd_device_configure(struct scsi_device *sdev, } /** - * ufshcd_slave_destroy - remove SCSI device configurations + * ufshcd_sdev_destroy - remove SCSI device configurations * @sdev: pointer to SCSI device */ -static void ufshcd_slave_destroy(struct scsi_device *sdev) +static void ufshcd_sdev_destroy(struct scsi_device *sdev) { struct ufs_hba *hba; unsigned long flags; @@ -8930,9 +8930,9 @@ static const struct scsi_host_template ufshcd_driver_template = { .map_queues = ufshcd_map_queues, .queuecommand = ufshcd_queuecommand, .mq_poll = ufshcd_poll, - .slave_alloc = ufshcd_slave_alloc, + .sdev_init = ufshcd_sdev_init, .device_configure = ufshcd_device_configure, - .slave_destroy = ufshcd_slave_destroy, + .sdev_destroy = ufshcd_sdev_destroy, .change_queue_depth = ufshcd_change_queue_depth, .eh_abort_handler = ufshcd_abort, .eh_device_reset_handler = ufshcd_eh_device_reset_handler, diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c index 9f758241d9d3..934ec5310fb9 100644 --- a/drivers/usb/image/microtek.c +++ b/drivers/usb/image/microtek.c @@ -322,7 +322,7 @@ static inline void mts_urb_abort(struct mts_desc* desc) { usb_kill_urb( desc->urb ); } -static int mts_slave_alloc (struct scsi_device *s) +static int mts_sdev_init (struct scsi_device *s) { s->inquiry_len = 0x24; return 0; @@ -626,7 +626,7 @@ static const struct scsi_host_template mts_scsi_host_template = { .this_id = -1, .emulated = 1, .dma_alignment = 511, - .slave_alloc = mts_slave_alloc, + .sdev_init = mts_sdev_init, .max_sectors= 256, /* 128 K */ }; diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index 8c8b5e6041cc..a64ce2aca9ec 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -64,7 +64,7 @@ static const char* host_info(struct Scsi_Host *host) return us->scsi_name; } -static int slave_alloc (struct scsi_device *sdev) +static int sdev_init (struct scsi_device *sdev) { struct us_data *us = host_to_us(sdev->host); @@ -127,7 +127,7 @@ static int device_configure(struct scsi_device *sdev, struct queue_limits *lim) lim->max_hw_sectors, dma_max_mapping_size(dev) >> SECTOR_SHIFT); /* - * We can't put these settings in slave_alloc() because that gets + * We can't put these settings in sdev_init() because that gets * called before the device type is known. Consequently these * settings can't be overridden via the scsi devinfo mechanism. */ @@ -637,7 +637,7 @@ static const struct scsi_host_template usb_stor_host_template = { /* unknown initiator id */ .this_id = -1, - .slave_alloc = slave_alloc, + .sdev_init = sdev_init, .device_configure = device_configure, .target_alloc = target_alloc, diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 03043d567fa1..7bd2a314b329 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -817,7 +817,7 @@ static int uas_target_alloc(struct scsi_target *starget) return 0; } -static int uas_slave_alloc(struct scsi_device *sdev) +static int uas_sdev_init(struct scsi_device *sdev) { struct uas_dev_info *devinfo = (struct uas_dev_info *)sdev->host->hostdata; @@ -905,7 +905,7 @@ static const struct scsi_host_template uas_host_template = { .name = "uas", .queuecommand = uas_queuecommand, .target_alloc = uas_target_alloc, - .slave_alloc = uas_slave_alloc, + .sdev_init = uas_sdev_init, .device_configure = uas_device_configure, .eh_abort_handler = uas_eh_abort_handler, .eh_device_reset_handler = uas_eh_device_reset_handler, diff --git a/include/linux/libata.h b/include/linux/libata.h index c1a85d46eba6..fb5e13b8d89f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1199,10 +1199,10 @@ extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); -extern int ata_scsi_slave_alloc(struct scsi_device *sdev); +extern int ata_scsi_sdev_init(struct scsi_device *sdev); int ata_scsi_device_configure(struct scsi_device *sdev, struct queue_limits *lim); -extern void ata_scsi_slave_destroy(struct scsi_device *sdev); +extern void ata_scsi_sdev_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth); extern int ata_change_queue_depth(struct ata_port *ap, struct scsi_device *sdev, @@ -1458,8 +1458,8 @@ extern const struct attribute_group *ata_common_sdev_groups[]; .this_id = ATA_SHT_THIS_ID, \ .emulated = ATA_SHT_EMULATED, \ .proc_name = drv_name, \ - .slave_alloc = ata_scsi_slave_alloc, \ - .slave_destroy = ata_scsi_slave_destroy, \ + .sdev_init = ata_scsi_sdev_init, \ + .sdev_destroy = ata_scsi_sdev_destroy, \ .bios_param = ata_std_bios_param, \ .unlock_native_capacity = ata_scsi_unlock_native_capacity,\ .max_sectors = ATA_MAX_SECTORS_LBA48 diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h index 4a9b4169e081..183d9fd50d2d 100644 --- a/include/scsi/libfc.h +++ b/include/scsi/libfc.h @@ -963,7 +963,7 @@ int fc_queuecommand(struct Scsi_Host *, struct scsi_cmnd *); int fc_eh_abort(struct scsi_cmnd *); int fc_eh_device_reset(struct scsi_cmnd *); int fc_eh_host_reset(struct scsi_cmnd *); -int fc_slave_alloc(struct scsi_device *); +int fc_sdev_init(struct scsi_device *); /* * ELS/CT interface diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 1324068dd950..f30f716ba045 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -703,7 +703,7 @@ int sas_eh_device_reset_handler(struct scsi_cmnd *cmd); int sas_eh_target_reset_handler(struct scsi_cmnd *cmd); extern void sas_target_destroy(struct scsi_target *); -extern int sas_slave_alloc(struct scsi_device *); +extern int sas_sdev_init(struct scsi_device *); extern int sas_ioctl(struct scsi_device *sdev, unsigned int cmd, void __user *arg); extern int sas_drain_work(struct sas_ha_struct *ha); @@ -751,7 +751,7 @@ void sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event, #define LIBSAS_SHT_BASE _LIBSAS_SHT_BASE \ .device_configure = sas_device_configure, \ - .slave_alloc = sas_slave_alloc, \ + .sdev_init = sas_sdev_init, \ #define LIBSAS_SHT_BASE_NO_SLAVE_INIT _LIBSAS_SHT_BASE diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 9c540f5468eb..7acd0ec82bb0 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -155,7 +155,7 @@ struct scsi_device { blist_flags_t sdev_bflags; /* black/white flags as also found in * scsi_devinfo.[hc]. For now used only to - * pass settings from slave_alloc to scsi + * pass settings from sdev_init to scsi * core. */ unsigned int eh_timeout; /* Error handling timeout */ @@ -357,7 +357,7 @@ struct scsi_target { atomic_t target_blocked; /* - * LLDs should set this in the slave_alloc host template callout. + * LLDs should set this in the sdev_init host template callout. * If set to zero then there is not limit. */ unsigned int can_queue; diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 2b4ab0369ffb..58b29cc6de09 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -168,20 +168,20 @@ struct scsi_host_template { * Return values: 0 on success, non-0 on failure * * Deallocation: If we didn't find any devices at this ID, you will - * get an immediate call to slave_destroy(). If we find something + * get an immediate call to sdev_destroy(). If we find something * here then you will get a call to slave_configure(), then the * device will be used for however long it is kept around, then when * the device is removed from the system (or * possibly at reboot - * time), you will then get a call to slave_destroy(). This is - * assuming you implement slave_configure and slave_destroy. + * time), you will then get a call to sdev_destroy(). This is + * assuming you implement slave_configure and sdev_destroy. * However, if you allocate memory and hang it off the device struct, - * then you must implement the slave_destroy() routine at a minimum + * then you must implement the sdev_destroy() routine at a minimum * in order to avoid leaking memory * each time a device is tore down. * * Status: OPTIONAL */ - int (* slave_alloc)(struct scsi_device *); + int (* sdev_init)(struct scsi_device *); /* * Once the device has responded to an INQUIRY and we know the @@ -206,7 +206,7 @@ struct scsi_host_template { * specific setup basis... * 6. Return 0 on success, non-0 on error. The device will be marked * as offline on error so that no access will occur. If you return - * non-0, your slave_destroy routine will never get called for this + * non-0, your sdev_destroy routine will never get called for this * device, so don't leave any loose memory hanging around, clean * up after yourself before returning non-0 * @@ -223,11 +223,11 @@ struct scsi_host_template { * has ceased the mid layer calls this point so that the low level * driver may completely detach itself from the scsi device and vice * versa. The low level driver is responsible for freeing any memory - * it allocated in the slave_alloc or slave_configure calls. + * it allocated in the sdev_init or slave_configure calls. * * Status: OPTIONAL */ - void (* slave_destroy)(struct scsi_device *); + void (* sdev_destroy)(struct scsi_device *); /* * Before the mid layer attempts to scan for a new device attached -- cgit v1.2.3 From 47c2e30afcec52968e50db01f92dda7d373042cb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Oct 2024 11:07:54 -0700 Subject: scsi: Rename .device_configure() into .sdev_configure() Improve naming consistency with the .sdev_prep() and .sdev_destroy() methods by renaming .device_configure() into .sdev_configure(). Cc: Christoph Hellwig Acked-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241022180839.2712439-3-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/ata/ahci.h | 2 +- drivers/ata/libata-sata.c | 8 ++++---- drivers/ata/libata-scsi.c | 7 +++---- drivers/ata/pata_macio.c | 8 ++++---- drivers/ata/sata_mv.c | 2 +- drivers/ata/sata_nv.c | 24 ++++++++++++------------ drivers/ata/sata_sil24.c | 2 +- drivers/firewire/sbp2.c | 6 +++--- drivers/scsi/hisi_sas/hisi_sas.h | 3 +-- drivers/scsi/hisi_sas/hisi_sas_main.c | 7 +++---- drivers/scsi/hisi_sas/hisi_sas_v1_hw.c | 2 +- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 2 +- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 8 ++++---- drivers/scsi/hptiop.c | 6 +++--- drivers/scsi/ipr.c | 8 ++++---- drivers/scsi/iscsi_tcp.c | 6 +++--- drivers/scsi/libsas/sas_scsi_host.c | 7 +++---- drivers/scsi/megaraid/megaraid_sas_base.c | 6 +++--- drivers/scsi/mpi3mr/mpi3mr_os.c | 8 ++++---- drivers/scsi/mpt3sas/mpt3sas_scsih.c | 8 ++++---- drivers/scsi/pmcraid.c | 8 ++++---- drivers/scsi/scsi_scan.c | 10 +++++----- drivers/ufs/core/ufshcd.c | 8 ++++---- drivers/usb/storage/scsiglue.c | 4 ++-- drivers/usb/storage/uas.c | 6 +++--- include/linux/libata.h | 11 +++++------ include/scsi/libsas.h | 5 ++--- include/scsi/scsi_host.h | 4 ++-- 28 files changed, 90 insertions(+), 96 deletions(-) (limited to 'include') diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index 8f40f75ba08c..75cdf51a7f74 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -397,7 +397,7 @@ extern const struct attribute_group *ahci_sdev_groups[]; .sdev_groups = ahci_sdev_groups, \ .change_queue_depth = ata_scsi_change_queue_depth, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .device_configure = ata_scsi_device_configure + .sdev_configure = ata_scsi_sdev_configure extern struct ata_port_operations ahci_ops; extern struct ata_port_operations ahci_platform_ops; diff --git a/drivers/ata/libata-sata.c b/drivers/ata/libata-sata.c index 9c76fb1ad2ec..ba300cc0a3a3 100644 --- a/drivers/ata/libata-sata.c +++ b/drivers/ata/libata-sata.c @@ -1313,7 +1313,7 @@ int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth) EXPORT_SYMBOL_GPL(ata_scsi_change_queue_depth); /** - * ata_sas_device_configure - Default device_configure routine for libata + * ata_sas_sdev_configure - Default sdev_configure routine for libata * devices * @sdev: SCSI device to configure * @lim: queue limits @@ -1323,14 +1323,14 @@ EXPORT_SYMBOL_GPL(ata_scsi_change_queue_depth); * Zero. */ -int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, - struct ata_port *ap) +int ata_sas_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim, + struct ata_port *ap) { ata_scsi_sdev_config(sdev); return ata_scsi_dev_config(sdev, lim, ap->link.device); } -EXPORT_SYMBOL_GPL(ata_sas_device_configure); +EXPORT_SYMBOL_GPL(ata_sas_sdev_configure); /** * ata_sas_queuecmd - Issue SCSI cdb to libata-managed device diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 7bfb57b4e659..2796c0da8257 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -1169,7 +1169,7 @@ int ata_scsi_sdev_init(struct scsi_device *sdev) EXPORT_SYMBOL_GPL(ata_scsi_sdev_init); /** - * ata_scsi_device_configure - Set SCSI device attributes + * ata_scsi_sdev_configure - Set SCSI device attributes * @sdev: SCSI device to examine * @lim: queue limits * @@ -1181,8 +1181,7 @@ EXPORT_SYMBOL_GPL(ata_scsi_sdev_init); * Defined by SCSI layer. We don't really care. */ -int ata_scsi_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +int ata_scsi_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev = __ata_scsi_find_dev(ap, sdev); @@ -1192,7 +1191,7 @@ int ata_scsi_device_configure(struct scsi_device *sdev, return 0; } -EXPORT_SYMBOL_GPL(ata_scsi_device_configure); +EXPORT_SYMBOL_GPL(ata_scsi_sdev_configure); /** * ata_scsi_sdev_destroy - SCSI device is about to be destroyed diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index f2f36e55a1f4..a8e2989d0469 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -812,8 +812,8 @@ static void pata_macio_reset_hw(struct pata_macio_priv *priv, int resume) /* Hook the standard slave config to fixup some HW related alignment * restrictions */ -static int pata_macio_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int pata_macio_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct pata_macio_priv *priv = ap->private_data; @@ -822,7 +822,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev, int rc; /* First call original */ - rc = ata_scsi_device_configure(sdev, lim); + rc = ata_scsi_sdev_configure(sdev, lim); if (rc) return rc; @@ -932,7 +932,7 @@ static const struct scsi_host_template pata_macio_sht = { /* We may not need that strict one */ .dma_boundary = ATA_DMA_BOUNDARY, .max_segment_size = PATA_MACIO_MAX_SEGMENT_SIZE, - .device_configure = pata_macio_device_configure, + .sdev_configure = pata_macio_sdev_configure, .sdev_groups = ata_common_sdev_groups, .can_queue = ATA_DEF_QUEUE, .tag_alloc_policy = BLK_TAG_ALLOC_RR, diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index b8f363370e1a..caf7b1303d4c 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -673,7 +673,7 @@ static const struct scsi_host_template mv6_sht = { .sdev_groups = ata_ncq_sdev_groups, .change_queue_depth = ata_scsi_change_queue_depth, .tag_alloc_policy = BLK_TAG_ALLOC_RR, - .device_configure = ata_scsi_device_configure + .sdev_configure = ata_scsi_sdev_configure }; static struct ata_port_operations mv5_ops = { diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c index 36d99043ef50..b62b8ebdd89f 100644 --- a/drivers/ata/sata_nv.c +++ b/drivers/ata/sata_nv.c @@ -296,8 +296,8 @@ static void nv_nf2_freeze(struct ata_port *ap); static void nv_nf2_thaw(struct ata_port *ap); static void nv_ck804_freeze(struct ata_port *ap); static void nv_ck804_thaw(struct ata_port *ap); -static int nv_adma_device_configure(struct scsi_device *sdev, - struct queue_limits *lim); +static int nv_adma_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim); static int nv_adma_check_atapi_dma(struct ata_queued_cmd *qc); static enum ata_completion_errors nv_adma_qc_prep(struct ata_queued_cmd *qc); static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc); @@ -319,8 +319,8 @@ static void nv_adma_tf_read(struct ata_port *ap, struct ata_taskfile *tf); static void nv_mcp55_thaw(struct ata_port *ap); static void nv_mcp55_freeze(struct ata_port *ap); static void nv_swncq_error_handler(struct ata_port *ap); -static int nv_swncq_device_configure(struct scsi_device *sdev, - struct queue_limits *lim); +static int nv_swncq_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim); static int nv_swncq_port_start(struct ata_port *ap); static enum ata_completion_errors nv_swncq_qc_prep(struct ata_queued_cmd *qc); static void nv_swncq_fill_sg(struct ata_queued_cmd *qc); @@ -382,7 +382,7 @@ static const struct scsi_host_template nv_adma_sht = { .can_queue = NV_ADMA_MAX_CPBS, .sg_tablesize = NV_ADMA_SGTBL_TOTAL_LEN, .dma_boundary = NV_ADMA_DMA_BOUNDARY, - .device_configure = nv_adma_device_configure, + .sdev_configure = nv_adma_sdev_configure, .sdev_groups = ata_ncq_sdev_groups, .change_queue_depth = ata_scsi_change_queue_depth, .tag_alloc_policy = BLK_TAG_ALLOC_RR, @@ -393,7 +393,7 @@ static const struct scsi_host_template nv_swncq_sht = { .can_queue = ATA_MAX_QUEUE - 1, .sg_tablesize = LIBATA_MAX_PRD, .dma_boundary = ATA_DMA_BOUNDARY, - .device_configure = nv_swncq_device_configure, + .sdev_configure = nv_swncq_sdev_configure, .sdev_groups = ata_ncq_sdev_groups, .change_queue_depth = ata_scsi_change_queue_depth, .tag_alloc_policy = BLK_TAG_ALLOC_RR, @@ -663,8 +663,8 @@ static void nv_adma_mode(struct ata_port *ap) pp->flags &= ~NV_ADMA_PORT_REGISTER_MODE; } -static int nv_adma_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int nv_adma_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct nv_adma_port_priv *pp = ap->private_data; @@ -676,7 +676,7 @@ static int nv_adma_device_configure(struct scsi_device *sdev, int adma_enable; u32 current_reg, new_reg, config_mask; - rc = ata_scsi_device_configure(sdev, lim); + rc = ata_scsi_sdev_configure(sdev, lim); if (sdev->id >= ATA_MAX_DEVICES || sdev->channel || sdev->lun) /* Not a proper libata device, ignore */ @@ -1871,8 +1871,8 @@ static void nv_swncq_host_init(struct ata_host *host) writel(~0x0, mmio + NV_INT_STATUS_MCP55); } -static int nv_swncq_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int nv_swncq_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct ata_port *ap = ata_shost_to_port(sdev->host); struct pci_dev *pdev = to_pci_dev(ap->host->dev); @@ -1882,7 +1882,7 @@ static int nv_swncq_device_configure(struct scsi_device *sdev, u8 check_maxtor = 0; unsigned char model_num[ATA_ID_PROD_LEN + 1]; - rc = ata_scsi_device_configure(sdev, lim); + rc = ata_scsi_sdev_configure(sdev, lim); if (sdev->id >= ATA_MAX_DEVICES || sdev->channel || sdev->lun) /* Not a proper libata device, ignore */ return rc; diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c index 72c03cbdaff4..3e0be0399619 100644 --- a/drivers/ata/sata_sil24.c +++ b/drivers/ata/sata_sil24.c @@ -381,7 +381,7 @@ static const struct scsi_host_template sil24_sht = { .tag_alloc_policy = BLK_TAG_ALLOC_FIFO, .sdev_groups = ata_ncq_sdev_groups, .change_queue_depth = ata_scsi_change_queue_depth, - .device_configure = ata_scsi_device_configure + .sdev_configure = ata_scsi_sdev_configure }; static struct ata_port_operations sil24_ops = { diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 4937df12c3fb..1a19828114cf 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1506,8 +1506,8 @@ static int sbp2_scsi_sdev_init(struct scsi_device *sdev) return 0; } -static int sbp2_scsi_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int sbp2_scsi_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct sbp2_logical_unit *lu = sdev->hostdata; @@ -1591,7 +1591,7 @@ static const struct scsi_host_template scsi_driver_template = { .proc_name = "sbp2", .queuecommand = sbp2_scsi_queuecommand, .sdev_init = sbp2_scsi_sdev_init, - .device_configure = sbp2_scsi_device_configure, + .sdev_configure = sbp2_scsi_sdev_configure, .eh_abort_handler = sbp2_scsi_abort, .this_id = -1, .sg_tablesize = SG_ALL, diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index fee3cadfde1e..45ea313b342b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -644,8 +644,7 @@ extern int hisi_sas_probe(struct platform_device *pdev, const struct hisi_sas_hw *ops); extern void hisi_sas_remove(struct platform_device *pdev); -int hisi_sas_device_configure(struct scsi_device *sdev, - struct queue_limits *lim); +int hisi_sas_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim); extern int hisi_sas_sdev_init(struct scsi_device *sdev); extern int hisi_sas_scan_finished(struct Scsi_Host *shost, unsigned long time); extern void hisi_sas_scan_start(struct Scsi_Host *shost); diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index a80d8b2a9941..da4a2ed8ee86 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -868,11 +868,10 @@ err_out: return rc; } -int hisi_sas_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +int hisi_sas_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) { struct domain_device *dev = sdev_to_domain_dev(sdev); - int ret = sas_device_configure(sdev, lim); + int ret = sas_sdev_configure(sdev, lim); if (ret) return ret; @@ -881,7 +880,7 @@ int hisi_sas_device_configure(struct scsi_device *sdev, return 0; } -EXPORT_SYMBOL_GPL(hisi_sas_device_configure); +EXPORT_SYMBOL_GPL(hisi_sas_sdev_configure); void hisi_sas_scan_start(struct Scsi_Host *shost) { diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index 098a9f07d143..bb78e53c66e2 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -1753,7 +1753,7 @@ static int check_fw_info_v1_hw(struct hisi_hba *hisi_hba) static const struct scsi_host_template sht_v1_hw = { LIBSAS_SHT_BASE_NO_SLAVE_INIT - .device_configure = hisi_sas_device_configure, + .sdev_configure = hisi_sas_sdev_configure, .scan_finished = hisi_sas_scan_finished, .scan_start = hisi_sas_scan_start, .sg_tablesize = HISI_SAS_SGE_PAGE_CNT, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index e16868801159..71cd5b4450c2 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -3585,7 +3585,7 @@ static int check_fw_info_v2_hw(struct hisi_hba *hisi_hba) static const struct scsi_host_template sht_v2_hw = { LIBSAS_SHT_BASE_NO_SLAVE_INIT - .device_configure = hisi_sas_device_configure, + .sdev_configure = hisi_sas_sdev_configure, .scan_finished = hisi_sas_scan_finished, .scan_start = hisi_sas_scan_start, .sg_tablesize = HISI_SAS_SGE_PAGE_CNT, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 5758388508ec..6766995e227b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2908,12 +2908,12 @@ static ssize_t iopoll_q_cnt_v3_hw_show(struct device *dev, } static DEVICE_ATTR_RO(iopoll_q_cnt_v3_hw); -static int device_configure_v3_hw(struct scsi_device *sdev, - struct queue_limits *lim) +static int sdev_configure_v3_hw(struct scsi_device *sdev, + struct queue_limits *lim) { struct Scsi_Host *shost = dev_to_shost(&sdev->sdev_gendev); struct hisi_hba *hisi_hba = shost_priv(shost); - int ret = hisi_sas_device_configure(sdev, lim); + int ret = hisi_sas_sdev_configure(sdev, lim); struct device *dev = hisi_hba->dev; if (ret) @@ -3336,7 +3336,7 @@ static void hisi_sas_map_queues(struct Scsi_Host *shost) static const struct scsi_host_template sht_v3_hw = { LIBSAS_SHT_BASE_NO_SLAVE_INIT - .device_configure = device_configure_v3_hw, + .sdev_configure = sdev_configure_v3_hw, .scan_finished = hisi_sas_scan_finished, .scan_start = hisi_sas_scan_start, .map_queues = hisi_sas_map_queues, diff --git a/drivers/scsi/hptiop.c b/drivers/scsi/hptiop.c index e889f268601b..55eee5b0f0d7 100644 --- a/drivers/scsi/hptiop.c +++ b/drivers/scsi/hptiop.c @@ -1151,8 +1151,8 @@ static struct attribute *hptiop_host_attrs[] = { ATTRIBUTE_GROUPS(hptiop_host); -static int hptiop_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int hptiop_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { if (sdev->type == TYPE_TAPE) lim->max_hw_sectors = 8192; @@ -1168,7 +1168,7 @@ static const struct scsi_host_template driver_template = { .emulated = 0, .proc_name = driver_name, .shost_groups = hptiop_host_groups, - .device_configure = hptiop_device_configure, + .sdev_configure = hptiop_sdev_configure, .this_id = -1, .change_queue_depth = hptiop_adjust_disk_queue_depth, .cmd_size = sizeof(struct hpt_cmd_priv), diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 41bc54e27896..f0444faf776a 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4769,7 +4769,7 @@ static void ipr_sdev_destroy(struct scsi_device *sdev) } /** - * ipr_device_configure - Configure a SCSI device + * ipr_sdev_configure - Configure a SCSI device * @sdev: scsi device struct * @lim: queue limits * @@ -4778,8 +4778,8 @@ static void ipr_sdev_destroy(struct scsi_device *sdev) * Return value: * 0 on success **/ -static int ipr_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int ipr_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *) sdev->host->hostdata; struct ipr_resource_entry *res; @@ -6399,7 +6399,7 @@ static const struct scsi_host_template driver_template = { .eh_device_reset_handler = ipr_eh_dev_reset, .eh_host_reset_handler = ipr_eh_host_reset, .sdev_init = ipr_sdev_init, - .device_configure = ipr_device_configure, + .sdev_configure = ipr_sdev_configure, .sdev_destroy = ipr_sdev_destroy, .scan_finished = ipr_scan_finished, .target_destroy = ipr_target_destroy, diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index c708e1059638..e81f60985193 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -1057,8 +1057,8 @@ static umode_t iscsi_sw_tcp_attr_is_visible(int param_type, int param) return 0; } -static int iscsi_sw_tcp_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int iscsi_sw_tcp_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct iscsi_sw_tcp_host *tcp_sw_host = iscsi_host_priv(sdev->host); struct iscsi_session *session = tcp_sw_host->session; @@ -1083,7 +1083,7 @@ static const struct scsi_host_template iscsi_sw_tcp_sht = { .eh_device_reset_handler= iscsi_eh_device_reset, .eh_target_reset_handler = iscsi_eh_recover_target, .dma_boundary = PAGE_SIZE - 1, - .device_configure = iscsi_sw_tcp_device_configure, + .sdev_configure = iscsi_sw_tcp_sdev_configure, .proc_name = "iscsi_tcp", .this_id = -1, .track_queue_depth = 1, diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index e7d5093f7c5d..55ce7892f217 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -804,15 +804,14 @@ EXPORT_SYMBOL_GPL(sas_target_alloc); #define SAS_DEF_QD 256 -int sas_device_configure(struct scsi_device *scsi_dev, - struct queue_limits *lim) +int sas_sdev_configure(struct scsi_device *scsi_dev, struct queue_limits *lim) { struct domain_device *dev = sdev_to_domain_dev(scsi_dev); BUG_ON(dev->rphy->identify.device_type != SAS_END_DEVICE); if (dev_is_sata(dev)) { - ata_sas_device_configure(scsi_dev, lim, dev->sata_dev.ap); + ata_sas_sdev_configure(scsi_dev, lim, dev->sata_dev.ap); return 0; } @@ -830,7 +829,7 @@ int sas_device_configure(struct scsi_device *scsi_dev, return 0; } -EXPORT_SYMBOL_GPL(sas_device_configure); +EXPORT_SYMBOL_GPL(sas_sdev_configure); int sas_change_queue_depth(struct scsi_device *sdev, int depth) { diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 7bdd28af62ea..9739f389b1d0 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -2068,8 +2068,8 @@ static void megasas_set_static_target_properties(struct scsi_device *sdev, } -static int megasas_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int megasas_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { u16 pd_index = 0; struct megasas_instance *instance; @@ -3510,7 +3510,7 @@ static const struct scsi_host_template megasas_template = { .module = THIS_MODULE, .name = "Avago SAS based MegaRAID driver", .proc_name = "megaraid_sas", - .device_configure = megasas_device_configure, + .sdev_configure = megasas_sdev_configure, .sdev_init = megasas_sdev_init, .sdev_destroy = megasas_sdev_destroy, .queuecommand = megasas_queue_command, diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 6b9f453565f2..f84bdce8c65c 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -4552,7 +4552,7 @@ static void mpi3mr_target_destroy(struct scsi_target *starget) } /** - * mpi3mr_device_configure - Slave configure callback handler + * mpi3mr_sdev_configure - Slave configure callback handler * @sdev: SCSI device reference * @lim: queue limits * @@ -4561,8 +4561,8 @@ static void mpi3mr_target_destroy(struct scsi_target *starget) * * Return: 0 always. */ -static int mpi3mr_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int mpi3mr_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct scsi_target *starget; struct Scsi_Host *shost; @@ -5063,7 +5063,7 @@ static const struct scsi_host_template mpi3mr_driver_template = { .queuecommand = mpi3mr_qcmd, .target_alloc = mpi3mr_target_alloc, .sdev_init = mpi3mr_sdev_init, - .device_configure = mpi3mr_device_configure, + .sdev_configure = mpi3mr_sdev_configure, .target_destroy = mpi3mr_target_destroy, .sdev_destroy = mpi3mr_sdev_destroy, .scan_finished = mpi3mr_scan_finished, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index f8df6a83c9b7..5f566df24a34 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -2497,7 +2497,7 @@ _scsih_enable_tlr(struct MPT3SAS_ADAPTER *ioc, struct scsi_device *sdev) } /** - * scsih_device_configure - device configure routine. + * scsih_sdev_configure - device configure routine. * @sdev: scsi device struct * @lim: queue limits * @@ -2505,7 +2505,7 @@ _scsih_enable_tlr(struct MPT3SAS_ADAPTER *ioc, struct scsi_device *sdev) * the device is ignored. */ static int -scsih_device_configure(struct scsi_device *sdev, struct queue_limits *lim) +scsih_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) { struct Scsi_Host *shost = sdev->host; struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); @@ -11906,7 +11906,7 @@ static const struct scsi_host_template mpt2sas_driver_template = { .queuecommand = scsih_qcmd, .target_alloc = scsih_target_alloc, .sdev_init = scsih_sdev_init, - .device_configure = scsih_device_configure, + .sdev_configure = scsih_sdev_configure, .target_destroy = scsih_target_destroy, .sdev_destroy = scsih_sdev_destroy, .scan_finished = scsih_scan_finished, @@ -11944,7 +11944,7 @@ static const struct scsi_host_template mpt3sas_driver_template = { .queuecommand = scsih_qcmd, .target_alloc = scsih_target_alloc, .sdev_init = scsih_sdev_init, - .device_configure = scsih_device_configure, + .sdev_configure = scsih_sdev_configure, .target_destroy = scsih_target_destroy, .sdev_destroy = scsih_sdev_destroy, .scan_finished = scsih_scan_finished, diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index d9e6e75e801c..2342ded7ee78 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -197,7 +197,7 @@ static int pmcraid_sdev_init(struct scsi_device *scsi_dev) } /** - * pmcraid_device_configure - Configures a SCSI device + * pmcraid_sdev_configure - Configures a SCSI device * @scsi_dev: scsi device struct * @lim: queue limits * @@ -210,8 +210,8 @@ static int pmcraid_sdev_init(struct scsi_device *scsi_dev) * Return value: * 0 on success */ -static int pmcraid_device_configure(struct scsi_device *scsi_dev, - struct queue_limits *lim) +static int pmcraid_sdev_configure(struct scsi_device *scsi_dev, + struct queue_limits *lim) { struct pmcraid_resource_entry *res = scsi_dev->hostdata; @@ -3669,7 +3669,7 @@ static const struct scsi_host_template pmcraid_host_template = { .eh_host_reset_handler = pmcraid_eh_host_reset_handler, .sdev_init = pmcraid_sdev_init, - .device_configure = pmcraid_device_configure, + .sdev_configure = pmcraid_sdev_configure, .sdev_destroy = pmcraid_sdev_destroy, .change_queue_depth = pmcraid_change_queue_depth, .can_queue = PMCRAID_MAX_IO_CMD, diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index f981098ad6bc..b8419561058f 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -227,7 +227,7 @@ static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, /* * realloc if new shift is calculated, which is caused by setting - * up one new default queue depth after calling ->device_configure + * up one new default queue depth after calling ->sdev_configure */ if (!need_alloc && new_shift != sdev->budget_map.shift) need_alloc = need_free = true; @@ -1074,8 +1074,8 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, else if (*bflags & BLIST_MAX_1024) lim.max_hw_sectors = 1024; - if (hostt->device_configure) - ret = hostt->device_configure(sdev, &lim); + if (hostt->sdev_configure) + ret = hostt->sdev_configure(sdev, &lim); else if (hostt->slave_configure) ret = hostt->slave_configure(sdev); if (ret) { @@ -1097,12 +1097,12 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, } /* - * The queue_depth is often changed in ->device_configure. + * The queue_depth is often changed in ->sdev_configure. * * Set up budget map again since memory consumption of the map depends * on actual queue depth. */ - if (hostt->device_configure || hostt->slave_configure) + if (hostt->sdev_configure || hostt->slave_configure) scsi_realloc_sdev_budget_map(sdev, sdev->queue_depth); if (sdev->scsi_level >= SCSI_3) diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 92b3abd6a963..661f9811ea4b 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -5206,14 +5206,14 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth) } /** - * ufshcd_device_configure - adjust SCSI device configurations + * ufshcd_sdev_configure - adjust SCSI device configurations * @sdev: pointer to SCSI device * @lim: queue limits * * Return: 0 (success). */ -static int ufshcd_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int ufshcd_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct ufs_hba *hba = shost_priv(sdev->host); struct request_queue *q = sdev->request_queue; @@ -8931,7 +8931,7 @@ static const struct scsi_host_template ufshcd_driver_template = { .queuecommand = ufshcd_queuecommand, .mq_poll = ufshcd_poll, .sdev_init = ufshcd_sdev_init, - .device_configure = ufshcd_device_configure, + .sdev_configure = ufshcd_sdev_configure, .sdev_destroy = ufshcd_sdev_destroy, .change_queue_depth = ufshcd_change_queue_depth, .eh_abort_handler = ufshcd_abort, diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c index a64ce2aca9ec..7e2ee926200d 100644 --- a/drivers/usb/storage/scsiglue.c +++ b/drivers/usb/storage/scsiglue.c @@ -88,7 +88,7 @@ static int sdev_init (struct scsi_device *sdev) return 0; } -static int device_configure(struct scsi_device *sdev, struct queue_limits *lim) +static int sdev_configure(struct scsi_device *sdev, struct queue_limits *lim) { struct us_data *us = host_to_us(sdev->host); struct device *dev = us->pusb_dev->bus->sysdev; @@ -638,7 +638,7 @@ static const struct scsi_host_template usb_stor_host_template = { .this_id = -1, .sdev_init = sdev_init, - .device_configure = device_configure, + .sdev_configure = sdev_configure, .target_alloc = target_alloc, /* lots of sg segments can be handled */ diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c index 7bd2a314b329..0bb9ea875843 100644 --- a/drivers/usb/storage/uas.c +++ b/drivers/usb/storage/uas.c @@ -832,8 +832,8 @@ static int uas_sdev_init(struct scsi_device *sdev) return 0; } -static int uas_device_configure(struct scsi_device *sdev, - struct queue_limits *lim) +static int uas_sdev_configure(struct scsi_device *sdev, + struct queue_limits *lim) { struct uas_dev_info *devinfo = sdev->hostdata; @@ -906,7 +906,7 @@ static const struct scsi_host_template uas_host_template = { .queuecommand = uas_queuecommand, .target_alloc = uas_target_alloc, .sdev_init = uas_sdev_init, - .device_configure = uas_device_configure, + .sdev_configure = uas_sdev_configure, .eh_abort_handler = uas_eh_abort_handler, .eh_device_reset_handler = uas_eh_device_reset_handler, .this_id = -1, diff --git a/include/linux/libata.h b/include/linux/libata.h index fb5e13b8d89f..7717f06a548d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1200,8 +1200,7 @@ extern int ata_std_bios_param(struct scsi_device *sdev, sector_t capacity, int geom[]); extern void ata_scsi_unlock_native_capacity(struct scsi_device *sdev); extern int ata_scsi_sdev_init(struct scsi_device *sdev); -int ata_scsi_device_configure(struct scsi_device *sdev, - struct queue_limits *lim); +int ata_scsi_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim); extern void ata_scsi_sdev_destroy(struct scsi_device *sdev); extern int ata_scsi_change_queue_depth(struct scsi_device *sdev, int queue_depth); @@ -1301,8 +1300,8 @@ extern struct ata_port *ata_port_alloc(struct ata_host *host); extern void ata_port_free(struct ata_port *ap); extern int ata_tport_add(struct device *parent, struct ata_port *ap); extern void ata_tport_delete(struct ata_port *ap); -int ata_sas_device_configure(struct scsi_device *sdev, struct queue_limits *lim, - struct ata_port *ap); +int ata_sas_sdev_configure(struct scsi_device *sdev, struct queue_limits *lim, + struct ata_port *ap); extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap); extern void ata_tf_to_fis(const struct ata_taskfile *tf, u8 pmp, int is_cmd, u8 *fis); @@ -1468,13 +1467,13 @@ extern const struct attribute_group *ata_common_sdev_groups[]; __ATA_BASE_SHT(drv_name), \ .can_queue = ATA_DEF_QUEUE, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .device_configure = ata_scsi_device_configure + .sdev_configure = ata_scsi_sdev_configure #define ATA_SUBBASE_SHT_QD(drv_name, drv_qd) \ __ATA_BASE_SHT(drv_name), \ .can_queue = drv_qd, \ .tag_alloc_policy = BLK_TAG_ALLOC_RR, \ - .device_configure = ata_scsi_device_configure + .sdev_configure = ata_scsi_sdev_configure #define ATA_BASE_SHT(drv_name) \ ATA_SUBBASE_SHT(drv_name), \ diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index f30f716ba045..ba460b6c0374 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -683,8 +683,7 @@ int sas_phy_reset(struct sas_phy *phy, int hard_reset); int sas_phy_enable(struct sas_phy *phy, int enable); extern int sas_queuecommand(struct Scsi_Host *, struct scsi_cmnd *); extern int sas_target_alloc(struct scsi_target *); -int sas_device_configure(struct scsi_device *dev, - struct queue_limits *lim); +int sas_sdev_configure(struct scsi_device *dev, struct queue_limits *lim); extern int sas_change_queue_depth(struct scsi_device *, int new_depth); extern int sas_bios_param(struct scsi_device *, struct block_device *, sector_t capacity, int *hsc); @@ -750,7 +749,7 @@ void sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event, #endif #define LIBSAS_SHT_BASE _LIBSAS_SHT_BASE \ - .device_configure = sas_device_configure, \ + .sdev_configure = sas_sdev_configure, \ .sdev_init = sas_sdev_init, \ #define LIBSAS_SHT_BASE_NO_SLAVE_INIT _LIBSAS_SHT_BASE diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 58b29cc6de09..7e1b1a54e46a 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -212,10 +212,10 @@ struct scsi_host_template { * * Status: OPTIONAL * - * Note: slave_configure is the legacy version, use device_configure for + * Note: slave_configure is the legacy version, use sdev_configure for * all new code. A driver must never define both. */ - int (* device_configure)(struct scsi_device *, struct queue_limits *lim); + int (* sdev_configure)(struct scsi_device *, struct queue_limits *lim); int (* slave_configure)(struct scsi_device *); /* -- cgit v1.2.3 From 0f98212d96a2af52e4091a199ef1d35d478d0c60 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 22 Oct 2024 11:07:56 -0700 Subject: scsi: core: Remove the .slave_configure() method Now that all SCSI drivers have been converted from .slave_configure() to .sdev_configure(), remove support for .slave_configure() from the SCSI core. Reviewed-by: Damien Le Moal Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20241022180839.2712439-5-bvanassche@acm.org Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_scan.c | 4 +--- include/scsi/scsi_host.h | 10 +++------- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index faf3dd72e21e..e770480fec5c 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1076,8 +1076,6 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (hostt->sdev_configure) ret = hostt->sdev_configure(sdev, &lim); - else if (hostt->slave_configure) - ret = hostt->slave_configure(sdev); if (ret) { queue_limits_cancel_update(sdev->request_queue); /* @@ -1102,7 +1100,7 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, * Set up budget map again since memory consumption of the map depends * on actual queue depth. */ - if (hostt->sdev_configure || hostt->slave_configure) + if (hostt->sdev_configure) scsi_realloc_sdev_budget_map(sdev, sdev->queue_depth); if (sdev->scsi_level >= SCSI_3) diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 7e1b1a54e46a..8fccfd27393e 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -169,11 +169,11 @@ struct scsi_host_template { * * Deallocation: If we didn't find any devices at this ID, you will * get an immediate call to sdev_destroy(). If we find something - * here then you will get a call to slave_configure(), then the + * here then you will get a call to sdev_configure(), then the * device will be used for however long it is kept around, then when * the device is removed from the system (or * possibly at reboot * time), you will then get a call to sdev_destroy(). This is - * assuming you implement slave_configure and sdev_destroy. + * assuming you implement sdev_configure and sdev_destroy. * However, if you allocate memory and hang it off the device struct, * then you must implement the sdev_destroy() routine at a minimum * in order to avoid leaking memory @@ -211,19 +211,15 @@ struct scsi_host_template { * up after yourself before returning non-0 * * Status: OPTIONAL - * - * Note: slave_configure is the legacy version, use sdev_configure for - * all new code. A driver must never define both. */ int (* sdev_configure)(struct scsi_device *, struct queue_limits *lim); - int (* slave_configure)(struct scsi_device *); /* * Immediately prior to deallocating the device and after all activity * has ceased the mid layer calls this point so that the low level * driver may completely detach itself from the scsi device and vice * versa. The low level driver is responsible for freeing any memory - * it allocated in the sdev_init or slave_configure calls. + * it allocated in the sdev_init or sdev_configure calls. * * Status: OPTIONAL */ -- cgit v1.2.3 From d48da4d5ed7b4a022a4e54f210575baac71f58af Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Wed, 4 Dec 2024 07:59:11 -0800 Subject: security: add trace event for cap_capable In cases where we want a stable way to observe/trace cap_capable (e.g. protection from inlining and API updates) add a tracepoint that passes: - The credentials used - The user namespace of the resource being accessed - The user namespace in which the credential provides the capability to access the targeted resource - The capability to check for - The return value of the check Signed-off-by: Jordan Rome Acked-by: Andrii Nakryiko Reviewed-by: Paul Moore Reviewed-by: Serge Hallyn Link: https://lore.kernel.org/r/20241204155911.1817092-1-linux@jordanrome.com Signed-off-by: Serge Hallyn --- MAINTAINERS | 1 + include/trace/events/capability.h | 57 +++++++++++++++++++++++++++++++++++++++ security/commoncap.c | 54 ++++++++++++++++++++++++++++--------- 3 files changed, 99 insertions(+), 13 deletions(-) create mode 100644 include/trace/events/capability.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 1e930c7a58b1..33fde7f660d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5147,6 +5147,7 @@ M: Serge Hallyn L: linux-security-module@vger.kernel.org S: Supported F: include/linux/capability.h +F: include/trace/events/capability.h F: include/uapi/linux/capability.h F: kernel/capability.c F: security/commoncap.c diff --git a/include/trace/events/capability.h b/include/trace/events/capability.h new file mode 100644 index 000000000000..17340257946c --- /dev/null +++ b/include/trace/events/capability.h @@ -0,0 +1,57 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM capability + +#if !defined(_TRACE_CAPABILITY_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_CAPABILITY_H + +#include +#include +#include + +/** + * cap_capable - called after it's determined if a task has a particular + * effective capability + * + * @cred: The credentials used + * @target_ns: The user namespace of the resource being accessed + * @capable_ns: The user namespace in which the credential provides the + * capability to access the targeted resource. + * This will be NULL if ret is not 0. + * @cap: The capability to check for + * @ret: The return value of the check: 0 if it does, -ve if it does not + * + * Allows to trace calls to cap_capable in commoncap.c + */ +TRACE_EVENT(cap_capable, + + TP_PROTO(const struct cred *cred, struct user_namespace *target_ns, + const struct user_namespace *capable_ns, int cap, int ret), + + TP_ARGS(cred, target_ns, capable_ns, cap, ret), + + TP_STRUCT__entry( + __field(const struct cred *, cred) + __field(struct user_namespace *, target_ns) + __field(const struct user_namespace *, capable_ns) + __field(int, cap) + __field(int, ret) + ), + + TP_fast_assign( + __entry->cred = cred; + __entry->target_ns = target_ns; + __entry->capable_ns = ret == 0 ? capable_ns : NULL; + __entry->cap = cap; + __entry->ret = ret; + ), + + TP_printk("cred %p, target_ns %p, capable_ns %p, cap %d, ret %d", + __entry->cred, __entry->target_ns, __entry->capable_ns, __entry->cap, + __entry->ret) +); + +#endif /* _TRACE_CAPABILITY_H */ + +/* This part must be outside protection */ +#include diff --git a/security/commoncap.c b/security/commoncap.c index 3d103069903b..bdd7603fabb8 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -27,6 +27,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + /* * If a non-root user executes a setuid-root binary in * !secure(SECURE_NOROOT) mode, then we raise capabilities. @@ -50,24 +53,24 @@ static void warn_setuid_and_fcaps_mixed(const char *fname) } /** - * cap_capable - Determine whether a task has a particular effective capability + * cap_capable_helper - Determine whether a task has a particular effective + * capability. * @cred: The credentials to use - * @targ_ns: The user namespace in which we need the capability + * @target_ns: The user namespace of the resource being accessed + * @cred_ns: The user namespace of the credentials * @cap: The capability to check for - * @opts: Bitmask of options defined in include/linux/security.h * * Determine whether the nominated task has the specified capability amongst * its effective set, returning 0 if it does, -ve if it does not. * - * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable() - * and has_capability() functions. That is, it has the reverse semantics: - * cap_has_capability() returns 0 when a task has a capability, but the - * kernel's capable() and has_capability() returns 1 for this case. + * See cap_capable for more details. */ -int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, - int cap, unsigned int opts) +static inline int cap_capable_helper(const struct cred *cred, + struct user_namespace *target_ns, + const struct user_namespace *cred_ns, + int cap) { - struct user_namespace *ns = targ_ns; + struct user_namespace *ns = target_ns; /* See if cred has the capability in the target user namespace * by examining the target user namespace and all of the target @@ -75,21 +78,21 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, */ for (;;) { /* Do we have the necessary capabilities? */ - if (ns == cred->user_ns) + if (likely(ns == cred_ns)) return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM; /* * If we're already at a lower level than we're looking for, * we're done searching. */ - if (ns->level <= cred->user_ns->level) + if (ns->level <= cred_ns->level) return -EPERM; /* * The owner of the user namespace in the parent of the * user namespace has all caps. */ - if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid)) + if ((ns->parent == cred_ns) && uid_eq(ns->owner, cred->euid)) return 0; /* @@ -102,6 +105,31 @@ int cap_capable(const struct cred *cred, struct user_namespace *targ_ns, /* We never get here */ } +/** + * cap_capable - Determine whether a task has a particular effective capability + * @cred: The credentials to use + * @target_ns: The user namespace of the resource being accessed + * @cap: The capability to check for + * @opts: Bitmask of options defined in include/linux/security.h (unused) + * + * Determine whether the nominated task has the specified capability amongst + * its effective set, returning 0 if it does, -ve if it does not. + * + * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable() + * and has_capability() functions. That is, it has the reverse semantics: + * cap_has_capability() returns 0 when a task has a capability, but the + * kernel's capable() and has_capability() returns 1 for this case. + */ +int cap_capable(const struct cred *cred, struct user_namespace *target_ns, + int cap, unsigned int opts) +{ + const struct user_namespace *cred_ns = cred->user_ns; + int ret = cap_capable_helper(cred, target_ns, cred_ns, cap); + + trace_cap_capable(cred, target_ns, cred_ns, cap, ret); + return ret; +} + /** * cap_settime - Determine whether the current process may set the system clock * @ts: The time to set -- cgit v1.2.3 From a61b19f4a6586590a9ae6baf2ac4a25a852e547f Mon Sep 17 00:00:00 2001 From: Maksym Kutsevol Date: Mon, 2 Dec 2024 11:55:07 -0800 Subject: netpoll: Make netpoll_send_udp return status instead of void netpoll_send_udp can return if send was successful. It will allow client code to be aware of the send status. Possible return values are the result of __netpoll_send_skb (cast to int) and -ENOMEM. This doesn't cover the case when TX was not successful instantaneously and was scheduled for later, __netpoll__send_skb returns success in that case. Signed-off-by: Maksym Kutsevol Link: https://patch.msgid.link/20241202-netcons-add-udp-send-fail-statistics-to-netconsole-v5-1-70e82239f922@kutsevol.com Signed-off-by: Jakub Kicinski --- include/linux/netpoll.h | 2 +- net/core/netpoll.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index b34301650c47..f91e50a76efd 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -57,7 +57,7 @@ static inline void netpoll_poll_disable(struct net_device *dev) { return; } static inline void netpoll_poll_enable(struct net_device *dev) { return; } #endif -void netpoll_send_udp(struct netpoll *np, const char *msg, int len); +int netpoll_send_udp(struct netpoll *np, const char *msg, int len); void netpoll_print_options(struct netpoll *np); int netpoll_parse_options(struct netpoll *np, char *opt); int __netpoll_setup(struct netpoll *np, struct net_device *ndev); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 99e5aa9cc992..6f2647b000b8 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -390,7 +390,7 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) } EXPORT_SYMBOL(netpoll_send_skb); -void netpoll_send_udp(struct netpoll *np, const char *msg, int len) +int netpoll_send_udp(struct netpoll *np, const char *msg, int len) { int total_len, ip_len, udp_len; struct sk_buff *skb; @@ -414,7 +414,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb = find_skb(np, total_len + np->dev->needed_tailroom, total_len - len); if (!skb) - return; + return -ENOMEM; skb_copy_to_linear_data(skb, msg, len); skb_put(skb, len); @@ -490,7 +490,7 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) skb->dev = np->dev; - netpoll_send_skb(np, skb); + return (int)netpoll_send_skb(np, skb); } EXPORT_SYMBOL(netpoll_send_udp); -- cgit v1.2.3 From b4c7698dd95f253c6958d8c6ac219098009bf28a Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:02 +0000 Subject: net: phy: add phy_inband_caps() Add a method to query the PHY's in-band capabilities for a PHY interface mode. Where the interface mode does not have in-band capability, or the PHY driver has not been updated to return this information, then phy_inband_caps() should return zero. Otherwise, PHY drivers will return a value consisting of the following flags: LINK_INBAND_DISABLE indicates that the hardware does not support in-band signalling, or can have in-band signalling configured via software to be disabled. LINK_INBAND_ENABLE indicates that the hardware will use in-band signalling, or can have in-band signalling configured via software to be enabled. LINK_INBAND_BYPASS indicates that the hardware has the ability to bypass in-band signalling when enabled after a timeout if the link partner does not respond to its in-band signalling. This reports the PHY capabilities for the particular interface mode, not the current configuration. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUre-006ITz-KF@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 21 +++++++++++++++++++++ include/linux/phy.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 0d20b534122b..f42cd6584841 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1005,6 +1005,27 @@ static int phy_check_link_status(struct phy_device *phydev) return 0; } +/** + * phy_inband_caps - query which in-band signalling modes are supported + * @phydev: a pointer to a &struct phy_device + * @interface: the interface mode for the PHY + * + * Returns zero if it is unknown what in-band signalling is supported by the + * PHY (e.g. because the PHY driver doesn't implement the method.) Otherwise, + * returns a bit mask of the LINK_INBAND_* values from + * &enum link_inband_signalling to describe which inband modes are supported + * by the PHY for this interface mode. + */ +unsigned int phy_inband_caps(struct phy_device *phydev, + phy_interface_t interface) +{ + if (phydev->drv && phydev->drv->inband_caps) + return phydev->drv->inband_caps(phydev, interface); + + return 0; +} +EXPORT_SYMBOL_GPL(phy_inband_caps); + /** * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 563c46205685..ccb93d892da9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -817,6 +817,24 @@ struct phy_tdr_config { }; #define PHY_PAIR_ALL -1 +/** + * enum link_inband_signalling - in-band signalling modes that are supported + * + * @LINK_INBAND_DISABLE: in-band signalling can be disabled + * @LINK_INBAND_ENABLE: in-band signalling can be enabled without bypass + * @LINK_INBAND_BYPASS: in-band signalling can be enabled with bypass + * + * The possible and required bits can only be used if the valid bit is set. + * If possible is clear, that means inband signalling can not be used. + * Required is only valid when possible is set, and means that inband + * signalling must be used. + */ +enum link_inband_signalling { + LINK_INBAND_DISABLE = BIT(0), + LINK_INBAND_ENABLE = BIT(1), + LINK_INBAND_BYPASS = BIT(2), +}; + /** * struct phy_plca_cfg - Configuration of the PLCA (Physical Layer Collision * Avoidance) Reconciliation Sublayer. @@ -956,6 +974,14 @@ struct phy_driver { */ int (*get_features)(struct phy_device *phydev); + /** + * @inband_caps: query whether in-band is supported for the given PHY + * interface mode. Returns a bitmask of bits defined by enum + * link_inband_signalling. + */ + unsigned int (*inband_caps)(struct phy_device *phydev, + phy_interface_t interface); + /** * @get_rate_matching: Get the supported type of rate matching for a * particular phy interface. This is used by phy consumers to determine @@ -1818,6 +1844,8 @@ int phy_config_aneg(struct phy_device *phydev); int _phy_start_aneg(struct phy_device *phydev); int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); +unsigned int phy_inband_caps(struct phy_device *phydev, + phy_interface_t interface); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); bool phy_check_valid(int speed, int duplex, unsigned long *features); -- cgit v1.2.3 From 5d58a890c02770ba8d790b1f3c6e8c0e20514dc2 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:18 +0000 Subject: net: phy: add phy_config_inband() Add a method to configure the PHY's in-band mode. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUru-006IUI-08@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy.c | 32 ++++++++++++++++++++++++++++++++ include/linux/phy.h | 6 ++++++ 2 files changed, 38 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index f42cd6584841..0c228aa18019 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1026,6 +1026,38 @@ unsigned int phy_inband_caps(struct phy_device *phydev, } EXPORT_SYMBOL_GPL(phy_inband_caps); +/** + * phy_config_inband - configure the desired PHY in-band mode + * @phydev: the phy_device struct + * @modes: in-band modes to configure + * + * Description: disables, enables or enables-with-bypass in-band signalling + * between the PHY and host system. + * + * Returns: zero on success, or negative errno value. + */ +int phy_config_inband(struct phy_device *phydev, unsigned int modes) +{ + int err; + + if (!!(modes & LINK_INBAND_DISABLE) + + !!(modes & LINK_INBAND_ENABLE) + + !!(modes & LINK_INBAND_BYPASS) != 1) + return -EINVAL; + + mutex_lock(&phydev->lock); + if (!phydev->drv) + err = -EIO; + else if (!phydev->drv->config_inband) + err = -EOPNOTSUPP; + else + err = phydev->drv->config_inband(phydev, modes); + mutex_unlock(&phydev->lock); + + return err; +} +EXPORT_SYMBOL(phy_config_inband); + /** * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index ccb93d892da9..61a1bc81f597 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -982,6 +982,11 @@ struct phy_driver { unsigned int (*inband_caps)(struct phy_device *phydev, phy_interface_t interface); + /** + * @config_inband: configure in-band mode for the PHY + */ + int (*config_inband)(struct phy_device *phydev, unsigned int modes); + /** * @get_rate_matching: Get the supported type of rate matching for a * particular phy interface. This is used by phy consumers to determine @@ -1846,6 +1851,7 @@ int phy_start_aneg(struct phy_device *phydev); int phy_aneg_done(struct phy_device *phydev); unsigned int phy_inband_caps(struct phy_device *phydev, phy_interface_t interface); +int phy_config_inband(struct phy_device *phydev, unsigned int modes); int phy_speed_down(struct phy_device *phydev, bool sync); int phy_speed_up(struct phy_device *phydev); bool phy_check_valid(int speed, int duplex, unsigned long *features); -- cgit v1.2.3 From df874f9e52c340cc6f0a0014a97b778f67d46849 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 3 Dec 2024 15:31:28 +0000 Subject: net: phylink: add pcs_inband_caps() method Add a pcs_inband_caps() method to query the PCS for its inband link capabilities, and use this to determine whether link modes used with optical SFPs can be supported. When a PCS does not provide a method, we allow inband negotiation to be either on or off, making this a no-op until the pcs_inband_caps() method is implemented by a PCS driver. Reviewed-by: Andrew Lunn Signed-off-by: Russell King (Oracle) Link: https://patch.msgid.link/E1tIUs4-006IUU-7K@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phylink.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/phylink.h | 17 ++++++++++++++ 2 files changed, 77 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index fda53dd58285..42f3c7ccbf38 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -990,6 +990,15 @@ static void phylink_resolve_an_pause(struct phylink_link_state *state) } } +static unsigned int phylink_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + if (pcs && pcs->ops->pcs_inband_caps) + return pcs->ops->pcs_inband_caps(pcs, interface); + + return 0; +} + static void phylink_pcs_pre_config(struct phylink_pcs *pcs, phy_interface_t interface) { @@ -1043,6 +1052,24 @@ static void phylink_pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode, pcs->ops->pcs_link_up(pcs, neg_mode, interface, speed, duplex); } +/* Query inband for a specific interface mode, asking the MAC for the + * PCS which will be used to handle the interface mode. + */ +static unsigned int phylink_inband_caps(struct phylink *pl, + phy_interface_t interface) +{ + struct phylink_pcs *pcs; + + if (!pl->mac_ops->mac_select_pcs) + return 0; + + pcs = pl->mac_ops->mac_select_pcs(pl->config, interface); + if (!pcs) + return 0; + + return phylink_pcs_inband_caps(pcs, interface); +} + static void phylink_pcs_poll_stop(struct phylink *pl) { if (pl->cfg_link_an_mode == MLO_AN_INBAND) @@ -2532,6 +2559,26 @@ int phylink_ethtool_ksettings_get(struct phylink *pl, } EXPORT_SYMBOL_GPL(phylink_ethtool_ksettings_get); +static bool phylink_validate_pcs_inband_autoneg(struct phylink *pl, + phy_interface_t interface, + unsigned long *adv) +{ + unsigned int inband = phylink_inband_caps(pl, interface); + unsigned int mask; + + /* If the PCS doesn't implement inband support, be permissive. */ + if (!inband) + return true; + + if (linkmode_test_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, adv)) + mask = LINK_INBAND_ENABLE; + else + mask = LINK_INBAND_DISABLE; + + /* Check whether the PCS implements the required mode */ + return !!(inband & mask); +} + /** * phylink_ethtool_ksettings_set() - set the link settings * @pl: a pointer to a &struct phylink returned from phylink_create() @@ -2662,6 +2709,13 @@ int phylink_ethtool_ksettings_set(struct phylink *pl, phylink_is_empty_linkmode(config.advertising)) return -EINVAL; + /* Validate the autonegotiation state. We don't have a PHY in this + * situation, so the PCS is the media-facing entity. + */ + if (!phylink_validate_pcs_inband_autoneg(pl, config.interface, + config.advertising)) + return -EINVAL; + mutex_lock(&pl->state_mutex); pl->link_config.speed = config.speed; pl->link_config.duplex = config.duplex; @@ -3341,6 +3395,12 @@ static int phylink_sfp_config_optical(struct phylink *pl) phylink_dbg(pl, "optical SFP: chosen %s interface\n", phy_modes(interface)); + if (!phylink_validate_pcs_inband_autoneg(pl, interface, + config.advertising)) { + phylink_err(pl, "autoneg setting not compatible with PCS"); + return -EINVAL; + } + config.interface = interface; /* Ignore errors if we're expecting a PHY to attach later */ diff --git a/include/linux/phylink.h b/include/linux/phylink.h index 5c01048860c4..5462cc6a37dc 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -419,6 +419,7 @@ struct phylink_pcs { /** * struct phylink_pcs_ops - MAC PCS operations structure. * @pcs_validate: validate the link configuration. + * @pcs_inband_caps: query inband support for interface mode. * @pcs_enable: enable the PCS. * @pcs_disable: disable the PCS. * @pcs_pre_config: pre-mac_config method (for errata) @@ -434,6 +435,8 @@ struct phylink_pcs { struct phylink_pcs_ops { int (*pcs_validate)(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); + unsigned int (*pcs_inband_caps)(struct phylink_pcs *pcs, + phy_interface_t interface); int (*pcs_enable)(struct phylink_pcs *pcs); void (*pcs_disable)(struct phylink_pcs *pcs); void (*pcs_pre_config)(struct phylink_pcs *pcs, @@ -470,6 +473,20 @@ struct phylink_pcs_ops { int pcs_validate(struct phylink_pcs *pcs, unsigned long *supported, const struct phylink_link_state *state); +/** + * pcs_inband_caps - query PCS in-band capabilities for interface mode. + * @pcs: a pointer to a &struct phylink_pcs. + * @interface: interface mode to be queried + * + * Returns zero if it is unknown what in-band signalling is supported by the + * PHY (e.g. because the PHY driver doesn't implement the method.) Otherwise, + * returns a bit mask of the LINK_INBAND_* values from + * &enum link_inband_signalling to describe which inband modes are supported + * for this interface mode. + */ +unsigned int pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface); + /** * pcs_enable() - enable the PCS. * @pcs: a pointer to a &struct phylink_pcs. -- cgit v1.2.3 From 64e844505bc08cde3f346f193cbbbab0096fef54 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:06:59 -0500 Subject: include: uapi: protocol number and packet structs for AGGFRAG in ESP Add the RFC assigned IP protocol number for AGGFRAG. Add the on-wire basic and congestion-control IP-TFS packet headers. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/in.h | 2 ++ include/uapi/linux/ip.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h index 283dec7e3645..5bd7ce934d74 100644 --- a/include/uapi/linux/ip.h +++ b/include/uapi/linux/ip.h @@ -137,6 +137,22 @@ struct ip_beet_phdr { __u8 reserved; }; +struct ip_iptfs_hdr { + __u8 subtype; /* 0*: basic, 1: CC */ + __u8 flags; + __be16 block_offset; +}; + +struct ip_iptfs_cc_hdr { + __u8 subtype; /* 0: basic, 1*: CC */ + __u8 flags; + __be16 block_offset; + __be32 loss_rate; + __be64 rtt_adelay_xdelay; + __be32 tval; + __be32 techo; +}; + /* index values for the variables in ipv4_devconf */ enum { -- cgit v1.2.3 From f69eb4f65c58f5a081dbafb76011dad73757420c Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:00 -0500 Subject: xfrm: netlink: add config (netlink) options Add netlink options for configuring IP-TFS SAs. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/uapi/linux/xfrm.h | 9 +++++++- net/xfrm/xfrm_compat.c | 10 +++++++-- net/xfrm/xfrm_user.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index d73a97e3030a..a23495c0e0a1 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -158,7 +158,8 @@ enum { #define XFRM_MODE_ROUTEOPTIMIZATION 2 #define XFRM_MODE_IN_TRIGGER 3 #define XFRM_MODE_BEET 4 -#define XFRM_MODE_MAX 5 +#define XFRM_MODE_IPTFS 5 +#define XFRM_MODE_MAX 6 /* Netlink configuration messages. */ enum { @@ -323,6 +324,12 @@ enum xfrm_attr_type_t { XFRMA_SA_DIR, /* __u8 */ XFRMA_NAT_KEEPALIVE_INTERVAL, /* __u32 in seconds for NAT keepalive */ XFRMA_SA_PCPU, /* __u32 */ + XFRMA_IPTFS_DROP_TIME, /* __u32 in: usec to wait for next seq */ + XFRMA_IPTFS_REORDER_WINDOW, /* __u16 in: reorder window size (pkts) */ + XFRMA_IPTFS_DONT_FRAG, /* out: don't use fragmentation */ + XFRMA_IPTFS_INIT_DELAY, /* __u32 out: initial packet wait delay (usec) */ + XFRMA_IPTFS_MAX_QSIZE, /* __u32 out: max ingress queue size (octets) */ + XFRMA_IPTFS_PKT_SIZE, /* __u32 out: size of outer packet, 0 for PMTU */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 5b9ee63e30b6..b8d2e6930041 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -284,9 +284,15 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SA_DIR: case XFRMA_NAT_KEEPALIVE_INTERVAL: case XFRMA_SA_PCPU: + case XFRMA_IPTFS_DROP_TIME: + case XFRMA_IPTFS_REORDER_WINDOW: + case XFRMA_IPTFS_DONT_FRAG: + case XFRMA_IPTFS_INIT_DELAY: + case XFRMA_IPTFS_MAX_QSIZE: + case XFRMA_IPTFS_PKT_SIZE: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -441,7 +447,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_SA_PCPU); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_IPTFS_PKT_SIZE); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b2876e09328b..749ec56101ac 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -301,6 +301,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, NL_SET_ERR_MSG(extack, "TFC padding can only be used in tunnel mode"); goto out; } + if ((attrs[XFRMA_IPTFS_DROP_TIME] || + attrs[XFRMA_IPTFS_REORDER_WINDOW] || + attrs[XFRMA_IPTFS_DONT_FRAG] || + attrs[XFRMA_IPTFS_INIT_DELAY] || + attrs[XFRMA_IPTFS_MAX_QSIZE] || + attrs[XFRMA_IPTFS_PKT_SIZE]) && + p->mode != XFRM_MODE_IPTFS) { + NL_SET_ERR_MSG(extack, "IP-TFS options can only be used in IP-TFS mode"); + goto out; + } break; case IPPROTO_COMP: @@ -421,6 +431,18 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; } + if (attrs[XFRMA_IPTFS_DROP_TIME]) { + NL_SET_ERR_MSG(extack, "IP-TFS drop time should not be set for output SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_REORDER_WINDOW]) { + NL_SET_ERR_MSG(extack, "IP-TFS reorder window should not be set for output SA"); + err = -EINVAL; + goto out; + } + if (attrs[XFRMA_REPLAY_VAL]) { struct xfrm_replay_state *replay; @@ -458,6 +480,30 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, } } + + if (attrs[XFRMA_IPTFS_DONT_FRAG]) { + NL_SET_ERR_MSG(extack, "IP-TFS don't fragment should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_INIT_DELAY]) { + NL_SET_ERR_MSG(extack, "IP-TFS initial delay should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_MAX_QSIZE]) { + NL_SET_ERR_MSG(extack, "IP-TFS max queue size should not be set for input SA"); + err = -EINVAL; + goto out; + } + + if (attrs[XFRMA_IPTFS_PKT_SIZE]) { + NL_SET_ERR_MSG(extack, "IP-TFS packet size should not be set for input SA"); + err = -EINVAL; + goto out; + } } if (!sa_dir && attrs[XFRMA_SA_PCPU]) { @@ -3220,6 +3266,12 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SA_DIR] = NLA_POLICY_RANGE(NLA_U8, XFRM_SA_DIR_IN, XFRM_SA_DIR_OUT), [XFRMA_NAT_KEEPALIVE_INTERVAL] = { .type = NLA_U32 }, [XFRMA_SA_PCPU] = { .type = NLA_U32 }, + [XFRMA_IPTFS_DROP_TIME] = { .type = NLA_U32 }, + [XFRMA_IPTFS_REORDER_WINDOW] = { .type = NLA_U16 }, + [XFRMA_IPTFS_DONT_FRAG] = { .type = NLA_FLAG }, + [XFRMA_IPTFS_INIT_DELAY] = { .type = NLA_U32 }, + [XFRMA_IPTFS_MAX_QSIZE] = { .type = NLA_U32 }, + [XFRMA_IPTFS_PKT_SIZE] = { .type = NLA_U32 }, }; EXPORT_SYMBOL_GPL(xfrma_policy); -- cgit v1.2.3 From 7ac64f4598b4daa3f955f82759760666e047bdf8 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:01 -0500 Subject: xfrm: add mode_cbs module functionality Add a set of callbacks xfrm_mode_cbs to xfrm_state. These callbacks enable the addition of new xfrm modes, such as IP-TFS to be defined in modules. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 43 ++++++++++++++++++++++++++++++ net/xfrm/xfrm_device.c | 3 ++- net/xfrm/xfrm_input.c | 18 +++++++++++-- net/xfrm/xfrm_output.c | 2 ++ net/xfrm/xfrm_policy.c | 18 ++++++++----- net/xfrm/xfrm_state.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_user.c | 13 +++++++++ 7 files changed, 159 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 32c09e85a64c..1ebc09cde627 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -213,6 +213,7 @@ struct xfrm_state { u16 family; xfrm_address_t saddr; int header_len; + int enc_hdr_len; int trailer_len; u32 extra_flags; struct xfrm_mark smark; @@ -303,6 +304,9 @@ struct xfrm_state { * interpreted by xfrm_type methods. */ void *data; u8 dir; + + const struct xfrm_mode_cbs *mode_cbs; + void *mode_data; }; static inline struct net *xs_net(struct xfrm_state *x) @@ -460,6 +464,45 @@ struct xfrm_type_offload { int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); void xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family); +/** + * struct xfrm_mode_cbs - XFRM mode callbacks + * @owner: module owner or NULL + * @init_state: Add/init mode specific state in `xfrm_state *x` + * @clone_state: Copy mode specific values from `orig` to new state `x` + * @destroy_state: Cleanup mode specific state from `xfrm_state *x` + * @user_init: Process mode specific netlink attributes from user + * @copy_to_user: Add netlink attributes to `attrs` based on state in `x` + * @sa_len: Return space required to store mode specific netlink attributes + * @get_inner_mtu: Return avail payload space after removing encap overhead + * @input: Process received packet from SA using mode + * @output: Output given packet using mode + * @prepare_output: Add mode specific encapsulation to packet in skb. On return + * `transport_header` should point at ESP header, `network_header` should + * point at outer IP header and `mac_header` should opint at the + * protocol/nexthdr field of the outer IP. + * + * One should examine and understand the specific uses of these callbacks in + * xfrm for further detail on how and when these functions are called. RTSL. + */ +struct xfrm_mode_cbs { + struct module *owner; + int (*init_state)(struct xfrm_state *x); + int (*clone_state)(struct xfrm_state *x, struct xfrm_state *orig); + void (*destroy_state)(struct xfrm_state *x); + int (*user_init)(struct net *net, struct xfrm_state *x, + struct nlattr **attrs, + struct netlink_ext_ack *extack); + int (*copy_to_user)(struct xfrm_state *x, struct sk_buff *skb); + unsigned int (*sa_len)(const struct xfrm_state *x); + u32 (*get_inner_mtu)(struct xfrm_state *x, int outer_mtu); + int (*input)(struct xfrm_state *x, struct sk_buff *skb); + int (*output)(struct net *net, struct sock *sk, struct sk_buff *skb); + int (*prepare_output)(struct xfrm_state *x, struct sk_buff *skb); +}; + +int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs); +void xfrm_unregister_mode_cbs(u8 mode); + static inline int xfrm_af2proto(unsigned int family) { switch(family) { diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index b33c4591e09a..1fe1b07d879d 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -42,7 +42,8 @@ static void __xfrm_mode_tunnel_prep(struct xfrm_state *x, struct sk_buff *skb, skb->transport_header = skb->network_header + hsize; skb_reset_mac_len(skb); - pskb_pull(skb, skb->mac_len + x->props.header_len); + pskb_pull(skb, + skb->mac_len + x->props.header_len - x->props.enc_hdr_len); } static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 841a60a6fbfe..2c4ae61e7e3a 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -446,6 +446,9 @@ static int xfrm_inner_mode_input(struct xfrm_state *x, WARN_ON_ONCE(1); break; default: + if (x->mode_cbs && x->mode_cbs->input) + return x->mode_cbs->input(x, skb); + WARN_ON_ONCE(1); break; } @@ -453,6 +456,10 @@ static int xfrm_inner_mode_input(struct xfrm_state *x, return -EOPNOTSUPP; } +/* NOTE: encap_type - In addition to the normal (non-negative) values for + * encap_type, a negative value of -1 or -2 can be used to resume/restart this + * function after a previous invocation early terminated for async operation. + */ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { const struct xfrm_state_afinfo *afinfo; @@ -489,6 +496,10 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) family = x->props.family; + /* An encap_type of -2 indicates reconstructed inner packet */ + if (encap_type == -2) + goto resume_decapped; + /* An encap_type of -1 indicates async resumption. */ if (encap_type == -1) { async = 1; @@ -679,11 +690,14 @@ resume: XFRM_MODE_SKB_CB(skb)->protocol = nexthdr; - if (xfrm_inner_mode_input(x, skb)) { + err = xfrm_inner_mode_input(x, skb); + if (err == -EINPROGRESS) + return 0; + else if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); goto drop; } - +resume_decapped: if (x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL) { decaps = 1; break; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index e5722c95b8bb..ef81359e4038 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -472,6 +472,8 @@ static int xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb) WARN_ON_ONCE(1); break; default: + if (x->mode_cbs && x->mode_cbs->prepare_output) + return x->mode_cbs->prepare_output(x, skb); WARN_ON_ONCE(1); break; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4408c11c0835..c04014ee623f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2748,13 +2748,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->input = dst_discard; - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (likely(afinfo)) - dst1->output = afinfo->output; - else - dst1->output = dst_discard_out; - rcu_read_unlock(); + if (xfrm[i]->mode_cbs && xfrm[i]->mode_cbs->output) { + dst1->output = xfrm[i]->mode_cbs->output; + } else { + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (likely(afinfo)) + dst1->output = afinfo->output; + else + dst1->output = dst_discard_out; + rcu_read_unlock(); + } xdst_prev = xdst; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 67ca7ac955a3..cf68ba891729 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -515,6 +515,60 @@ static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) return NULL; } +static const struct xfrm_mode_cbs __rcu *xfrm_mode_cbs_map[XFRM_MODE_MAX]; +static DEFINE_SPINLOCK(xfrm_mode_cbs_map_lock); + +int xfrm_register_mode_cbs(u8 mode, const struct xfrm_mode_cbs *mode_cbs) +{ + if (mode >= XFRM_MODE_MAX) + return -EINVAL; + + spin_lock_bh(&xfrm_mode_cbs_map_lock); + rcu_assign_pointer(xfrm_mode_cbs_map[mode], mode_cbs); + spin_unlock_bh(&xfrm_mode_cbs_map_lock); + + return 0; +} +EXPORT_SYMBOL(xfrm_register_mode_cbs); + +void xfrm_unregister_mode_cbs(u8 mode) +{ + if (mode >= XFRM_MODE_MAX) + return; + + spin_lock_bh(&xfrm_mode_cbs_map_lock); + RCU_INIT_POINTER(xfrm_mode_cbs_map[mode], NULL); + spin_unlock_bh(&xfrm_mode_cbs_map_lock); + synchronize_rcu(); +} +EXPORT_SYMBOL(xfrm_unregister_mode_cbs); + +static const struct xfrm_mode_cbs *xfrm_get_mode_cbs(u8 mode) +{ + const struct xfrm_mode_cbs *cbs; + bool try_load = true; + + if (mode >= XFRM_MODE_MAX) + return NULL; + +retry: + rcu_read_lock(); + + cbs = rcu_dereference(xfrm_mode_cbs_map[mode]); + if (cbs && !try_module_get(cbs->owner)) + cbs = NULL; + + rcu_read_unlock(); + + if (mode == XFRM_MODE_IPTFS && !cbs && try_load) { + request_module("xfrm-iptfs"); + try_load = false; + goto retry; + } + + return cbs; +} + void xfrm_state_free(struct xfrm_state *x) { kmem_cache_free(xfrm_state_cache, x); @@ -523,6 +577,8 @@ EXPORT_SYMBOL(xfrm_state_free); static void ___xfrm_state_destroy(struct xfrm_state *x) { + if (x->mode_cbs && x->mode_cbs->destroy_state) + x->mode_cbs->destroy_state(x); hrtimer_cancel(&x->mtimer); del_timer_sync(&x->rtimer); kfree(x->aead); @@ -682,6 +738,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net) x->replay_maxdiff = 0; x->pcpu_num = UINT_MAX; spin_lock_init(&x->lock); + x->mode_data = NULL; } return x; } @@ -1945,6 +2002,12 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->new_mapping_sport = 0; x->dir = orig->dir; + x->mode_cbs = orig->mode_cbs; + if (x->mode_cbs && x->mode_cbs->clone_state) { + if (x->mode_cbs->clone_state(x, orig)) + goto error; + } + return x; error: @@ -2986,6 +3049,9 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) case XFRM_MODE_TUNNEL: break; default: + if (x->mode_cbs && x->mode_cbs->get_inner_mtu) + return x->mode_cbs->get_inner_mtu(x, mtu); + WARN_ON_ONCE(1); break; } @@ -3086,6 +3152,12 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, } } + x->mode_cbs = xfrm_get_mode_cbs(x->props.mode); + if (x->mode_cbs) { + if (x->mode_cbs->init_state) + err = x->mode_cbs->init_state(x); + module_put(x->mode_cbs->owner); + } error: return err; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 749ec56101ac..71b452fff8db 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -932,6 +932,12 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + if (x->mode_cbs && x->mode_cbs->user_init) { + err = x->mode_cbs->user_init(net, x, attrs, extack); + if (err) + goto error; + } + return x; error: @@ -1347,6 +1353,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (ret) goto out; } + if (x->mode_cbs && x->mode_cbs->copy_to_user) + ret = x->mode_cbs->copy_to_user(x, skb); + if (ret) + goto out; if (x->mapping_maxage) { ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage); if (ret) @@ -3606,6 +3616,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->nat_keepalive_interval) l += nla_total_size(sizeof(x->nat_keepalive_interval)); + if (x->mode_cbs && x->mode_cbs->sa_len) + l += x->mode_cbs->sa_len(x); + return l; } -- cgit v1.2.3 From d1716d5a44c37e5743bf6ea4e5cdbdab37727f27 Mon Sep 17 00:00:00 2001 From: Christian Hopps Date: Thu, 14 Nov 2024 02:07:02 -0500 Subject: xfrm: add generic iptfs defines and functionality Define `XFRM_MODE_IPTFS` and `IPSEC_MODE_IPTFS` constants, and add these to switch case and conditionals adjacent with the existing TUNNEL modes. Signed-off-by: Christian Hopps Tested-by: Antony Antony Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + include/uapi/linux/ipsec.h | 3 ++- include/uapi/linux/snmp.h | 2 ++ net/ipv4/esp4.c | 3 ++- net/ipv6/esp6.c | 3 ++- net/netfilter/nft_xfrm.c | 3 ++- net/xfrm/xfrm_device.c | 1 + net/xfrm/xfrm_output.c | 4 ++++ net/xfrm/xfrm_policy.c | 8 ++++++-- net/xfrm/xfrm_proc.c | 2 ++ net/xfrm/xfrm_state.c | 12 ++++++++++++ net/xfrm/xfrm_user.c | 12 ++++++++++++ 12 files changed, 48 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1ebc09cde627..4b0677e48190 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -38,6 +38,7 @@ #define XFRM_PROTO_COMP 108 #define XFRM_PROTO_IPIP 4 #define XFRM_PROTO_IPV6 41 +#define XFRM_PROTO_IPTFS IPPROTO_AGGFRAG #define XFRM_PROTO_ROUTING IPPROTO_ROUTING #define XFRM_PROTO_DSTOPTS IPPROTO_DSTOPTS diff --git a/include/uapi/linux/ipsec.h b/include/uapi/linux/ipsec.h index 50d8ee1791e2..696b790f4346 100644 --- a/include/uapi/linux/ipsec.h +++ b/include/uapi/linux/ipsec.h @@ -14,7 +14,8 @@ enum { IPSEC_MODE_ANY = 0, /* We do not support this for SA */ IPSEC_MODE_TRANSPORT = 1, IPSEC_MODE_TUNNEL = 2, - IPSEC_MODE_BEET = 3 + IPSEC_MODE_BEET = 3, + IPSEC_MODE_IPTFS = 4 }; enum { diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index adf5fd78dd50..5a2553511190 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -339,6 +339,8 @@ enum LINUX_MIB_XFRMACQUIREERROR, /* XfrmAcquireError */ LINUX_MIB_XFRMOUTSTATEDIRERROR, /* XfrmOutStateDirError */ LINUX_MIB_XFRMINSTATEDIRERROR, /* XfrmInStateDirError */ + LINUX_MIB_XFRMINIPTFSERROR, /* XfrmInIptfsError */ + LINUX_MIB_XFRMOUTNOQSPACE, /* XfrmOutNoQueueSpace */ __LINUX_MIB_XFRMMAX }; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index f3281312eb5e..b0fbf804bbba 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -816,7 +816,8 @@ int esp_input_done2(struct sk_buff *skb, int err) } skb_pull_rcsum(skb, hlen); - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode == XFRM_MODE_TUNNEL || + x->props.mode == XFRM_MODE_IPTFS) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -ihl); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index b2400c226a32..5f3d0cc1555a 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -859,7 +859,8 @@ int esp6_input_done2(struct sk_buff *skb, int err) skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull_rcsum(skb, hlen); - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode == XFRM_MODE_TUNNEL || + x->props.mode == XFRM_MODE_IPTFS) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -hdr_len); diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 8a07b46cc8fb..3210cfc966ab 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -112,7 +112,8 @@ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode) return true; } - return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL; + return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL || + mode == XFRM_MODE_IPTFS; } static void nft_xfrm_state_get_key(const struct nft_xfrm *priv, diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 1fe1b07d879d..d1fa94e52cea 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -69,6 +69,7 @@ static void __xfrm_mode_beet_prep(struct xfrm_state *x, struct sk_buff *skb, static void xfrm_outer_mode_prep(struct xfrm_state *x, struct sk_buff *skb) { switch (x->outer_mode.encap) { + case XFRM_MODE_IPTFS: case XFRM_MODE_TUNNEL: if (x->outer_mode.family == AF_INET) return __xfrm_mode_tunnel_prep(x, skb, diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ef81359e4038..b5025cf6136e 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -677,6 +677,10 @@ static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x) return; } + if (x->outer_mode.encap == XFRM_MODE_IPTFS) { + xo->inner_ipproto = IPPROTO_AGGFRAG; + return; + } /* non-Tunnel Mode */ if (!skb->encapsulation) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c04014ee623f..9e510021ee91 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2497,6 +2497,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; if (tmpl->mode == XFRM_MODE_TUNNEL || + tmpl->mode == XFRM_MODE_IPTFS || tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; @@ -3294,7 +3295,8 @@ no_transform: ok: xfrm_pols_put(pols, drop_pols); if (dst && dst->xfrm && - dst->xfrm->props.mode == XFRM_MODE_TUNNEL) + (dst->xfrm->props.mode == XFRM_MODE_TUNNEL || + dst->xfrm->props.mode == XFRM_MODE_IPTFS)) dst->flags |= DST_XFRM_TUNNEL; return dst; @@ -4523,6 +4525,7 @@ static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tm switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, m->old_family) && xfrm_addr_equal(&t->saddr, &m->old_saddr, @@ -4565,7 +4568,8 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, continue; n++; if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL && - pol->xfrm_vec[i].mode != XFRM_MODE_BEET) + pol->xfrm_vec[i].mode != XFRM_MODE_BEET && + pol->xfrm_vec[i].mode != XFRM_MODE_IPTFS) continue; /* update endpoints */ memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr, diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index eeb984be03a7..8e07dd614b0b 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -43,6 +43,8 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmAcquireError", LINUX_MIB_XFRMACQUIREERROR), SNMP_MIB_ITEM("XfrmOutStateDirError", LINUX_MIB_XFRMOUTSTATEDIRERROR), SNMP_MIB_ITEM("XfrmInStateDirError", LINUX_MIB_XFRMINSTATEDIRERROR), + SNMP_MIB_ITEM("XfrmInIptfsError", LINUX_MIB_XFRMINIPTFSERROR), + SNMP_MIB_ITEM("XfrmOutNoQueueSpace", LINUX_MIB_XFRMOUTNOQSPACE), SNMP_MIB_SENTINEL }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index cf68ba891729..34067cb8a479 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -467,6 +467,11 @@ static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = { .flags = XFRM_MODE_FLAG_TUNNEL, .family = AF_INET, }, + [XFRM_MODE_IPTFS] = { + .encap = XFRM_MODE_IPTFS, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, }; static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { @@ -488,6 +493,11 @@ static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { .flags = XFRM_MODE_FLAG_TUNNEL, .family = AF_INET6, }, + [XFRM_MODE_IPTFS] = { + .encap = XFRM_MODE_IPTFS, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, }; static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) @@ -2334,6 +2344,7 @@ static int __xfrm6_state_sort_cmp(const void *p) #endif case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: return 4; } return 5; @@ -2360,6 +2371,7 @@ static int __xfrm6_tmpl_sort_cmp(const void *p) #endif case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: + case XFRM_MODE_IPTFS: return 3; } return 4; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 71b452fff8db..08c6d6f0179f 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -383,6 +383,16 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, case XFRM_MODE_ROUTEOPTIMIZATION: case XFRM_MODE_BEET: break; + case XFRM_MODE_IPTFS: + if (p->id.proto != IPPROTO_ESP) { + NL_SET_ERR_MSG(extack, "IP-TFS mode only supported with ESP"); + goto out; + } + if (sa_dir == 0) { + NL_SET_ERR_MSG(extack, "IP-TFS mode requires in or out direction attribute"); + goto out; + } + break; default: NL_SET_ERR_MSG(extack, "Unsupported mode"); @@ -2014,6 +2024,8 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family, return -EINVAL; } break; + case XFRM_MODE_IPTFS: + break; default: if (ut[i].family != prev_family) { NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change"); -- cgit v1.2.3 From f9a5b34f9251cf530fecf08ef039be64ead8c459 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 5 Dec 2024 00:09:21 +0200 Subject: net/mlx5: ifc: Reorganize mlx5_ifc_flow_table_context_bits The nested union at the end is not in the same style as the rest of the code, so un-nest it to make the style uniformly applied again. Signed-off-by: Cosmin Ratiu Reviewed-by: Saeed Mahameed Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241204220931.254964-2-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4fbbcf35498b..f3650f989e68 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -6324,6 +6324,20 @@ struct mlx5_ifc_modify_other_hca_cap_in_bits { struct mlx5_ifc_other_hca_cap_bits other_capability; }; +struct mlx5_ifc_sw_owner_icm_root_params_bits { + u8 sw_owner_icm_root_1[0x40]; + + u8 sw_owner_icm_root_0[0x40]; +}; + +struct mlx5_ifc_rtc_params_bits { + u8 rtc_id_0[0x20]; + + u8 rtc_id_1[0x20]; + + u8 reserved_at_40[0x40]; +}; + struct mlx5_ifc_flow_table_context_bits { u8 reformat_en[0x1]; u8 decap_en[0x1]; @@ -6342,20 +6356,10 @@ struct mlx5_ifc_flow_table_context_bits { u8 lag_master_next_table_id[0x18]; u8 reserved_at_60[0x60]; - union { - struct { - u8 sw_owner_icm_root_1[0x40]; - - u8 sw_owner_icm_root_0[0x40]; - } sws; - struct { - u8 rtc_id_0[0x20]; - u8 rtc_id_1[0x20]; - - u8 reserved_at_100[0x40]; - - } hws; + union { + struct mlx5_ifc_sw_owner_icm_root_params_bits sws; + struct mlx5_ifc_rtc_params_bits hws; }; }; -- cgit v1.2.3 From e799ac9dd3c485a7cda3586f2a12784b030b9df0 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Thu, 5 Dec 2024 00:09:22 +0200 Subject: net/mlx5: Add ConnectX-8 device to ifc In preparation for ConnectX-8 SWS support, add enum for the new device type. Signed-off-by: Yevgeny Kliteynik Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241204220931.254964-3-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f3650f989e68..bd9b1833408e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1590,6 +1590,7 @@ enum { MLX5_STEERING_FORMAT_CONNECTX_5 = 0, MLX5_STEERING_FORMAT_CONNECTX_6DX = 1, MLX5_STEERING_FORMAT_CONNECTX_7 = 2, + MLX5_STEERING_FORMAT_CONNECTX_8 = 3, }; struct mlx5_ifc_cmd_hca_cap_bits { -- cgit v1.2.3 From 03713108e0cccf325bb71941edd9ed6122142907 Mon Sep 17 00:00:00 2001 From: Carolina Jubran Date: Thu, 5 Dec 2024 00:09:23 +0200 Subject: net/mlx5: Add support for new scheduling elements Introduce new scheduling elements in the E-Switch QoS hierarchy to enhance traffic management capabilities. This patch adds support for: - Rate Limit scheduling elements: Enables bandwidth limitation across multiple nodes without a shared ancestor, providing a mechanism for more granular control of bandwidth allocation. - Traffic Class Transmit Scheduling Arbiter (TSAR): Introduces the infrastructure for creating Traffic Class TSARs, allowing hierarchical arbitration based on traffic classes. - Traffic Class Arbiter TSAR: Adds support for a TSAR capable of managing arbitration between multiple traffic classes, enabling improved bandwidth prioritization and traffic management. No functional changes are introduced in this patch. Signed-off-by: Carolina Jubran Reviewed-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241204220931.254964-4-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/rl.c | 4 ++++ include/linux/mlx5/mlx5_ifc.h | 14 +++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index e393391966e0..39a209b9b684 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -56,6 +56,8 @@ bool mlx5_qos_tsar_type_supported(struct mlx5_core_dev *dev, int type, u8 hierar return cap & TSAR_TYPE_CAP_MASK_ROUND_ROBIN; case TSAR_ELEMENT_TSAR_TYPE_ETS: return cap & TSAR_TYPE_CAP_MASK_ETS; + case TSAR_ELEMENT_TSAR_TYPE_TC_ARB: + return cap & TSAR_TYPE_CAP_MASK_TC_ARB; } return false; @@ -87,6 +89,8 @@ bool mlx5_qos_element_type_supported(struct mlx5_core_dev *dev, int type, u8 hie return cap & ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; case SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP: return cap & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT: + return cap & ELEMENT_TYPE_CAP_MASK_RATE_LIMIT; } return false; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index bd9b1833408e..8b202521b774 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1103,7 +1103,8 @@ struct mlx5_ifc_qos_cap_bits { u8 packet_pacing_min_rate[0x20]; - u8 reserved_at_80[0x10]; + u8 reserved_at_80[0xb]; + u8 log_esw_max_rate_limit[0x5]; u8 packet_pacing_rate_table_size[0x10]; u8 esw_element_type[0x10]; @@ -4104,6 +4105,7 @@ enum { SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC = 0x2, SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC = 0x3, SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP = 0x4, + SCHEDULING_CONTEXT_ELEMENT_TYPE_RATE_LIMIT = 0x5, }; enum { @@ -4112,22 +4114,26 @@ enum { ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2, ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3, ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4, + ELEMENT_TYPE_CAP_MASK_RATE_LIMIT = 1 << 5, }; enum { TSAR_ELEMENT_TSAR_TYPE_DWRR = 0x0, TSAR_ELEMENT_TSAR_TYPE_ROUND_ROBIN = 0x1, TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2, + TSAR_ELEMENT_TSAR_TYPE_TC_ARB = 0x3, }; enum { TSAR_TYPE_CAP_MASK_DWRR = 1 << 0, TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1, TSAR_TYPE_CAP_MASK_ETS = 1 << 2, + TSAR_TYPE_CAP_MASK_TC_ARB = 1 << 3, }; struct mlx5_ifc_tsar_element_bits { - u8 reserved_at_0[0x8]; + u8 traffic_class[0x4]; + u8 reserved_at_4[0x4]; u8 tsar_type[0x8]; u8 reserved_at_10[0x10]; }; @@ -4164,7 +4170,9 @@ struct mlx5_ifc_scheduling_context_bits { u8 max_average_bw[0x20]; - u8 reserved_at_e0[0x120]; + u8 max_bw_obj_id[0x20]; + + u8 reserved_at_100[0x100]; }; struct mlx5_ifc_rqtc_bits { -- cgit v1.2.3 From f09ed834a946f9c77088d53af4d4806974728d7b Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Thu, 5 Dec 2024 00:09:24 +0200 Subject: net/mlx5: qos: Add ifc support for cross-esw scheduling This adds the capability bit and the vport element fields related to cross-esw scheduling. Signed-off-by: Cosmin Ratiu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241204220931.254964-5-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 8b202521b774..5451ff1d4356 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1095,7 +1095,9 @@ struct mlx5_ifc_qos_cap_bits { u8 log_esw_max_sched_depth[0x4]; u8 reserved_at_10[0x10]; - u8 reserved_at_20[0xb]; + u8 reserved_at_20[0x9]; + u8 esw_cross_esw_sched[0x1]; + u8 reserved_at_2a[0x1]; u8 log_max_qos_nic_queue_group[0x5]; u8 reserved_at_30[0x10]; @@ -4139,13 +4141,16 @@ struct mlx5_ifc_tsar_element_bits { }; struct mlx5_ifc_vport_element_bits { - u8 reserved_at_0[0x10]; + u8 reserved_at_0[0x4]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 eswitch_owner_vhca_id[0xb]; u8 vport_number[0x10]; }; struct mlx5_ifc_vport_tc_element_bits { u8 traffic_class[0x4]; - u8 reserved_at_4[0xc]; + u8 eswitch_owner_vhca_id_valid[0x1]; + u8 eswitch_owner_vhca_id[0xb]; u8 vport_number[0x10]; }; -- cgit v1.2.3 From 6d9d6ab3a82af50e36e13e7bc8e2d1b970e39f79 Mon Sep 17 00:00:00 2001 From: Takahiro Kuwano Date: Tue, 3 Dec 2024 11:46:49 +0900 Subject: mtd: spinand: Introduce a way to avoid raw access SkyHigh spinand device has ECC enable bit in configuration register but it must be always enabled. If ECC is disabled, read and write ops results in undetermined state. For such devices, a way to avoid raw access is needed. Introduce SPINAND_NO_RAW_ACCESS flag to advertise the device does not support raw access. In such devices, the on-die ECC engine ops returns error to I/O request in raw mode. Checking and marking BBM need to be cared as special case, by adding fallback mechanism that tries read/write OOB with ECC enabled. Signed-off-by: Takahiro Kuwano Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/core.c | 22 ++++++++++++++++++++-- include/linux/mtd/spinand.h | 1 + 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index 00e1bfa416ce..f46769eda388 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -294,6 +294,9 @@ static int spinand_ondie_ecc_prepare_io_req(struct nand_device *nand, struct spinand_device *spinand = nand_to_spinand(nand); bool enable = (req->mode != MTD_OPS_RAW); + if (!enable && spinand->flags & SPINAND_NO_RAW_ACCESS) + return -EOPNOTSUPP; + memset(spinand->oobbuf, 0xff, nanddev_per_page_oobsize(nand)); /* Only enable or disable the engine */ @@ -901,9 +904,17 @@ static bool spinand_isbad(struct nand_device *nand, const struct nand_pos *pos) .oobbuf.in = marker, .mode = MTD_OPS_RAW, }; + int ret; spinand_select_target(spinand, pos->target); - spinand_read_page(spinand, &req); + + ret = spinand_read_page(spinand, &req); + if (ret == -EOPNOTSUPP) { + /* Retry with ECC in case raw access is not supported */ + req.mode = MTD_OPS_PLACE_OOB; + spinand_read_page(spinand, &req); + } + if (marker[0] != 0xff || marker[1] != 0xff) return true; @@ -942,7 +953,14 @@ static int spinand_markbad(struct nand_device *nand, const struct nand_pos *pos) if (ret) return ret; - return spinand_write_page(spinand, &req); + ret = spinand_write_page(spinand, &req); + if (ret == -EOPNOTSUPP) { + /* Retry with ECC in case raw access is not supported */ + req.mode = MTD_OPS_PLACE_OOB; + ret = spinand_write_page(spinand, &req); + } + + return ret; } static int spinand_mtd_block_markbad(struct mtd_info *mtd, loff_t offs) diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 702e5fb13dae..5cf11005b41a 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -314,6 +314,7 @@ struct spinand_ecc_info { #define SPINAND_HAS_CR_FEAT_BIT BIT(1) #define SPINAND_HAS_PROG_PLANE_SELECT_BIT BIT(2) #define SPINAND_HAS_READ_PLANE_SELECT_BIT BIT(3) +#define SPINAND_NO_RAW_ACCESS BIT(4) /** * struct spinand_ondie_ecc_conf - private SPI-NAND on-die ECC engine structure -- cgit v1.2.3 From 1a50e3612de9187857f55ee14a573f7f8e7d4ebc Mon Sep 17 00:00:00 2001 From: Takahiro Kuwano Date: Tue, 3 Dec 2024 11:46:50 +0900 Subject: mtd: spinand: Add support for SkyHigh S35ML-3 family SkyHigh S35ML01G300, S35ML01G301, S35ML02G300, and S35ML04G300 are 1Gb, 2Gb, and 4Gb SLC SPI NAND flash family. This family of devices has on-die ECC which parity bits are stored to hidden area. In this family the on-die ECC cannot be disabled so raw access needs to be prevented. Link: https://www.skyhighmemory.com/download/SPI_S35ML01_04G3_002_19205.pdf?v=P Co-developed-by: KR Kim Signed-off-by: KR Kim Signed-off-by: Takahiro Kuwano Signed-off-by: Miquel Raynal --- drivers/mtd/nand/spi/Makefile | 2 +- drivers/mtd/nand/spi/core.c | 1 + drivers/mtd/nand/spi/skyhigh.c | 147 +++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/spinand.h | 1 + 4 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 drivers/mtd/nand/spi/skyhigh.c (limited to 'include') diff --git a/drivers/mtd/nand/spi/Makefile b/drivers/mtd/nand/spi/Makefile index 19cc77288ebb..1e61ab21893a 100644 --- a/drivers/mtd/nand/spi/Makefile +++ b/drivers/mtd/nand/spi/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 spinand-objs := core.o alliancememory.o ato.o esmt.o foresee.o gigadevice.o macronix.o -spinand-objs += micron.o paragon.o toshiba.o winbond.o xtx.o +spinand-objs += micron.o paragon.o skyhigh.o toshiba.o winbond.o xtx.o obj-$(CONFIG_MTD_SPI_NAND) += spinand.o diff --git a/drivers/mtd/nand/spi/core.c b/drivers/mtd/nand/spi/core.c index f46769eda388..45d7132e5f95 100644 --- a/drivers/mtd/nand/spi/core.c +++ b/drivers/mtd/nand/spi/core.c @@ -1131,6 +1131,7 @@ static const struct spinand_manufacturer *spinand_manufacturers[] = { ¯onix_spinand_manufacturer, µn_spinand_manufacturer, ¶gon_spinand_manufacturer, + &skyhigh_spinand_manufacturer, &toshiba_spinand_manufacturer, &winbond_spinand_manufacturer, &xtx_spinand_manufacturer, diff --git a/drivers/mtd/nand/spi/skyhigh.c b/drivers/mtd/nand/spi/skyhigh.c new file mode 100644 index 000000000000..8b32b74ff34b --- /dev/null +++ b/drivers/mtd/nand/spi/skyhigh.c @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2024 SkyHigh Memory Limited + * + * Author: Takahiro Kuwano + * Co-Author: KR Kim + */ + +#include +#include +#include + +#define SPINAND_MFR_SKYHIGH 0x01 +#define SKYHIGH_STATUS_ECC_1TO2_BITFLIPS (1 << 4) +#define SKYHIGH_STATUS_ECC_3TO6_BITFLIPS (2 << 4) +#define SKYHIGH_STATUS_ECC_UNCOR_ERROR (3 << 4) +#define SKYHIGH_CONFIG_PROTECT_EN BIT(1) + +static SPINAND_OP_VARIANTS(read_cache_variants, + SPINAND_PAGE_READ_FROM_CACHE_QUADIO_OP(0, 4, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X4_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_DUALIO_OP(0, 2, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_X2_OP(0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(true, 0, 1, NULL, 0), + SPINAND_PAGE_READ_FROM_CACHE_OP(false, 0, 1, NULL, 0)); + +static SPINAND_OP_VARIANTS(write_cache_variants, + SPINAND_PROG_LOAD_X4(true, 0, NULL, 0), + SPINAND_PROG_LOAD(true, 0, NULL, 0)); + +static SPINAND_OP_VARIANTS(update_cache_variants, + SPINAND_PROG_LOAD_X4(false, 0, NULL, 0), + SPINAND_PROG_LOAD(false, 0, NULL, 0)); + +static int skyhigh_spinand_ooblayout_ecc(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + /* ECC bytes are stored in hidden area. */ + return -ERANGE; +} + +static int skyhigh_spinand_ooblayout_free(struct mtd_info *mtd, int section, + struct mtd_oob_region *region) +{ + if (section) + return -ERANGE; + + /* ECC bytes are stored in hidden area. Reserve 2 bytes for the BBM. */ + region->offset = 2; + region->length = mtd->oobsize - 2; + + return 0; +} + +static const struct mtd_ooblayout_ops skyhigh_spinand_ooblayout = { + .ecc = skyhigh_spinand_ooblayout_ecc, + .free = skyhigh_spinand_ooblayout_free, +}; + +static int skyhigh_spinand_ecc_get_status(struct spinand_device *spinand, + u8 status) +{ + switch (status & STATUS_ECC_MASK) { + case STATUS_ECC_NO_BITFLIPS: + return 0; + + case SKYHIGH_STATUS_ECC_UNCOR_ERROR: + return -EBADMSG; + + case SKYHIGH_STATUS_ECC_1TO2_BITFLIPS: + return 2; + + case SKYHIGH_STATUS_ECC_3TO6_BITFLIPS: + return 6; + + default: + break; + } + + return -EINVAL; +} + +static const struct spinand_info skyhigh_spinand_table[] = { + SPINAND_INFO("S35ML01G301", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x15), + NAND_MEMORG(1, 2048, 64, 64, 1024, 20, 1, 1, 1), + NAND_ECCREQ(6, 32), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_NO_RAW_ACCESS, + SPINAND_ECCINFO(&skyhigh_spinand_ooblayout, + skyhigh_spinand_ecc_get_status)), + SPINAND_INFO("S35ML01G300", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x14), + NAND_MEMORG(1, 2048, 128, 64, 1024, 20, 1, 1, 1), + NAND_ECCREQ(6, 32), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_NO_RAW_ACCESS, + SPINAND_ECCINFO(&skyhigh_spinand_ooblayout, + skyhigh_spinand_ecc_get_status)), + SPINAND_INFO("S35ML02G300", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x25), + NAND_MEMORG(1, 2048, 128, 64, 2048, 40, 2, 1, 1), + NAND_ECCREQ(6, 32), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_NO_RAW_ACCESS, + SPINAND_ECCINFO(&skyhigh_spinand_ooblayout, + skyhigh_spinand_ecc_get_status)), + SPINAND_INFO("S35ML04G300", + SPINAND_ID(SPINAND_READID_METHOD_OPCODE_DUMMY, 0x35), + NAND_MEMORG(1, 2048, 128, 64, 4096, 80, 2, 1, 1), + NAND_ECCREQ(6, 32), + SPINAND_INFO_OP_VARIANTS(&read_cache_variants, + &write_cache_variants, + &update_cache_variants), + SPINAND_NO_RAW_ACCESS, + SPINAND_ECCINFO(&skyhigh_spinand_ooblayout, + skyhigh_spinand_ecc_get_status)), +}; + +static int skyhigh_spinand_init(struct spinand_device *spinand) +{ + /* + * Config_Protect_En (bit 1 in Block Lock register) must be set to 1 + * before writing other bits. Do it here before core unlocks all blocks + * by writing block protection bits. + */ + return spinand_write_reg_op(spinand, REG_BLOCK_LOCK, + SKYHIGH_CONFIG_PROTECT_EN); +} + +static const struct spinand_manufacturer_ops skyhigh_spinand_manuf_ops = { + .init = skyhigh_spinand_init, +}; + +const struct spinand_manufacturer skyhigh_spinand_manufacturer = { + .id = SPINAND_MFR_SKYHIGH, + .name = "SkyHigh", + .chips = skyhigh_spinand_table, + .nchips = ARRAY_SIZE(skyhigh_spinand_table), + .ops = &skyhigh_spinand_manuf_ops, +}; diff --git a/include/linux/mtd/spinand.h b/include/linux/mtd/spinand.h index 5cf11005b41a..cbbcd44ac225 100644 --- a/include/linux/mtd/spinand.h +++ b/include/linux/mtd/spinand.h @@ -268,6 +268,7 @@ extern const struct spinand_manufacturer gigadevice_spinand_manufacturer; extern const struct spinand_manufacturer macronix_spinand_manufacturer; extern const struct spinand_manufacturer micron_spinand_manufacturer; extern const struct spinand_manufacturer paragon_spinand_manufacturer; +extern const struct spinand_manufacturer skyhigh_spinand_manufacturer; extern const struct spinand_manufacturer toshiba_spinand_manufacturer; extern const struct spinand_manufacturer winbond_spinand_manufacturer; extern const struct spinand_manufacturer xtx_spinand_manufacturer; -- cgit v1.2.3 From 0600cf40e9b36fe17f9c9f04d4f9cef249eaa5e7 Mon Sep 17 00:00:00 2001 From: Justin Iurman Date: Tue, 3 Dec 2024 13:49:42 +0100 Subject: include: net: add static inline dst_dev_overhead() to dst.h Add static inline dst_dev_overhead() function to include/net/dst.h. This helper function is used by ioam6_iptunnel, rpl_iptunnel and seg6_iptunnel to get the dev's overhead based on a cache entry (dst_entry). If the cache is empty, the default and generic value skb->mac_len is returned. Otherwise, LL_RESERVED_SPACE() over dst's dev is returned. Signed-off-by: Justin Iurman Cc: Alexander Lobakin Cc: Vadim Fedorenko Signed-off-by: Paolo Abeni --- include/net/dst.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/dst.h b/include/net/dst.h index 0f303cc60252..08647c99d79c 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -440,6 +440,15 @@ static inline void dst_set_expires(struct dst_entry *dst, int timeout) dst->expires = expires; } +static inline unsigned int dst_dev_overhead(struct dst_entry *dst, + struct sk_buff *skb) +{ + if (likely(dst)) + return LL_RESERVED_SPACE(dst->dev); + + return skb->mac_len; +} + INDIRECT_CALLABLE_DECLARE(int ip6_output(struct net *, struct sock *, struct sk_buff *)); INDIRECT_CALLABLE_DECLARE(int ip_output(struct net *, struct sock *, -- cgit v1.2.3 From 49922401c2190713c5cc03902dc68c3ecd3f13e8 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:47 -0800 Subject: ethtool: separate definitions that are gonna be generated Reshuffle definitions that are gonna be generated into ethtool_netlink_generated.h and match ynl spec order. This should make it easier to compare the output of the ynl-gen-c to the existing uapi header. No functional changes. Things that are still remaining to be manually defined: - ETHTOOL_FLAG_ALL - probably no good way to add to spec? - some of the cable test bits (not sure whether it's possible to move to spec) - some of the stats definitions (no way currently to move to spec) Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-7-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 +- include/uapi/linux/ethtool_netlink.h | 893 +----------------------- include/uapi/linux/ethtool_netlink_generated.h | 899 +++++++++++++++++++++++++ 3 files changed, 901 insertions(+), 893 deletions(-) create mode 100644 include/uapi/linux/ethtool_netlink_generated.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 686109008d8e..79756f2100e0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16280,7 +16280,7 @@ F: include/linux/inetdevice.h F: include/linux/netdev* F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h -F: include/uapi/linux/ethtool_netlink.h +F: include/uapi/linux/ethtool_netlink* F: include/uapi/linux/if_* F: include/uapi/linux/net_shaper.h F: include/uapi/linux/netdev* diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 283305f6b063..9c909ce733a5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -10,545 +10,12 @@ #define _UAPI_LINUX_ETHTOOL_NETLINK_H_ #include - -/* message types - userspace to kernel */ -enum { - ETHTOOL_MSG_USER_NONE, - ETHTOOL_MSG_STRSET_GET, - ETHTOOL_MSG_LINKINFO_GET, - ETHTOOL_MSG_LINKINFO_SET, - ETHTOOL_MSG_LINKMODES_GET, - ETHTOOL_MSG_LINKMODES_SET, - ETHTOOL_MSG_LINKSTATE_GET, - ETHTOOL_MSG_DEBUG_GET, - ETHTOOL_MSG_DEBUG_SET, - ETHTOOL_MSG_WOL_GET, - ETHTOOL_MSG_WOL_SET, - ETHTOOL_MSG_FEATURES_GET, - ETHTOOL_MSG_FEATURES_SET, - ETHTOOL_MSG_PRIVFLAGS_GET, - ETHTOOL_MSG_PRIVFLAGS_SET, - ETHTOOL_MSG_RINGS_GET, - ETHTOOL_MSG_RINGS_SET, - ETHTOOL_MSG_CHANNELS_GET, - ETHTOOL_MSG_CHANNELS_SET, - ETHTOOL_MSG_COALESCE_GET, - ETHTOOL_MSG_COALESCE_SET, - ETHTOOL_MSG_PAUSE_GET, - ETHTOOL_MSG_PAUSE_SET, - ETHTOOL_MSG_EEE_GET, - ETHTOOL_MSG_EEE_SET, - ETHTOOL_MSG_TSINFO_GET, - ETHTOOL_MSG_CABLE_TEST_ACT, - ETHTOOL_MSG_CABLE_TEST_TDR_ACT, - ETHTOOL_MSG_TUNNEL_INFO_GET, - ETHTOOL_MSG_FEC_GET, - ETHTOOL_MSG_FEC_SET, - ETHTOOL_MSG_MODULE_EEPROM_GET, - ETHTOOL_MSG_STATS_GET, - ETHTOOL_MSG_PHC_VCLOCKS_GET, - ETHTOOL_MSG_MODULE_GET, - ETHTOOL_MSG_MODULE_SET, - ETHTOOL_MSG_PSE_GET, - ETHTOOL_MSG_PSE_SET, - ETHTOOL_MSG_RSS_GET, - ETHTOOL_MSG_PLCA_GET_CFG, - ETHTOOL_MSG_PLCA_SET_CFG, - ETHTOOL_MSG_PLCA_GET_STATUS, - ETHTOOL_MSG_MM_GET, - ETHTOOL_MSG_MM_SET, - ETHTOOL_MSG_MODULE_FW_FLASH_ACT, - ETHTOOL_MSG_PHY_GET, - - /* add new constants above here */ - __ETHTOOL_MSG_USER_CNT, - ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 -}; - -/* message types - kernel to userspace */ -enum { - ETHTOOL_MSG_KERNEL_NONE, - ETHTOOL_MSG_STRSET_GET_REPLY, - ETHTOOL_MSG_LINKINFO_GET_REPLY, - ETHTOOL_MSG_LINKINFO_NTF, - ETHTOOL_MSG_LINKMODES_GET_REPLY, - ETHTOOL_MSG_LINKMODES_NTF, - ETHTOOL_MSG_LINKSTATE_GET_REPLY, - ETHTOOL_MSG_DEBUG_GET_REPLY, - ETHTOOL_MSG_DEBUG_NTF, - ETHTOOL_MSG_WOL_GET_REPLY, - ETHTOOL_MSG_WOL_NTF, - ETHTOOL_MSG_FEATURES_GET_REPLY, - ETHTOOL_MSG_FEATURES_SET_REPLY, - ETHTOOL_MSG_FEATURES_NTF, - ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, - ETHTOOL_MSG_PRIVFLAGS_NTF, - ETHTOOL_MSG_RINGS_GET_REPLY, - ETHTOOL_MSG_RINGS_NTF, - ETHTOOL_MSG_CHANNELS_GET_REPLY, - ETHTOOL_MSG_CHANNELS_NTF, - ETHTOOL_MSG_COALESCE_GET_REPLY, - ETHTOOL_MSG_COALESCE_NTF, - ETHTOOL_MSG_PAUSE_GET_REPLY, - ETHTOOL_MSG_PAUSE_NTF, - ETHTOOL_MSG_EEE_GET_REPLY, - ETHTOOL_MSG_EEE_NTF, - ETHTOOL_MSG_TSINFO_GET_REPLY, - ETHTOOL_MSG_CABLE_TEST_NTF, - ETHTOOL_MSG_CABLE_TEST_TDR_NTF, - ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, - ETHTOOL_MSG_FEC_GET_REPLY, - ETHTOOL_MSG_FEC_NTF, - ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, - ETHTOOL_MSG_STATS_GET_REPLY, - ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, - ETHTOOL_MSG_MODULE_GET_REPLY, - ETHTOOL_MSG_MODULE_NTF, - ETHTOOL_MSG_PSE_GET_REPLY, - ETHTOOL_MSG_RSS_GET_REPLY, - ETHTOOL_MSG_PLCA_GET_CFG_REPLY, - ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, - ETHTOOL_MSG_PLCA_NTF, - ETHTOOL_MSG_MM_GET_REPLY, - ETHTOOL_MSG_MM_NTF, - ETHTOOL_MSG_MODULE_FW_FLASH_NTF, - ETHTOOL_MSG_PHY_GET_REPLY, - ETHTOOL_MSG_PHY_NTF, - - /* add new constants above here */ - __ETHTOOL_MSG_KERNEL_CNT, - ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 -}; - -/* request header */ - -enum ethtool_header_flags { - ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ - ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ - ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ -}; +#include #define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \ ETHTOOL_FLAG_OMIT_REPLY | \ ETHTOOL_FLAG_STATS) -enum { - ETHTOOL_A_HEADER_UNSPEC, - ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ - ETHTOOL_A_HEADER_DEV_NAME, /* string */ - ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_HEADER_CNT, - ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 -}; - -/* bit sets */ - -enum { - ETHTOOL_A_BITSET_BIT_UNSPEC, - ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ - ETHTOOL_A_BITSET_BIT_NAME, /* string */ - ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ - - /* add new constants above here */ - __ETHTOOL_A_BITSET_BIT_CNT, - ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 -}; - -enum { - ETHTOOL_A_BITSET_BITS_UNSPEC, - ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ - - /* add new constants above here */ - __ETHTOOL_A_BITSET_BITS_CNT, - ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 -}; - -enum { - ETHTOOL_A_BITSET_UNSPEC, - ETHTOOL_A_BITSET_NOMASK, /* flag */ - ETHTOOL_A_BITSET_SIZE, /* u32 */ - ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ - ETHTOOL_A_BITSET_VALUE, /* binary */ - ETHTOOL_A_BITSET_MASK, /* binary */ - - /* add new constants above here */ - __ETHTOOL_A_BITSET_CNT, - ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 -}; - -/* string sets */ - -enum { - ETHTOOL_A_STRING_UNSPEC, - ETHTOOL_A_STRING_INDEX, /* u32 */ - ETHTOOL_A_STRING_VALUE, /* string */ - - /* add new constants above here */ - __ETHTOOL_A_STRING_CNT, - ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGS_UNSPEC, - ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGS_CNT, - ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGSET_UNSPEC, - ETHTOOL_A_STRINGSET_ID, /* u32 */ - ETHTOOL_A_STRINGSET_COUNT, /* u32 */ - ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGSET_CNT, - ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 -}; - -enum { - ETHTOOL_A_STRINGSETS_UNSPEC, - ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ - - /* add new constants above here */ - __ETHTOOL_A_STRINGSETS_CNT, - ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 -}; - -/* STRSET */ - -enum { - ETHTOOL_A_STRSET_UNSPEC, - ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ - ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ - - /* add new constants above here */ - __ETHTOOL_A_STRSET_CNT, - ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 -}; - -/* LINKINFO */ - -enum { - ETHTOOL_A_LINKINFO_UNSPEC, - ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKINFO_PORT, /* u8 */ - ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ - ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKINFO_CNT, - ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 -}; - -/* LINKMODES */ - -enum { - ETHTOOL_A_LINKMODES_UNSPEC, - ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ - ETHTOOL_A_LINKMODES_OURS, /* bitset */ - ETHTOOL_A_LINKMODES_PEER, /* bitset */ - ETHTOOL_A_LINKMODES_SPEED, /* u32 */ - ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ - ETHTOOL_A_LINKMODES_LANES, /* u32 */ - ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKMODES_CNT, - ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 -}; - -/* LINKSTATE */ - -enum { - ETHTOOL_A_LINKSTATE_UNSPEC, - ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKSTATE_LINK, /* u8 */ - ETHTOOL_A_LINKSTATE_SQI, /* u32 */ - ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ - ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_LINKSTATE_CNT, - ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 -}; - -/* DEBUG */ - -enum { - ETHTOOL_A_DEBUG_UNSPEC, - ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_DEBUG_CNT, - ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 -}; - -/* WOL */ - -enum { - ETHTOOL_A_WOL_UNSPEC, - ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_WOL_MODES, /* bitset */ - ETHTOOL_A_WOL_SOPASS, /* binary */ - - /* add new constants above here */ - __ETHTOOL_A_WOL_CNT, - ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 -}; - -/* FEATURES */ - -enum { - ETHTOOL_A_FEATURES_UNSPEC, - ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEATURES_HW, /* bitset */ - ETHTOOL_A_FEATURES_WANTED, /* bitset */ - ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ - ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_FEATURES_CNT, - ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 -}; - -/* PRIVFLAGS */ - -enum { - ETHTOOL_A_PRIVFLAGS_UNSPEC, - ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ - - /* add new constants above here */ - __ETHTOOL_A_PRIVFLAGS_CNT, - ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 -}; - -/* RINGS */ - -enum { - ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, - ETHTOOL_TCP_DATA_SPLIT_DISABLED, - ETHTOOL_TCP_DATA_SPLIT_ENABLED, -}; - -enum { - ETHTOOL_A_RINGS_UNSPEC, - ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_RINGS_RX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ - ETHTOOL_A_RINGS_TX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ - ETHTOOL_A_RINGS_TX, /* u32 */ - ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ - ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_RINGS_CNT, - ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) -}; - -/* CHANNELS */ - -enum { - ETHTOOL_A_CHANNELS_UNSPEC, - ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_CHANNELS_CNT, - ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) -}; - -/* COALESCE */ - -enum { - ETHTOOL_A_COALESCE_UNSPEC, - ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ - ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ - /* nest - _A_PROFILE_IRQ_MODERATION */ - ETHTOOL_A_COALESCE_RX_PROFILE, - /* nest - _A_PROFILE_IRQ_MODERATION */ - ETHTOOL_A_COALESCE_TX_PROFILE, - - /* add new constants above here */ - __ETHTOOL_A_COALESCE_CNT, - ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) -}; - -enum { - ETHTOOL_A_PROFILE_UNSPEC, - /* nest, _A_IRQ_MODERATION_* */ - ETHTOOL_A_PROFILE_IRQ_MODERATION, - __ETHTOOL_A_PROFILE_CNT, - ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) -}; - -enum { - ETHTOOL_A_IRQ_MODERATION_UNSPEC, - ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ - - __ETHTOOL_A_IRQ_MODERATION_CNT, - ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) -}; - -/* PAUSE */ - -enum { - ETHTOOL_A_PAUSE_UNSPEC, - ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ - ETHTOOL_A_PAUSE_RX, /* u8 */ - ETHTOOL_A_PAUSE_TX, /* u8 */ - ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ - ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_PAUSE_CNT, - ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) -}; - -enum { - ETHTOOL_A_PAUSE_STAT_UNSPEC, - ETHTOOL_A_PAUSE_STAT_PAD, - - ETHTOOL_A_PAUSE_STAT_TX_FRAMES, - ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - - /* add new constants above here - * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! - */ - __ETHTOOL_A_PAUSE_STAT_CNT, - ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) -}; - -/* EEE */ - -enum { - ETHTOOL_A_EEE_UNSPEC, - ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_EEE_MODES_OURS, /* bitset */ - ETHTOOL_A_EEE_MODES_PEER, /* bitset */ - ETHTOOL_A_EEE_ACTIVE, /* u8 */ - ETHTOOL_A_EEE_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_EEE_CNT, - ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) -}; - -/* TSINFO */ - -enum { - ETHTOOL_A_TSINFO_UNSPEC, - ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ - ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ - ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ - ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ - ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ - - /* add new constants above here */ - __ETHTOOL_A_TSINFO_CNT, - ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) -}; - -enum { - ETHTOOL_A_TS_STAT_UNSPEC, - - ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ - ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ - ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ - - /* add new constants above here */ - __ETHTOOL_A_TS_STAT_CNT, - ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) - -}; - -/* PHC VCLOCKS */ - -enum { - ETHTOOL_A_PHC_VCLOCKS_UNSPEC, - ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ - ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ - - /* add new constants above here */ - __ETHTOOL_A_PHC_VCLOCKS_CNT, - ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) -}; - -/* CABLE TEST */ - -enum { - ETHTOOL_A_CABLE_TEST_UNSPEC, - ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_CNT, - ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 -}; - /* CABLE TEST NOTIFY */ enum { ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC, @@ -582,74 +49,12 @@ enum { ETHTOOL_A_CABLE_INF_SRC_ALCD, }; -enum { - ETHTOOL_A_CABLE_RESULT_UNSPEC, - ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ - ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ - - __ETHTOOL_A_CABLE_RESULT_CNT, - ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) -}; - -enum { - ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, - ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ - ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ - - __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, - ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) -}; - enum { ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC, ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED, ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED }; -enum { - ETHTOOL_A_CABLE_NEST_UNSPEC, - ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ - ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ - __ETHTOOL_A_CABLE_NEST_CNT, - ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) -}; - -enum { - ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ - - __ETHTOOL_A_CABLE_TEST_NTF_CNT, - ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) -}; - -/* CABLE TEST TDR */ - -enum { - ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, - ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 -}; - -enum { - ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_CNT, - ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 -}; - /* CABLE TEST TDR NOTIFY */ enum { @@ -689,132 +94,6 @@ enum { ETHTOOL_A_CABLE_TDR_NEST_MAX = (__ETHTOOL_A_CABLE_TDR_NEST_CNT - 1) }; -enum { - ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ - - /* add new constants above here */ - __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, - ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 -}; - -/* TUNNEL INFO */ - -enum { - ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, - ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, - ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, - - __ETHTOOL_UDP_TUNNEL_TYPE_CNT -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ - ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, - ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ - ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ - ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, - ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_UDP_UNSPEC, - - ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_UDP_CNT, - ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) -}; - -enum { - ETHTOOL_A_TUNNEL_INFO_UNSPEC, - ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ - - /* add new constants above here */ - __ETHTOOL_A_TUNNEL_INFO_CNT, - ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) -}; - -/* FEC */ - -enum { - ETHTOOL_A_FEC_UNSPEC, - ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEC_MODES, /* bitset */ - ETHTOOL_A_FEC_AUTO, /* u8 */ - ETHTOOL_A_FEC_ACTIVE, /* u32 */ - ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ - - __ETHTOOL_A_FEC_CNT, - ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) -}; - -enum { - ETHTOOL_A_FEC_STAT_UNSPEC, - ETHTOOL_A_FEC_STAT_PAD, - - ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ - ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ - ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ - - /* add new constants above here */ - __ETHTOOL_A_FEC_STAT_CNT, - ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) -}; - -/* MODULE EEPROM */ - -enum { - ETHTOOL_A_MODULE_EEPROM_UNSPEC, - ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ - - __ETHTOOL_A_MODULE_EEPROM_CNT, - ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) -}; - -/* STATS */ - -enum { - ETHTOOL_A_STATS_UNSPEC, - ETHTOOL_A_STATS_PAD, - ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STATS_GROUPS, /* bitset */ - - ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ - - ETHTOOL_A_STATS_SRC, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_STATS_CNT, - ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) -}; - enum { ETHTOOL_STATS_ETH_PHY, ETHTOOL_STATS_ETH_MAC, @@ -825,27 +104,6 @@ enum { __ETHTOOL_STATS_CNT }; -enum { - ETHTOOL_A_STATS_GRP_UNSPEC, - ETHTOOL_A_STATS_GRP_PAD, - - ETHTOOL_A_STATS_GRP_ID, /* u32 */ - ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ - - ETHTOOL_A_STATS_GRP_STAT, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ - ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ - - /* add new constants above here */ - __ETHTOOL_A_STATS_GRP_CNT, - ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) -}; - enum { /* 30.3.2.1.5 aSymbolErrorDuringCarrier */ ETHTOOL_A_STATS_ETH_PHY_5_SYM_ERR, @@ -935,155 +193,6 @@ enum { ETHTOOL_A_STATS_RMON_MAX = (__ETHTOOL_A_STATS_RMON_CNT - 1) }; -/* MODULE */ - -enum { - ETHTOOL_A_MODULE_UNSPEC, - ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ - ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ - - /* add new constants above here */ - __ETHTOOL_A_MODULE_CNT, - ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) -}; - -/* Power Sourcing Equipment */ -enum { - ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, - ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ -}; - -enum { - ETHTOOL_A_PSE_UNSPEC, - ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ - ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ - ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ - - /* add new constants above here */ - __ETHTOOL_A_PSE_CNT, - ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) -}; - -enum { - ETHTOOL_A_RSS_UNSPEC, - ETHTOOL_A_RSS_HEADER, - ETHTOOL_A_RSS_CONTEXT, /* u32 */ - ETHTOOL_A_RSS_HFUNC, /* u32 */ - ETHTOOL_A_RSS_INDIR, /* binary */ - ETHTOOL_A_RSS_HKEY, /* binary */ - ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ - ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ - - __ETHTOOL_A_RSS_CNT, - ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), -}; - -/* PLCA */ - -enum { - ETHTOOL_A_PLCA_UNSPEC, - ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PLCA_VERSION, /* u16 */ - ETHTOOL_A_PLCA_ENABLED, /* u8 */ - ETHTOOL_A_PLCA_STATUS, /* u8 */ - ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ - ETHTOOL_A_PLCA_NODE_ID, /* u32 */ - ETHTOOL_A_PLCA_TO_TMR, /* u32 */ - ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ - ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_PLCA_CNT, - ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) -}; - -/* MAC Merge (802.3) */ - -enum { - ETHTOOL_A_MM_STAT_UNSPEC, - ETHTOOL_A_MM_STAT_PAD, - - /* aMACMergeFrameAssErrorCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ - /* aMACMergeFrameSmdErrorCount */ - ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ - /* aMACMergeFrameAssOkCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ - /* aMACMergeFragCountRx */ - ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ - /* aMACMergeFragCountTx */ - ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ - /* aMACMergeHoldCount */ - ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ - - /* add new constants above here */ - __ETHTOOL_A_MM_STAT_CNT, - ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) -}; - -enum { - ETHTOOL_A_MM_UNSPEC, - ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ - ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ - ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ - ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ - - /* add new constants above here */ - __ETHTOOL_A_MM_CNT, - ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) -}; - -/* MODULE_FW_FLASH */ - -enum { - ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, - ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ - ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ - - /* add new constants above here */ - __ETHTOOL_A_MODULE_FW_FLASH_CNT, - ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) -}; - -enum { - ETHTOOL_A_PHY_UNSPEC, - ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHY_INDEX, /* u32 */ - ETHTOOL_A_PHY_DRVNAME, /* string */ - ETHTOOL_A_PHY_NAME, /* string */ - ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ - ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ - - /* add new constants above here */ - __ETHTOOL_A_PHY_CNT, - ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) -}; /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h new file mode 100644 index 000000000000..4b4bf17d1a88 --- /dev/null +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -0,0 +1,899 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H +#define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H + +/* TUNNEL INFO */ + +enum { + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, + ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, + ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, + + __ETHTOOL_UDP_TUNNEL_TYPE_CNT +}; + +/* request header */ + +enum ethtool_header_flags { + ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ + ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ + ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ +}; + +enum { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_TCP_DATA_SPLIT_DISABLED, + ETHTOOL_TCP_DATA_SPLIT_ENABLED, +}; + +enum { + ETHTOOL_A_HEADER_UNSPEC, + ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ + ETHTOOL_A_HEADER_DEV_NAME, /* string */ + ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_HEADER_CNT, + ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 +}; + +/* bit sets */ + +enum { + ETHTOOL_A_BITSET_BIT_UNSPEC, + ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ + ETHTOOL_A_BITSET_BIT_NAME, /* string */ + ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_BIT_CNT, + ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 +}; + +enum { + ETHTOOL_A_BITSET_BITS_UNSPEC, + ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_BITS_CNT, + ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 +}; + +enum { + ETHTOOL_A_BITSET_UNSPEC, + ETHTOOL_A_BITSET_NOMASK, /* flag */ + ETHTOOL_A_BITSET_SIZE, /* u32 */ + ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ + ETHTOOL_A_BITSET_VALUE, /* binary */ + ETHTOOL_A_BITSET_MASK, /* binary */ + + /* add new constants above here */ + __ETHTOOL_A_BITSET_CNT, + ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 +}; + +/* string sets */ + +enum { + ETHTOOL_A_STRING_UNSPEC, + ETHTOOL_A_STRING_INDEX, /* u32 */ + ETHTOOL_A_STRING_VALUE, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_STRING_CNT, + ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGS_UNSPEC, + ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGS_CNT, + ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGSET_UNSPEC, + ETHTOOL_A_STRINGSET_ID, /* u32 */ + ETHTOOL_A_STRINGSET_COUNT, /* u32 */ + ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGSET_CNT, + ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 +}; + +enum { + ETHTOOL_A_STRINGSETS_UNSPEC, + ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ + + /* add new constants above here */ + __ETHTOOL_A_STRINGSETS_CNT, + ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 +}; + +/* STRSET */ + +enum { + ETHTOOL_A_STRSET_UNSPEC, + ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ + ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ + + /* add new constants above here */ + __ETHTOOL_A_STRSET_CNT, + ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 +}; + +/* PRIVFLAGS */ + +enum { + ETHTOOL_A_PRIVFLAGS_UNSPEC, + ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_PRIVFLAGS_CNT, + ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 +}; + +/* RINGS */ + +enum { + ETHTOOL_A_RINGS_UNSPEC, + ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_RINGS_RX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ + ETHTOOL_A_RINGS_TX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ + ETHTOOL_A_RINGS_TX, /* u32 */ + ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ + ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ + ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_RINGS_CNT, + ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) +}; + +/* MAC Merge (802.3) */ + +enum { + ETHTOOL_A_MM_STAT_UNSPEC, + ETHTOOL_A_MM_STAT_PAD, + + /* aMACMergeFrameAssErrorCount */ + ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ + /* aMACMergeFrameSmdErrorCount */ + ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ + /* aMACMergeFrameAssOkCount */ + ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ + /* aMACMergeFragCountRx */ + ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ + /* aMACMergeFragCountTx */ + ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ + /* aMACMergeHoldCount */ + ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ + + /* add new constants above here */ + __ETHTOOL_A_MM_STAT_CNT, + ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_MM_UNSPEC, + ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ + ETHTOOL_A_MM_TX_ENABLED, /* u8 */ + ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ + ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ + ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ + ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ + ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ + ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ + ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ + ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ + + /* add new constants above here */ + __ETHTOOL_A_MM_CNT, + ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) +}; + +/* LINKINFO */ + +enum { + ETHTOOL_A_LINKINFO_UNSPEC, + ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKINFO_PORT, /* u8 */ + ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ + ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ + ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ + ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKINFO_CNT, + ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 +}; + +/* LINKMODES */ + +enum { + ETHTOOL_A_LINKMODES_UNSPEC, + ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ + ETHTOOL_A_LINKMODES_OURS, /* bitset */ + ETHTOOL_A_LINKMODES_PEER, /* bitset */ + ETHTOOL_A_LINKMODES_SPEED, /* u32 */ + ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ + ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ + ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ + ETHTOOL_A_LINKMODES_LANES, /* u32 */ + ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKMODES_CNT, + ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 +}; + +/* LINKSTATE */ + +enum { + ETHTOOL_A_LINKSTATE_UNSPEC, + ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_LINKSTATE_LINK, /* u8 */ + ETHTOOL_A_LINKSTATE_SQI, /* u32 */ + ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ + ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ + ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_LINKSTATE_CNT, + ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 +}; + +/* DEBUG */ + +enum { + ETHTOOL_A_DEBUG_UNSPEC, + ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_DEBUG_CNT, + ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 +}; + +/* WOL */ + +enum { + ETHTOOL_A_WOL_UNSPEC, + ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_WOL_MODES, /* bitset */ + ETHTOOL_A_WOL_SOPASS, /* binary */ + + /* add new constants above here */ + __ETHTOOL_A_WOL_CNT, + ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 +}; + +/* FEATURES */ + +enum { + ETHTOOL_A_FEATURES_UNSPEC, + ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEATURES_HW, /* bitset */ + ETHTOOL_A_FEATURES_WANTED, /* bitset */ + ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ + ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_FEATURES_CNT, + ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 +}; + +/* CHANNELS */ + +enum { + ETHTOOL_A_CHANNELS_UNSPEC, + ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_CHANNELS_CNT, + ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) +}; + +enum { + ETHTOOL_A_IRQ_MODERATION_UNSPEC, + ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + + __ETHTOOL_A_IRQ_MODERATION_CNT, + ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) +}; + +enum { + ETHTOOL_A_PROFILE_UNSPEC, + /* nest, _A_IRQ_MODERATION_* */ + ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, + ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) +}; + +/* COALESCE */ + +enum { + ETHTOOL_A_COALESCE_UNSPEC, + ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ + ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ + ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ + ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ + ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ + ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ + ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ + ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ + ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ + ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ + ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ + ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_RX_PROFILE, + /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_TX_PROFILE, + + /* add new constants above here */ + __ETHTOOL_A_COALESCE_CNT, + ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) +}; + +/* PAUSE */ + +enum { + ETHTOOL_A_PAUSE_STAT_UNSPEC, + ETHTOOL_A_PAUSE_STAT_PAD, + + ETHTOOL_A_PAUSE_STAT_TX_FRAMES, + ETHTOOL_A_PAUSE_STAT_RX_FRAMES, + + /* add new constants above here + * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! + */ + __ETHTOOL_A_PAUSE_STAT_CNT, + ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_PAUSE_UNSPEC, + ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ + ETHTOOL_A_PAUSE_RX, /* u8 */ + ETHTOOL_A_PAUSE_TX, /* u8 */ + ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ + ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PAUSE_CNT, + ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) +}; + +/* EEE */ + +enum { + ETHTOOL_A_EEE_UNSPEC, + ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_EEE_MODES_OURS, /* bitset */ + ETHTOOL_A_EEE_MODES_PEER, /* bitset */ + ETHTOOL_A_EEE_ACTIVE, /* u8 */ + ETHTOOL_A_EEE_ENABLED, /* u8 */ + ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ + ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_EEE_CNT, + ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) +}; + +/* TSINFO */ + +enum { + ETHTOOL_A_TS_STAT_UNSPEC, + + ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ + ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ + ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_TS_STAT_CNT, + ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) + +}; + +enum { + ETHTOOL_A_TSINFO_UNSPEC, + ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ + ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ + ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ + ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ + ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ + + /* add new constants above here */ + __ETHTOOL_A_TSINFO_CNT, + ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_RESULT_UNSPEC, + ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ + ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ + ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + + __ETHTOOL_A_CABLE_RESULT_CNT, + ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, + ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ + ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ + ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + + __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, + ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) +}; + +enum { + ETHTOOL_A_CABLE_NEST_UNSPEC, + ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ + ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ + __ETHTOOL_A_CABLE_NEST_CNT, + ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) +}; + +/* CABLE TEST */ + +enum { + ETHTOOL_A_CABLE_TEST_UNSPEC, + ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_CNT, + ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, + ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ + ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ + + __ETHTOOL_A_CABLE_TEST_NTF_CNT, + ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) +}; + +/* CABLE TEST TDR */ + +enum { + ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, + ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, + ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 +}; + +enum { + ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, + ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ + + /* add new constants above here */ + __ETHTOOL_A_CABLE_TEST_TDR_CNT, + ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, + ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_UDP_UNSPEC, + + ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_UDP_CNT, + ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) +}; + +enum { + ETHTOOL_A_TUNNEL_INFO_UNSPEC, + ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ + + /* add new constants above here */ + __ETHTOOL_A_TUNNEL_INFO_CNT, + ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) +}; + +/* FEC */ + +enum { + ETHTOOL_A_FEC_STAT_UNSPEC, + ETHTOOL_A_FEC_STAT_PAD, + + ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ + ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ + ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ + + /* add new constants above here */ + __ETHTOOL_A_FEC_STAT_CNT, + ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) +}; + +enum { + ETHTOOL_A_FEC_UNSPEC, + ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEC_MODES, /* bitset */ + ETHTOOL_A_FEC_AUTO, /* u8 */ + ETHTOOL_A_FEC_ACTIVE, /* u32 */ + ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ + + __ETHTOOL_A_FEC_CNT, + ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) +}; + +/* MODULE EEPROM */ + +enum { + ETHTOOL_A_MODULE_EEPROM_UNSPEC, + ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ + + ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ + ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ + ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ + ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ + + __ETHTOOL_A_MODULE_EEPROM_CNT, + ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) +}; + + +enum { + ETHTOOL_A_STATS_GRP_UNSPEC, + ETHTOOL_A_STATS_GRP_PAD, + + ETHTOOL_A_STATS_GRP_ID, /* u32 */ + ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ + + ETHTOOL_A_STATS_GRP_STAT, /* nest */ + + ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ + ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ + + ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ + ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ + ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ + + /* add new constants above here */ + __ETHTOOL_A_STATS_GRP_CNT, + ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) +}; + +/* STATS */ + +enum { + ETHTOOL_A_STATS_UNSPEC, + ETHTOOL_A_STATS_PAD, + ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_STATS_GROUPS, /* bitset */ + + ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ + + ETHTOOL_A_STATS_SRC, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_STATS_CNT, + ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) +}; + +/* PHC VCLOCKS */ + +enum { + ETHTOOL_A_PHC_VCLOCKS_UNSPEC, + ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ + ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHC_VCLOCKS_CNT, + ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) +}; + +/* MODULE */ + +enum { + ETHTOOL_A_MODULE_UNSPEC, + ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ + ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_CNT, + ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) +}; + +/* Power Sourcing Equipment */ +enum { + ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ +}; + +enum { + ETHTOOL_A_PSE_UNSPEC, + ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ + ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ + ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ + ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ + + /* add new constants above here */ + __ETHTOOL_A_PSE_CNT, + ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) +}; + +enum { + ETHTOOL_A_RSS_UNSPEC, + ETHTOOL_A_RSS_HEADER, + ETHTOOL_A_RSS_CONTEXT, /* u32 */ + ETHTOOL_A_RSS_HFUNC, /* u32 */ + ETHTOOL_A_RSS_INDIR, /* binary */ + ETHTOOL_A_RSS_HKEY, /* binary */ + ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ + ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ + + __ETHTOOL_A_RSS_CNT, + ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), +}; + +/* PLCA */ + +enum { + ETHTOOL_A_PLCA_UNSPEC, + ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PLCA_VERSION, /* u16 */ + ETHTOOL_A_PLCA_ENABLED, /* u8 */ + ETHTOOL_A_PLCA_STATUS, /* u8 */ + ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ + ETHTOOL_A_PLCA_NODE_ID, /* u32 */ + ETHTOOL_A_PLCA_TO_TMR, /* u32 */ + ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ + ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PLCA_CNT, + ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) +}; + +/* MODULE_FW_FLASH */ + +enum { + ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, + ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_FW_FLASH_CNT, + ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) +}; + +enum { + ETHTOOL_A_PHY_UNSPEC, + ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHY_INDEX, /* u32 */ + ETHTOOL_A_PHY_DRVNAME, /* string */ + ETHTOOL_A_PHY_NAME, /* string */ + ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_CNT, + ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) +}; + +/* message types - userspace to kernel */ +enum { + ETHTOOL_MSG_USER_NONE, + ETHTOOL_MSG_STRSET_GET, + ETHTOOL_MSG_LINKINFO_GET, + ETHTOOL_MSG_LINKINFO_SET, + ETHTOOL_MSG_LINKMODES_GET, + ETHTOOL_MSG_LINKMODES_SET, + ETHTOOL_MSG_LINKSTATE_GET, + ETHTOOL_MSG_DEBUG_GET, + ETHTOOL_MSG_DEBUG_SET, + ETHTOOL_MSG_WOL_GET, + ETHTOOL_MSG_WOL_SET, + ETHTOOL_MSG_FEATURES_GET, + ETHTOOL_MSG_FEATURES_SET, + ETHTOOL_MSG_PRIVFLAGS_GET, + ETHTOOL_MSG_PRIVFLAGS_SET, + ETHTOOL_MSG_RINGS_GET, + ETHTOOL_MSG_RINGS_SET, + ETHTOOL_MSG_CHANNELS_GET, + ETHTOOL_MSG_CHANNELS_SET, + ETHTOOL_MSG_COALESCE_GET, + ETHTOOL_MSG_COALESCE_SET, + ETHTOOL_MSG_PAUSE_GET, + ETHTOOL_MSG_PAUSE_SET, + ETHTOOL_MSG_EEE_GET, + ETHTOOL_MSG_EEE_SET, + ETHTOOL_MSG_TSINFO_GET, + ETHTOOL_MSG_CABLE_TEST_ACT, + ETHTOOL_MSG_CABLE_TEST_TDR_ACT, + ETHTOOL_MSG_TUNNEL_INFO_GET, + ETHTOOL_MSG_FEC_GET, + ETHTOOL_MSG_FEC_SET, + ETHTOOL_MSG_MODULE_EEPROM_GET, + ETHTOOL_MSG_STATS_GET, + ETHTOOL_MSG_PHC_VCLOCKS_GET, + ETHTOOL_MSG_MODULE_GET, + ETHTOOL_MSG_MODULE_SET, + ETHTOOL_MSG_PSE_GET, + ETHTOOL_MSG_PSE_SET, + ETHTOOL_MSG_RSS_GET, + ETHTOOL_MSG_PLCA_GET_CFG, + ETHTOOL_MSG_PLCA_SET_CFG, + ETHTOOL_MSG_PLCA_GET_STATUS, + ETHTOOL_MSG_MM_GET, + ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_MODULE_FW_FLASH_ACT, + ETHTOOL_MSG_PHY_GET, + + /* add new constants above here */ + __ETHTOOL_MSG_USER_CNT, + ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 +}; + +/* message types - kernel to userspace */ +enum { + ETHTOOL_MSG_KERNEL_NONE, + ETHTOOL_MSG_STRSET_GET_REPLY, + ETHTOOL_MSG_LINKINFO_GET_REPLY, + ETHTOOL_MSG_LINKINFO_NTF, + ETHTOOL_MSG_LINKMODES_GET_REPLY, + ETHTOOL_MSG_LINKMODES_NTF, + ETHTOOL_MSG_LINKSTATE_GET_REPLY, + ETHTOOL_MSG_DEBUG_GET_REPLY, + ETHTOOL_MSG_DEBUG_NTF, + ETHTOOL_MSG_WOL_GET_REPLY, + ETHTOOL_MSG_WOL_NTF, + ETHTOOL_MSG_FEATURES_GET_REPLY, + ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_MSG_FEATURES_NTF, + ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + ETHTOOL_MSG_PRIVFLAGS_NTF, + ETHTOOL_MSG_RINGS_GET_REPLY, + ETHTOOL_MSG_RINGS_NTF, + ETHTOOL_MSG_CHANNELS_GET_REPLY, + ETHTOOL_MSG_CHANNELS_NTF, + ETHTOOL_MSG_COALESCE_GET_REPLY, + ETHTOOL_MSG_COALESCE_NTF, + ETHTOOL_MSG_PAUSE_GET_REPLY, + ETHTOOL_MSG_PAUSE_NTF, + ETHTOOL_MSG_EEE_GET_REPLY, + ETHTOOL_MSG_EEE_NTF, + ETHTOOL_MSG_TSINFO_GET_REPLY, + ETHTOOL_MSG_CABLE_TEST_NTF, + ETHTOOL_MSG_CABLE_TEST_TDR_NTF, + ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY, + ETHTOOL_MSG_FEC_GET_REPLY, + ETHTOOL_MSG_FEC_NTF, + ETHTOOL_MSG_MODULE_EEPROM_GET_REPLY, + ETHTOOL_MSG_STATS_GET_REPLY, + ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY, + ETHTOOL_MSG_MODULE_GET_REPLY, + ETHTOOL_MSG_MODULE_NTF, + ETHTOOL_MSG_PSE_GET_REPLY, + ETHTOOL_MSG_RSS_GET_REPLY, + ETHTOOL_MSG_PLCA_GET_CFG_REPLY, + ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, + ETHTOOL_MSG_PLCA_NTF, + ETHTOOL_MSG_MM_GET_REPLY, + ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_MODULE_FW_FLASH_NTF, + ETHTOOL_MSG_PHY_GET_REPLY, + ETHTOOL_MSG_PHY_NTF, + + /* add new constants above here */ + __ETHTOOL_MSG_KERNEL_CNT, + ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 +}; + +#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */ -- cgit v1.2.3 From dd7cde36de15b071b5f9163d21d7c9142089b424 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:48 -0800 Subject: ethtool: remove the comments that are not gonna be generated Cleanup the header manually to make it easier to review the changes that ynl generator brings in. No functional changes. Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-8-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 678 ++++++++++--------------- 1 file changed, 274 insertions(+), 404 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 4b4bf17d1a88..35a24d490efe 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -2,8 +2,6 @@ #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H -/* TUNNEL INFO */ - enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, @@ -12,8 +10,6 @@ enum { __ETHTOOL_UDP_TUNNEL_TYPE_CNT }; -/* request header */ - enum ethtool_header_flags { ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ @@ -28,303 +24,250 @@ enum { enum { ETHTOOL_A_HEADER_UNSPEC, - ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ - ETHTOOL_A_HEADER_DEV_NAME, /* string */ - ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ + ETHTOOL_A_HEADER_DEV_INDEX, + ETHTOOL_A_HEADER_DEV_NAME, + ETHTOOL_A_HEADER_FLAGS, + ETHTOOL_A_HEADER_PHY_INDEX, - /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 }; -/* bit sets */ - enum { ETHTOOL_A_BITSET_BIT_UNSPEC, - ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */ - ETHTOOL_A_BITSET_BIT_NAME, /* string */ - ETHTOOL_A_BITSET_BIT_VALUE, /* flag */ + ETHTOOL_A_BITSET_BIT_INDEX, + ETHTOOL_A_BITSET_BIT_NAME, + ETHTOOL_A_BITSET_BIT_VALUE, - /* add new constants above here */ __ETHTOOL_A_BITSET_BIT_CNT, ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 }; enum { ETHTOOL_A_BITSET_BITS_UNSPEC, - ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */ + ETHTOOL_A_BITSET_BITS_BIT, - /* add new constants above here */ __ETHTOOL_A_BITSET_BITS_CNT, ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 }; enum { ETHTOOL_A_BITSET_UNSPEC, - ETHTOOL_A_BITSET_NOMASK, /* flag */ - ETHTOOL_A_BITSET_SIZE, /* u32 */ - ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */ - ETHTOOL_A_BITSET_VALUE, /* binary */ - ETHTOOL_A_BITSET_MASK, /* binary */ + ETHTOOL_A_BITSET_NOMASK, + ETHTOOL_A_BITSET_SIZE, + ETHTOOL_A_BITSET_BITS, + ETHTOOL_A_BITSET_VALUE, + ETHTOOL_A_BITSET_MASK, - /* add new constants above here */ __ETHTOOL_A_BITSET_CNT, ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 }; -/* string sets */ - enum { ETHTOOL_A_STRING_UNSPEC, - ETHTOOL_A_STRING_INDEX, /* u32 */ - ETHTOOL_A_STRING_VALUE, /* string */ + ETHTOOL_A_STRING_INDEX, + ETHTOOL_A_STRING_VALUE, - /* add new constants above here */ __ETHTOOL_A_STRING_CNT, ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 }; enum { ETHTOOL_A_STRINGS_UNSPEC, - ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */ + ETHTOOL_A_STRINGS_STRING, - /* add new constants above here */ __ETHTOOL_A_STRINGS_CNT, ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 }; enum { ETHTOOL_A_STRINGSET_UNSPEC, - ETHTOOL_A_STRINGSET_ID, /* u32 */ - ETHTOOL_A_STRINGSET_COUNT, /* u32 */ - ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */ + ETHTOOL_A_STRINGSET_ID, + ETHTOOL_A_STRINGSET_COUNT, + ETHTOOL_A_STRINGSET_STRINGS, - /* add new constants above here */ __ETHTOOL_A_STRINGSET_CNT, ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 }; enum { ETHTOOL_A_STRINGSETS_UNSPEC, - ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */ + ETHTOOL_A_STRINGSETS_STRINGSET, - /* add new constants above here */ __ETHTOOL_A_STRINGSETS_CNT, ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 }; -/* STRSET */ - enum { ETHTOOL_A_STRSET_UNSPEC, - ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */ - ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */ + ETHTOOL_A_STRSET_HEADER, + ETHTOOL_A_STRSET_STRINGSETS, + ETHTOOL_A_STRSET_COUNTS_ONLY, - /* add new constants above here */ __ETHTOOL_A_STRSET_CNT, ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 }; -/* PRIVFLAGS */ - enum { ETHTOOL_A_PRIVFLAGS_UNSPEC, - ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + ETHTOOL_A_PRIVFLAGS_HEADER, + ETHTOOL_A_PRIVFLAGS_FLAGS, - /* add new constants above here */ __ETHTOOL_A_PRIVFLAGS_CNT, ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 }; -/* RINGS */ - enum { ETHTOOL_A_RINGS_UNSPEC, - ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_RINGS_RX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ - ETHTOOL_A_RINGS_TX_MAX, /* u32 */ - ETHTOOL_A_RINGS_RX, /* u32 */ - ETHTOOL_A_RINGS_RX_MINI, /* u32 */ - ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ - ETHTOOL_A_RINGS_TX, /* u32 */ - ETHTOOL_A_RINGS_RX_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TCP_DATA_SPLIT, /* u8 */ - ETHTOOL_A_RINGS_CQE_SIZE, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_RX_PUSH, /* u8 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, /* u32 */ - ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_RINGS_HEADER, + ETHTOOL_A_RINGS_RX_MAX, + ETHTOOL_A_RINGS_RX_MINI_MAX, + ETHTOOL_A_RINGS_RX_JUMBO_MAX, + ETHTOOL_A_RINGS_TX_MAX, + ETHTOOL_A_RINGS_RX, + ETHTOOL_A_RINGS_RX_MINI, + ETHTOOL_A_RINGS_RX_JUMBO, + ETHTOOL_A_RINGS_TX, + ETHTOOL_A_RINGS_RX_BUF_LEN, + ETHTOOL_A_RINGS_TCP_DATA_SPLIT, + ETHTOOL_A_RINGS_CQE_SIZE, + ETHTOOL_A_RINGS_TX_PUSH, + ETHTOOL_A_RINGS_RX_PUSH, + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, + ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, + __ETHTOOL_A_RINGS_CNT, ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) }; -/* MAC Merge (802.3) */ - enum { ETHTOOL_A_MM_STAT_UNSPEC, ETHTOOL_A_MM_STAT_PAD, + ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, + ETHTOOL_A_MM_STAT_SMD_ERRORS, + ETHTOOL_A_MM_STAT_REASSEMBLY_OK, + ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, + ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, + ETHTOOL_A_MM_STAT_HOLD_COUNT, - /* aMACMergeFrameAssErrorCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS, /* u64 */ - /* aMACMergeFrameSmdErrorCount */ - ETHTOOL_A_MM_STAT_SMD_ERRORS, /* u64 */ - /* aMACMergeFrameAssOkCount */ - ETHTOOL_A_MM_STAT_REASSEMBLY_OK, /* u64 */ - /* aMACMergeFragCountRx */ - ETHTOOL_A_MM_STAT_RX_FRAG_COUNT, /* u64 */ - /* aMACMergeFragCountTx */ - ETHTOOL_A_MM_STAT_TX_FRAG_COUNT, /* u64 */ - /* aMACMergeHoldCount */ - ETHTOOL_A_MM_STAT_HOLD_COUNT, /* u64 */ - - /* add new constants above here */ __ETHTOOL_A_MM_STAT_CNT, ETHTOOL_A_MM_STAT_MAX = (__ETHTOOL_A_MM_STAT_CNT - 1) }; enum { ETHTOOL_A_MM_UNSPEC, - ETHTOOL_A_MM_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MM_PMAC_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ENABLED, /* u8 */ - ETHTOOL_A_MM_TX_ACTIVE, /* u8 */ - ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, /* u32 */ - ETHTOOL_A_MM_VERIFY_ENABLED, /* u8 */ - ETHTOOL_A_MM_VERIFY_STATUS, /* u8 */ - ETHTOOL_A_MM_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_MAX_VERIFY_TIME, /* u32 */ - ETHTOOL_A_MM_STATS, /* nest - _A_MM_STAT_* */ - - /* add new constants above here */ + ETHTOOL_A_MM_HEADER, + ETHTOOL_A_MM_PMAC_ENABLED, + ETHTOOL_A_MM_TX_ENABLED, + ETHTOOL_A_MM_TX_ACTIVE, + ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, + ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, + ETHTOOL_A_MM_VERIFY_ENABLED, + ETHTOOL_A_MM_VERIFY_STATUS, + ETHTOOL_A_MM_VERIFY_TIME, + ETHTOOL_A_MM_MAX_VERIFY_TIME, + ETHTOOL_A_MM_STATS, + __ETHTOOL_A_MM_CNT, ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; -/* LINKINFO */ - enum { ETHTOOL_A_LINKINFO_UNSPEC, - ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKINFO_PORT, /* u8 */ - ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */ - ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */ - ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */ - - /* add new constants above here */ + ETHTOOL_A_LINKINFO_HEADER, + ETHTOOL_A_LINKINFO_PORT, + ETHTOOL_A_LINKINFO_PHYADDR, + ETHTOOL_A_LINKINFO_TP_MDIX, + ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, + ETHTOOL_A_LINKINFO_TRANSCEIVER, + __ETHTOOL_A_LINKINFO_CNT, ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 }; -/* LINKMODES */ - enum { ETHTOOL_A_LINKMODES_UNSPEC, - ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */ - ETHTOOL_A_LINKMODES_OURS, /* bitset */ - ETHTOOL_A_LINKMODES_PEER, /* bitset */ - ETHTOOL_A_LINKMODES_SPEED, /* u32 */ - ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, /* u8 */ - ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, /* u8 */ - ETHTOOL_A_LINKMODES_LANES, /* u32 */ - ETHTOOL_A_LINKMODES_RATE_MATCHING, /* u8 */ - - /* add new constants above here */ + ETHTOOL_A_LINKMODES_HEADER, + ETHTOOL_A_LINKMODES_AUTONEG, + ETHTOOL_A_LINKMODES_OURS, + ETHTOOL_A_LINKMODES_PEER, + ETHTOOL_A_LINKMODES_SPEED, + ETHTOOL_A_LINKMODES_DUPLEX, + ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, + ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, + ETHTOOL_A_LINKMODES_LANES, + ETHTOOL_A_LINKMODES_RATE_MATCHING, + __ETHTOOL_A_LINKMODES_CNT, ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 }; -/* LINKSTATE */ - enum { ETHTOOL_A_LINKSTATE_UNSPEC, - ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_LINKSTATE_LINK, /* u8 */ - ETHTOOL_A_LINKSTATE_SQI, /* u32 */ - ETHTOOL_A_LINKSTATE_SQI_MAX, /* u32 */ - ETHTOOL_A_LINKSTATE_EXT_STATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, /* u8 */ - ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_LINKSTATE_HEADER, + ETHTOOL_A_LINKSTATE_LINK, + ETHTOOL_A_LINKSTATE_SQI, + ETHTOOL_A_LINKSTATE_SQI_MAX, + ETHTOOL_A_LINKSTATE_EXT_STATE, + ETHTOOL_A_LINKSTATE_EXT_SUBSTATE, + ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, + __ETHTOOL_A_LINKSTATE_CNT, ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 }; -/* DEBUG */ - enum { ETHTOOL_A_DEBUG_UNSPEC, - ETHTOOL_A_DEBUG_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_DEBUG_MSGMASK, /* bitset */ + ETHTOOL_A_DEBUG_HEADER, + ETHTOOL_A_DEBUG_MSGMASK, - /* add new constants above here */ __ETHTOOL_A_DEBUG_CNT, ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 }; -/* WOL */ - enum { ETHTOOL_A_WOL_UNSPEC, - ETHTOOL_A_WOL_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_WOL_MODES, /* bitset */ - ETHTOOL_A_WOL_SOPASS, /* binary */ + ETHTOOL_A_WOL_HEADER, + ETHTOOL_A_WOL_MODES, + ETHTOOL_A_WOL_SOPASS, - /* add new constants above here */ __ETHTOOL_A_WOL_CNT, ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 }; -/* FEATURES */ - enum { ETHTOOL_A_FEATURES_UNSPEC, - ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEATURES_HW, /* bitset */ - ETHTOOL_A_FEATURES_WANTED, /* bitset */ - ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ - ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + ETHTOOL_A_FEATURES_HEADER, + ETHTOOL_A_FEATURES_HW, + ETHTOOL_A_FEATURES_WANTED, + ETHTOOL_A_FEATURES_ACTIVE, + ETHTOOL_A_FEATURES_NOCHANGE, - /* add new constants above here */ __ETHTOOL_A_FEATURES_CNT, ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 }; -/* CHANNELS */ - enum { ETHTOOL_A_CHANNELS_UNSPEC, - ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ - ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ - ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_CHANNELS_HEADER, + ETHTOOL_A_CHANNELS_RX_MAX, + ETHTOOL_A_CHANNELS_TX_MAX, + ETHTOOL_A_CHANNELS_OTHER_MAX, + ETHTOOL_A_CHANNELS_COMBINED_MAX, + ETHTOOL_A_CHANNELS_RX_COUNT, + ETHTOOL_A_CHANNELS_TX_COUNT, + ETHTOOL_A_CHANNELS_OTHER_COUNT, + ETHTOOL_A_CHANNELS_COMBINED_COUNT, + __ETHTOOL_A_CHANNELS_CNT, ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) }; enum { ETHTOOL_A_IRQ_MODERATION_UNSPEC, - ETHTOOL_A_IRQ_MODERATION_USEC, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_PKTS, /* u32 */ - ETHTOOL_A_IRQ_MODERATION_COMPS, /* u32 */ + ETHTOOL_A_IRQ_MODERATION_USEC, + ETHTOOL_A_IRQ_MODERATION_PKTS, + ETHTOOL_A_IRQ_MODERATION_COMPS, __ETHTOOL_A_IRQ_MODERATION_CNT, ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1) @@ -332,111 +275,91 @@ enum { enum { ETHTOOL_A_PROFILE_UNSPEC, - /* nest, _A_IRQ_MODERATION_* */ ETHTOOL_A_PROFILE_IRQ_MODERATION, __ETHTOOL_A_PROFILE_CNT, ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) }; -/* COALESCE */ - enum { ETHTOOL_A_COALESCE_UNSPEC, - ETHTOOL_A_COALESCE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_COALESCE_RX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, /* u32 */ - ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, /* u32 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, /* u8 */ - ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, /* u8 */ - ETHTOOL_A_COALESCE_PKT_RATE_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_LOW, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, /* u32 */ - ETHTOOL_A_COALESCE_PKT_RATE_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_USECS_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, /* u32 */ - ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, /* u32 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, /* u8 */ - ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, /* u8 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, /* u32 */ - ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, /* u32 */ - /* nest - _A_PROFILE_IRQ_MODERATION */ + ETHTOOL_A_COALESCE_HEADER, + ETHTOOL_A_COALESCE_RX_USECS, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES, + ETHTOOL_A_COALESCE_RX_USECS_IRQ, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_IRQ, + ETHTOOL_A_COALESCE_TX_USECS, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES, + ETHTOOL_A_COALESCE_TX_USECS_IRQ, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_A_COALESCE_STATS_BLOCK_USECS, + ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX, + ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX, + ETHTOOL_A_COALESCE_PKT_RATE_LOW, + ETHTOOL_A_COALESCE_RX_USECS_LOW, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_LOW, + ETHTOOL_A_COALESCE_TX_USECS_LOW, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_LOW, + ETHTOOL_A_COALESCE_PKT_RATE_HIGH, + ETHTOOL_A_COALESCE_RX_USECS_HIGH, + ETHTOOL_A_COALESCE_RX_MAX_FRAMES_HIGH, + ETHTOOL_A_COALESCE_TX_USECS_HIGH, + ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH, + ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL, + ETHTOOL_A_COALESCE_USE_CQE_MODE_TX, + ETHTOOL_A_COALESCE_USE_CQE_MODE_RX, + ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES, + ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, + ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, ETHTOOL_A_COALESCE_RX_PROFILE, - /* nest - _A_PROFILE_IRQ_MODERATION */ ETHTOOL_A_COALESCE_TX_PROFILE, - /* add new constants above here */ __ETHTOOL_A_COALESCE_CNT, ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1) }; -/* PAUSE */ - enum { ETHTOOL_A_PAUSE_STAT_UNSPEC, ETHTOOL_A_PAUSE_STAT_PAD, - ETHTOOL_A_PAUSE_STAT_TX_FRAMES, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - /* add new constants above here - * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! - */ __ETHTOOL_A_PAUSE_STAT_CNT, ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; enum { ETHTOOL_A_PAUSE_UNSPEC, - ETHTOOL_A_PAUSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PAUSE_AUTONEG, /* u8 */ - ETHTOOL_A_PAUSE_RX, /* u8 */ - ETHTOOL_A_PAUSE_TX, /* u8 */ - ETHTOOL_A_PAUSE_STATS, /* nest - _PAUSE_STAT_* */ - ETHTOOL_A_PAUSE_STATS_SRC, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_PAUSE_HEADER, + ETHTOOL_A_PAUSE_AUTONEG, + ETHTOOL_A_PAUSE_RX, + ETHTOOL_A_PAUSE_TX, + ETHTOOL_A_PAUSE_STATS, + ETHTOOL_A_PAUSE_STATS_SRC, + __ETHTOOL_A_PAUSE_CNT, ETHTOOL_A_PAUSE_MAX = (__ETHTOOL_A_PAUSE_CNT - 1) }; -/* EEE */ - enum { ETHTOOL_A_EEE_UNSPEC, - ETHTOOL_A_EEE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_EEE_MODES_OURS, /* bitset */ - ETHTOOL_A_EEE_MODES_PEER, /* bitset */ - ETHTOOL_A_EEE_ACTIVE, /* u8 */ - ETHTOOL_A_EEE_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_ENABLED, /* u8 */ - ETHTOOL_A_EEE_TX_LPI_TIMER, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_EEE_HEADER, + ETHTOOL_A_EEE_MODES_OURS, + ETHTOOL_A_EEE_MODES_PEER, + ETHTOOL_A_EEE_ACTIVE, + ETHTOOL_A_EEE_ENABLED, + ETHTOOL_A_EEE_TX_LPI_ENABLED, + ETHTOOL_A_EEE_TX_LPI_TIMER, + __ETHTOOL_A_EEE_CNT, ETHTOOL_A_EEE_MAX = (__ETHTOOL_A_EEE_CNT - 1) }; -/* TSINFO */ - enum { ETHTOOL_A_TS_STAT_UNSPEC, + ETHTOOL_A_TS_STAT_TX_PKTS, + ETHTOOL_A_TS_STAT_TX_LOST, + ETHTOOL_A_TS_STAT_TX_ERR, - ETHTOOL_A_TS_STAT_TX_PKTS, /* uint */ - ETHTOOL_A_TS_STAT_TX_LOST, /* uint */ - ETHTOOL_A_TS_STAT_TX_ERR, /* uint */ - - /* add new constants above here */ __ETHTOOL_A_TS_STAT_CNT, ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) @@ -444,23 +367,22 @@ enum { enum { ETHTOOL_A_TSINFO_UNSPEC, - ETHTOOL_A_TSINFO_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TSINFO_TIMESTAMPING, /* bitset */ - ETHTOOL_A_TSINFO_TX_TYPES, /* bitset */ - ETHTOOL_A_TSINFO_RX_FILTERS, /* bitset */ - ETHTOOL_A_TSINFO_PHC_INDEX, /* u32 */ - ETHTOOL_A_TSINFO_STATS, /* nest - _A_TSINFO_STAT */ - - /* add new constants above here */ + ETHTOOL_A_TSINFO_HEADER, + ETHTOOL_A_TSINFO_TIMESTAMPING, + ETHTOOL_A_TSINFO_TX_TYPES, + ETHTOOL_A_TSINFO_RX_FILTERS, + ETHTOOL_A_TSINFO_PHC_INDEX, + ETHTOOL_A_TSINFO_STATS, + __ETHTOOL_A_TSINFO_CNT, ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) }; enum { ETHTOOL_A_CABLE_RESULT_UNSPEC, - ETHTOOL_A_CABLE_RESULT_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_RESULT_CODE, /* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */ - ETHTOOL_A_CABLE_RESULT_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + ETHTOOL_A_CABLE_RESULT_PAIR, + ETHTOOL_A_CABLE_RESULT_CODE, + ETHTOOL_A_CABLE_RESULT_SRC, __ETHTOOL_A_CABLE_RESULT_CNT, ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1) @@ -468,9 +390,9 @@ enum { enum { ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC, - ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, /* u8 ETHTOOL_A_CABLE_PAIR_ */ - ETHTOOL_A_CABLE_FAULT_LENGTH_CM, /* u32 */ - ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, /* u32 ETHTOOL_A_CABLE_INF_SRC_ */ + ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, + ETHTOOL_A_CABLE_FAULT_LENGTH_CM, + ETHTOOL_A_CABLE_FAULT_LENGTH_SRC, __ETHTOOL_A_CABLE_FAULT_LENGTH_CNT, ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1) @@ -478,245 +400,204 @@ enum { enum { ETHTOOL_A_CABLE_NEST_UNSPEC, - ETHTOOL_A_CABLE_NEST_RESULT, /* nest - ETHTOOL_A_CABLE_RESULT_ */ - ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, /* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */ + ETHTOOL_A_CABLE_NEST_RESULT, + ETHTOOL_A_CABLE_NEST_FAULT_LENGTH, + __ETHTOOL_A_CABLE_NEST_CNT, ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1) }; -/* CABLE TEST */ - enum { ETHTOOL_A_CABLE_TEST_UNSPEC, - ETHTOOL_A_CABLE_TEST_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CABLE_TEST_HEADER, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_CNT, ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_NTF_NEST, /* nest - of results: */ + ETHTOOL_A_CABLE_TEST_NTF_HEADER, + ETHTOOL_A_CABLE_TEST_NTF_STATUS, + ETHTOOL_A_CABLE_TEST_NTF_NEST, __ETHTOOL_A_CABLE_TEST_NTF_CNT, ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1) }; -/* CABLE TEST TDR */ - enum { ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, /* u32 */ - ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, /* u8 */ + ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST, + ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST, + ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP, + ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, /* nest - ETHTOOL_A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, /* u8 - _STARTED/_COMPLETE */ - ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, /* nest - of results: */ + ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER, + ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS, + ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 }; enum { ETHTOOL_A_CABLE_TEST_TDR_UNSPEC, - ETHTOOL_A_CABLE_TEST_TDR_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_CABLE_TEST_TDR_CFG, /* nest - *_TDR_CFG_* */ + ETHTOOL_A_CABLE_TEST_TDR_HEADER, + ETHTOOL_A_CABLE_TEST_TDR_CFG, - /* add new constants above here */ __ETHTOOL_A_CABLE_TEST_TDR_CNT, ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 }; enum { ETHTOOL_A_TUNNEL_UDP_ENTRY_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, + ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, - ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, /* be16 */ - ETHTOOL_A_TUNNEL_UDP_ENTRY_TYPE, /* u32 */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT, ETHTOOL_A_TUNNEL_UDP_ENTRY_MAX = (__ETHTOOL_A_TUNNEL_UDP_ENTRY_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_UDP_TABLE_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, + ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, + ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, - ETHTOOL_A_TUNNEL_UDP_TABLE_SIZE, /* u32 */ - ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES, /* bitset */ - ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY, /* nest - _UDP_ENTRY_* */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_TABLE_CNT, ETHTOOL_A_TUNNEL_UDP_TABLE_MAX = (__ETHTOOL_A_TUNNEL_UDP_TABLE_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_UDP_UNSPEC, + ETHTOOL_A_TUNNEL_UDP_TABLE, - ETHTOOL_A_TUNNEL_UDP_TABLE, /* nest - _UDP_TABLE_* */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_UDP_CNT, ETHTOOL_A_TUNNEL_UDP_MAX = (__ETHTOOL_A_TUNNEL_UDP_CNT - 1) }; enum { ETHTOOL_A_TUNNEL_INFO_UNSPEC, - ETHTOOL_A_TUNNEL_INFO_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TUNNEL_INFO_HEADER, + ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, - ETHTOOL_A_TUNNEL_INFO_UDP_PORTS, /* nest - _UDP_TABLE */ - - /* add new constants above here */ __ETHTOOL_A_TUNNEL_INFO_CNT, ETHTOOL_A_TUNNEL_INFO_MAX = (__ETHTOOL_A_TUNNEL_INFO_CNT - 1) }; -/* FEC */ - enum { ETHTOOL_A_FEC_STAT_UNSPEC, ETHTOOL_A_FEC_STAT_PAD, + ETHTOOL_A_FEC_STAT_CORRECTED, + ETHTOOL_A_FEC_STAT_UNCORR, + ETHTOOL_A_FEC_STAT_CORR_BITS, - ETHTOOL_A_FEC_STAT_CORRECTED, /* array, u64 */ - ETHTOOL_A_FEC_STAT_UNCORR, /* array, u64 */ - ETHTOOL_A_FEC_STAT_CORR_BITS, /* array, u64 */ - - /* add new constants above here */ __ETHTOOL_A_FEC_STAT_CNT, ETHTOOL_A_FEC_STAT_MAX = (__ETHTOOL_A_FEC_STAT_CNT - 1) }; enum { ETHTOOL_A_FEC_UNSPEC, - ETHTOOL_A_FEC_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_FEC_MODES, /* bitset */ - ETHTOOL_A_FEC_AUTO, /* u8 */ - ETHTOOL_A_FEC_ACTIVE, /* u32 */ - ETHTOOL_A_FEC_STATS, /* nest - _A_FEC_STAT */ + ETHTOOL_A_FEC_HEADER, + ETHTOOL_A_FEC_MODES, + ETHTOOL_A_FEC_AUTO, + ETHTOOL_A_FEC_ACTIVE, + ETHTOOL_A_FEC_STATS, __ETHTOOL_A_FEC_CNT, ETHTOOL_A_FEC_MAX = (__ETHTOOL_A_FEC_CNT - 1) }; -/* MODULE EEPROM */ - enum { ETHTOOL_A_MODULE_EEPROM_UNSPEC, - ETHTOOL_A_MODULE_EEPROM_HEADER, /* nest - _A_HEADER_* */ - - ETHTOOL_A_MODULE_EEPROM_OFFSET, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_LENGTH, /* u32 */ - ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ + ETHTOOL_A_MODULE_EEPROM_HEADER, + ETHTOOL_A_MODULE_EEPROM_OFFSET, + ETHTOOL_A_MODULE_EEPROM_LENGTH, + ETHTOOL_A_MODULE_EEPROM_PAGE, + ETHTOOL_A_MODULE_EEPROM_BANK, + ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, + ETHTOOL_A_MODULE_EEPROM_DATA, __ETHTOOL_A_MODULE_EEPROM_CNT, ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) }; - enum { ETHTOOL_A_STATS_GRP_UNSPEC, ETHTOOL_A_STATS_GRP_PAD, + ETHTOOL_A_STATS_GRP_ID, + ETHTOOL_A_STATS_GRP_SS_ID, + ETHTOOL_A_STATS_GRP_STAT, + ETHTOOL_A_STATS_GRP_HIST_RX, + ETHTOOL_A_STATS_GRP_HIST_TX, + ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, + ETHTOOL_A_STATS_GRP_HIST_BKT_HI, + ETHTOOL_A_STATS_GRP_HIST_VAL, - ETHTOOL_A_STATS_GRP_ID, /* u32 */ - ETHTOOL_A_STATS_GRP_SS_ID, /* u32 */ - - ETHTOOL_A_STATS_GRP_STAT, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_RX, /* nest */ - ETHTOOL_A_STATS_GRP_HIST_TX, /* nest */ - - ETHTOOL_A_STATS_GRP_HIST_BKT_LOW, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_BKT_HI, /* u32 */ - ETHTOOL_A_STATS_GRP_HIST_VAL, /* u64 */ - - /* add new constants above here */ __ETHTOOL_A_STATS_GRP_CNT, ETHTOOL_A_STATS_GRP_MAX = (__ETHTOOL_A_STATS_GRP_CNT - 1) }; -/* STATS */ - enum { ETHTOOL_A_STATS_UNSPEC, ETHTOOL_A_STATS_PAD, - ETHTOOL_A_STATS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_STATS_GROUPS, /* bitset */ + ETHTOOL_A_STATS_HEADER, + ETHTOOL_A_STATS_GROUPS, + ETHTOOL_A_STATS_GRP, + ETHTOOL_A_STATS_SRC, - ETHTOOL_A_STATS_GRP, /* nest - _A_STATS_GRP_* */ - - ETHTOOL_A_STATS_SRC, /* u32 */ - - /* add new constants above here */ __ETHTOOL_A_STATS_CNT, ETHTOOL_A_STATS_MAX = (__ETHTOOL_A_STATS_CNT - 1) }; -/* PHC VCLOCKS */ - enum { ETHTOOL_A_PHC_VCLOCKS_UNSPEC, - ETHTOOL_A_PHC_VCLOCKS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHC_VCLOCKS_NUM, /* u32 */ - ETHTOOL_A_PHC_VCLOCKS_INDEX, /* array, s32 */ + ETHTOOL_A_PHC_VCLOCKS_HEADER, + ETHTOOL_A_PHC_VCLOCKS_NUM, + ETHTOOL_A_PHC_VCLOCKS_INDEX, - /* add new constants above here */ __ETHTOOL_A_PHC_VCLOCKS_CNT, ETHTOOL_A_PHC_VCLOCKS_MAX = (__ETHTOOL_A_PHC_VCLOCKS_CNT - 1) }; -/* MODULE */ - enum { ETHTOOL_A_MODULE_UNSPEC, - ETHTOOL_A_MODULE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_POWER_MODE_POLICY, /* u8 */ - ETHTOOL_A_MODULE_POWER_MODE, /* u8 */ + ETHTOOL_A_MODULE_HEADER, + ETHTOOL_A_MODULE_POWER_MODE_POLICY, + ETHTOOL_A_MODULE_POWER_MODE, - /* add new constants above here */ __ETHTOOL_A_MODULE_CNT, ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1) }; -/* Power Sourcing Equipment */ enum { ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, - ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, /* u32 */ + ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, + ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, }; enum { ETHTOOL_A_PSE_UNSPEC, - ETHTOOL_A_PSE_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PODL_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_PODL_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_ADMIN_CONTROL, /* u32 */ - ETHTOOL_A_C33_PSE_PW_D_STATUS, /* u32 */ - ETHTOOL_A_C33_PSE_PW_CLASS, /* u32 */ - ETHTOOL_A_C33_PSE_ACTUAL_PW, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_STATE, /* u32 */ - ETHTOOL_A_C33_PSE_EXT_SUBSTATE, /* u32 */ - ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, /* u32 */ - ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, /* nest - _C33_PSE_PW_LIMIT_* */ - - /* add new constants above here */ + ETHTOOL_A_PSE_HEADER, + ETHTOOL_A_PODL_PSE_ADMIN_STATE, + ETHTOOL_A_PODL_PSE_ADMIN_CONTROL, + ETHTOOL_A_PODL_PSE_PW_D_STATUS, + ETHTOOL_A_C33_PSE_ADMIN_STATE, + ETHTOOL_A_C33_PSE_ADMIN_CONTROL, + ETHTOOL_A_C33_PSE_PW_D_STATUS, + ETHTOOL_A_C33_PSE_PW_CLASS, + ETHTOOL_A_C33_PSE_ACTUAL_PW, + ETHTOOL_A_C33_PSE_EXT_STATE, + ETHTOOL_A_C33_PSE_EXT_SUBSTATE, + ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT, + ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES, + __ETHTOOL_A_PSE_CNT, ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) }; @@ -724,70 +605,62 @@ enum { enum { ETHTOOL_A_RSS_UNSPEC, ETHTOOL_A_RSS_HEADER, - ETHTOOL_A_RSS_CONTEXT, /* u32 */ - ETHTOOL_A_RSS_HFUNC, /* u32 */ - ETHTOOL_A_RSS_INDIR, /* binary */ - ETHTOOL_A_RSS_HKEY, /* binary */ - ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ - ETHTOOL_A_RSS_START_CONTEXT, /* u32 */ + ETHTOOL_A_RSS_CONTEXT, + ETHTOOL_A_RSS_HFUNC, + ETHTOOL_A_RSS_INDIR, + ETHTOOL_A_RSS_HKEY, + ETHTOOL_A_RSS_INPUT_XFRM, + ETHTOOL_A_RSS_START_CONTEXT, __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), }; -/* PLCA */ - enum { ETHTOOL_A_PLCA_UNSPEC, - ETHTOOL_A_PLCA_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PLCA_VERSION, /* u16 */ - ETHTOOL_A_PLCA_ENABLED, /* u8 */ - ETHTOOL_A_PLCA_STATUS, /* u8 */ - ETHTOOL_A_PLCA_NODE_CNT, /* u32 */ - ETHTOOL_A_PLCA_NODE_ID, /* u32 */ - ETHTOOL_A_PLCA_TO_TMR, /* u32 */ - ETHTOOL_A_PLCA_BURST_CNT, /* u32 */ - ETHTOOL_A_PLCA_BURST_TMR, /* u32 */ - - /* add new constants above here */ + ETHTOOL_A_PLCA_HEADER, + ETHTOOL_A_PLCA_VERSION, + ETHTOOL_A_PLCA_ENABLED, + ETHTOOL_A_PLCA_STATUS, + ETHTOOL_A_PLCA_NODE_CNT, + ETHTOOL_A_PLCA_NODE_ID, + ETHTOOL_A_PLCA_TO_TMR, + ETHTOOL_A_PLCA_BURST_CNT, + ETHTOOL_A_PLCA_BURST_TMR, + __ETHTOOL_A_PLCA_CNT, ETHTOOL_A_PLCA_MAX = (__ETHTOOL_A_PLCA_CNT - 1) }; -/* MODULE_FW_FLASH */ - enum { ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, - ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ - ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ - ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ - ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ - - /* add new constants above here */ + ETHTOOL_A_MODULE_FW_FLASH_HEADER, + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, + ETHTOOL_A_MODULE_FW_FLASH_STATUS, + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, + ETHTOOL_A_MODULE_FW_FLASH_DONE, + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, + __ETHTOOL_A_MODULE_FW_FLASH_CNT, ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) }; enum { ETHTOOL_A_PHY_UNSPEC, - ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHY_INDEX, /* u32 */ - ETHTOOL_A_PHY_DRVNAME, /* string */ - ETHTOOL_A_PHY_NAME, /* string */ - ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ - ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ - - /* add new constants above here */ + ETHTOOL_A_PHY_HEADER, + ETHTOOL_A_PHY_INDEX, + ETHTOOL_A_PHY_DRVNAME, + ETHTOOL_A_PHY_NAME, + ETHTOOL_A_PHY_UPSTREAM_TYPE, + ETHTOOL_A_PHY_UPSTREAM_INDEX, + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, + __ETHTOOL_A_PHY_CNT, ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) }; -/* message types - userspace to kernel */ enum { ETHTOOL_MSG_USER_NONE, ETHTOOL_MSG_STRSET_GET, @@ -836,12 +709,10 @@ enum { ETHTOOL_MSG_MODULE_FW_FLASH_ACT, ETHTOOL_MSG_PHY_GET, - /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 }; -/* message types - kernel to userspace */ enum { ETHTOOL_MSG_KERNEL_NONE, ETHTOOL_MSG_STRSET_GET_REPLY, @@ -891,7 +762,6 @@ enum { ETHTOOL_MSG_PHY_GET_REPLY, ETHTOOL_MSG_PHY_NTF, - /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 }; -- cgit v1.2.3 From 8d0580c6ebdd27879c83483f53bc71e2e470f6fe Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Dec 2024 07:55:49 -0800 Subject: ethtool: regenerate uapi header from the spec No functional changes. Mostly the following formatting: - extra docs - extra enums - XXX_MAX = __XXX_CNT - 1 -> XXX_MAX = (__XXX_CNT - 1) - newlines Signed-off-by: Stanislav Fomichev Link: https://patch.msgid.link/20241204155549.641348-9-sdf@fomichev.me Signed-off-by: Jakub Kicinski --- include/uapi/linux/ethtool_netlink_generated.h | 89 ++++++++++++++++---------- 1 file changed, 56 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 35a24d490efe..b58f352fe4f2 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -1,23 +1,43 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/ethtool.yaml */ +/* YNL-GEN uapi header */ + #ifndef _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H #define _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H +#define ETHTOOL_FAMILY_NAME "ethtool" +#define ETHTOOL_FAMILY_VERSION 1 + enum { ETHTOOL_UDP_TUNNEL_TYPE_VXLAN, ETHTOOL_UDP_TUNNEL_TYPE_GENEVE, ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE, - __ETHTOOL_UDP_TUNNEL_TYPE_CNT + /* private: */ + __ETHTOOL_UDP_TUNNEL_TYPE_CNT, + ETHTOOL_UDP_TUNNEL_TYPE_MAX = (__ETHTOOL_UDP_TUNNEL_TYPE_CNT - 1) }; +/** + * enum ethtool_header_flags - common ethtool header flags + * @ETHTOOL_FLAG_COMPACT_BITSETS: use compact bitsets in reply + * @ETHTOOL_FLAG_OMIT_REPLY: provide optional reply for SET or ACT requests + * @ETHTOOL_FLAG_STATS: request statistics, if supported by the driver + */ enum ethtool_header_flags { - ETHTOOL_FLAG_COMPACT_BITSETS = 1 << 0, /* use compact bitsets in reply */ - ETHTOOL_FLAG_OMIT_REPLY = 1 << 1, /* provide optional reply for SET or ACT requests */ - ETHTOOL_FLAG_STATS = 1 << 2, /* request statistics, if supported by the driver */ + ETHTOOL_FLAG_COMPACT_BITSETS = 1, + ETHTOOL_FLAG_OMIT_REPLY = 2, + ETHTOOL_FLAG_STATS = 4, }; enum { - ETHTOOL_TCP_DATA_SPLIT_UNKNOWN = 0, + ETHTOOL_PHY_UPSTREAM_TYPE_MAC, + ETHTOOL_PHY_UPSTREAM_TYPE_PHY, +}; + +enum ethtool_tcp_data_split { + ETHTOOL_TCP_DATA_SPLIT_UNKNOWN, ETHTOOL_TCP_DATA_SPLIT_DISABLED, ETHTOOL_TCP_DATA_SPLIT_ENABLED, }; @@ -30,7 +50,7 @@ enum { ETHTOOL_A_HEADER_PHY_INDEX, __ETHTOOL_A_HEADER_CNT, - ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1 + ETHTOOL_A_HEADER_MAX = (__ETHTOOL_A_HEADER_CNT - 1) }; enum { @@ -40,7 +60,7 @@ enum { ETHTOOL_A_BITSET_BIT_VALUE, __ETHTOOL_A_BITSET_BIT_CNT, - ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1 + ETHTOOL_A_BITSET_BIT_MAX = (__ETHTOOL_A_BITSET_BIT_CNT - 1) }; enum { @@ -48,7 +68,7 @@ enum { ETHTOOL_A_BITSET_BITS_BIT, __ETHTOOL_A_BITSET_BITS_CNT, - ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1 + ETHTOOL_A_BITSET_BITS_MAX = (__ETHTOOL_A_BITSET_BITS_CNT - 1) }; enum { @@ -60,7 +80,7 @@ enum { ETHTOOL_A_BITSET_MASK, __ETHTOOL_A_BITSET_CNT, - ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1 + ETHTOOL_A_BITSET_MAX = (__ETHTOOL_A_BITSET_CNT - 1) }; enum { @@ -69,7 +89,7 @@ enum { ETHTOOL_A_STRING_VALUE, __ETHTOOL_A_STRING_CNT, - ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1 + ETHTOOL_A_STRING_MAX = (__ETHTOOL_A_STRING_CNT - 1) }; enum { @@ -77,7 +97,7 @@ enum { ETHTOOL_A_STRINGS_STRING, __ETHTOOL_A_STRINGS_CNT, - ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1 + ETHTOOL_A_STRINGS_MAX = (__ETHTOOL_A_STRINGS_CNT - 1) }; enum { @@ -87,7 +107,7 @@ enum { ETHTOOL_A_STRINGSET_STRINGS, __ETHTOOL_A_STRINGSET_CNT, - ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1 + ETHTOOL_A_STRINGSET_MAX = (__ETHTOOL_A_STRINGSET_CNT - 1) }; enum { @@ -95,7 +115,7 @@ enum { ETHTOOL_A_STRINGSETS_STRINGSET, __ETHTOOL_A_STRINGSETS_CNT, - ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1 + ETHTOOL_A_STRINGSETS_MAX = (__ETHTOOL_A_STRINGSETS_CNT - 1) }; enum { @@ -105,7 +125,7 @@ enum { ETHTOOL_A_STRSET_COUNTS_ONLY, __ETHTOOL_A_STRSET_CNT, - ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1 + ETHTOOL_A_STRSET_MAX = (__ETHTOOL_A_STRSET_CNT - 1) }; enum { @@ -114,7 +134,7 @@ enum { ETHTOOL_A_PRIVFLAGS_FLAGS, __ETHTOOL_A_PRIVFLAGS_CNT, - ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 + ETHTOOL_A_PRIVFLAGS_MAX = (__ETHTOOL_A_PRIVFLAGS_CNT - 1) }; enum { @@ -182,7 +202,7 @@ enum { ETHTOOL_A_LINKINFO_TRANSCEIVER, __ETHTOOL_A_LINKINFO_CNT, - ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1 + ETHTOOL_A_LINKINFO_MAX = (__ETHTOOL_A_LINKINFO_CNT - 1) }; enum { @@ -199,7 +219,7 @@ enum { ETHTOOL_A_LINKMODES_RATE_MATCHING, __ETHTOOL_A_LINKMODES_CNT, - ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1 + ETHTOOL_A_LINKMODES_MAX = (__ETHTOOL_A_LINKMODES_CNT - 1) }; enum { @@ -213,7 +233,7 @@ enum { ETHTOOL_A_LINKSTATE_EXT_DOWN_CNT, __ETHTOOL_A_LINKSTATE_CNT, - ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1 + ETHTOOL_A_LINKSTATE_MAX = (__ETHTOOL_A_LINKSTATE_CNT - 1) }; enum { @@ -222,7 +242,7 @@ enum { ETHTOOL_A_DEBUG_MSGMASK, __ETHTOOL_A_DEBUG_CNT, - ETHTOOL_A_DEBUG_MAX = __ETHTOOL_A_DEBUG_CNT - 1 + ETHTOOL_A_DEBUG_MAX = (__ETHTOOL_A_DEBUG_CNT - 1) }; enum { @@ -232,7 +252,7 @@ enum { ETHTOOL_A_WOL_SOPASS, __ETHTOOL_A_WOL_CNT, - ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 + ETHTOOL_A_WOL_MAX = (__ETHTOOL_A_WOL_CNT - 1) }; enum { @@ -244,7 +264,7 @@ enum { ETHTOOL_A_FEATURES_NOCHANGE, __ETHTOOL_A_FEATURES_CNT, - ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 + ETHTOOL_A_FEATURES_MAX = (__ETHTOOL_A_FEATURES_CNT - 1) }; enum { @@ -276,6 +296,7 @@ enum { enum { ETHTOOL_A_PROFILE_UNSPEC, ETHTOOL_A_PROFILE_IRQ_MODERATION, + __ETHTOOL_A_PROFILE_CNT, ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1) }; @@ -362,7 +383,6 @@ enum { __ETHTOOL_A_TS_STAT_CNT, ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) - }; enum { @@ -412,7 +432,7 @@ enum { ETHTOOL_A_CABLE_TEST_HEADER, __ETHTOOL_A_CABLE_TEST_CNT, - ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1 + ETHTOOL_A_CABLE_TEST_MAX = (__ETHTOOL_A_CABLE_TEST_CNT - 1) }; enum { @@ -433,7 +453,7 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR, __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT, - ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1) }; enum { @@ -443,7 +463,7 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST, __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT, - ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1) }; enum { @@ -452,7 +472,7 @@ enum { ETHTOOL_A_CABLE_TEST_TDR_CFG, __ETHTOOL_A_CABLE_TEST_TDR_CNT, - ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1 + ETHTOOL_A_CABLE_TEST_TDR_MAX = (__ETHTOOL_A_CABLE_TEST_TDR_CNT - 1) }; enum { @@ -580,6 +600,9 @@ enum { ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC, ETHTOOL_A_C33_PSE_PW_LIMIT_MIN, ETHTOOL_A_C33_PSE_PW_LIMIT_MAX, + + __ETHTOOL_A_C33_PSE_PW_LIMIT_CNT, + __ETHTOOL_A_C33_PSE_PW_LIMIT_MAX = (__ETHTOOL_A_C33_PSE_PW_LIMIT_CNT - 1) }; enum { @@ -613,7 +636,7 @@ enum { ETHTOOL_A_RSS_START_CONTEXT, __ETHTOOL_A_RSS_CNT, - ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), + ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1) }; enum { @@ -662,8 +685,8 @@ enum { }; enum { - ETHTOOL_MSG_USER_NONE, - ETHTOOL_MSG_STRSET_GET, + ETHTOOL_MSG_USER_NONE = 0, + ETHTOOL_MSG_STRSET_GET = 1, ETHTOOL_MSG_LINKINFO_GET, ETHTOOL_MSG_LINKINFO_SET, ETHTOOL_MSG_LINKMODES_GET, @@ -710,12 +733,12 @@ enum { ETHTOOL_MSG_PHY_GET, __ETHTOOL_MSG_USER_CNT, - ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1 + ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) }; enum { - ETHTOOL_MSG_KERNEL_NONE, - ETHTOOL_MSG_STRSET_GET_REPLY, + ETHTOOL_MSG_KERNEL_NONE = 0, + ETHTOOL_MSG_STRSET_GET_REPLY = 1, ETHTOOL_MSG_LINKINFO_GET_REPLY, ETHTOOL_MSG_LINKINFO_NTF, ETHTOOL_MSG_LINKMODES_GET_REPLY, @@ -763,7 +786,7 @@ enum { ETHTOOL_MSG_PHY_NTF, __ETHTOOL_MSG_KERNEL_CNT, - ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1 + ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) }; #endif /* _UAPI_LINUX_ETHTOOL_NETLINK_GENERATED_H */ -- cgit v1.2.3 From 5765c7f6e3173eb894889a29963a497aeb721c5e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Dec 2024 17:19:50 +0000 Subject: net_sched: sch_fq: add three drop_reason MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three new drop_reason, more precise than generic QDISC_DROP: "tc -s qd" show aggregate counters, it might be more useful to use drop_reason infrastructure for bug hunting. 1) SKB_DROP_REASON_FQ_BAND_LIMIT Whenever a packet is added while its band limit is hit. Corresponding value in "tc -s qd" is bandX_drops XXXX 2) SKB_DROP_REASON_FQ_HORIZON_LIMIT Whenever a packet has a timestamp too far in the future. Corresponding value in "tc -s qd" is horizon_drops XXXX 3) SKB_DROP_REASON_FQ_FLOW_LIMIT Whenever a flow has reached its limit. Corresponding value in "tc -s qd" is flows_plimit XXXX Tested: tc qd replace dev eth1 root fq flow_limit 10 limit 100000 perf record -a -e skb:kfree_skb sleep 1; perf script udp_stream 12329 [004] 216.929492: skb:kfree_skb: skbaddr=0xffff888eabe17e00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT udp_stream 12385 [006] 216.929593: skb:kfree_skb: skbaddr=0xffff888ef8827f00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT udp_stream 12389 [005] 216.929871: skb:kfree_skb: skbaddr=0xffff888ecb9ba500 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT udp_stream 12316 [009] 216.930398: skb:kfree_skb: skbaddr=0xffff888eca286b00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT udp_stream 12400 [008] 216.930490: skb:kfree_skb: skbaddr=0xffff888eabf93d00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_FLOW_LIMIT tc qd replace dev eth1 root fq flow_limit 100 limit 10000 perf record -a -e skb:kfree_skb sleep 1; perf script udp_stream 18074 [001] 1058.318040: skb:kfree_skb: skbaddr=0xffffa23c881fc000 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT udp_stream 18126 [005] 1058.320651: skb:kfree_skb: skbaddr=0xffffa23c6aad4000 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT udp_stream 18118 [006] 1058.321065: skb:kfree_skb: skbaddr=0xffffa23df0d48a00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT udp_stream 18074 [001] 1058.321126: skb:kfree_skb: skbaddr=0xffffa23c881ffa00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT udp_stream 15815 [003] 1058.321224: skb:kfree_skb: skbaddr=0xffffa23c9835db00 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x9d9 reason: FQ_BAND_LIMIT tc -s -d qd sh dev eth1 qdisc fq 8023: root refcnt 257 limit 10000p flow_limit 100p buckets 1024 orphan_mask 1023 bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1 weights 589824 196608 65536 quantum 18Kb initial_quantum 92120b low_rate_threshold 550Kbit refill_delay 40ms timer_slack 10us horizon 10s horizon_drop Sent 492439603330 bytes 336953991 pkt (dropped 61724094, overlimits 0 requeues 4463) backlog 14611228b 9995p requeues 4463 flows 2965 (inactive 1151 throttled 0) band0_pkts 0 band1_pkts 9993 band2_pkts 0 gc 6347 highprio 0 fastpath 30 throttled 5 latency 2.32us flows_plimit 7403693 band1_drops 54320401 Signed-off-by: Eric Dumazet Reviewed-by: Victor Nogueira Reviewed-by: Toke Høiland-Jørgensen Acked-by: Jamal Hadi Salim Link: https://patch.msgid.link/20241204171950.89829-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 18 ++++++++++++++++++ include/net/sch_generic.h | 8 ++++++++ net/sched/sch_fq.c | 14 ++++++++++---- 3 files changed, 36 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index 6c5a1ea209a2..c29282fabae6 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -58,6 +58,9 @@ FN(TC_EGRESS) \ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ + FN(FQ_BAND_LIMIT) \ + FN(FQ_HORIZON_LIMIT) \ + FN(FQ_FLOW_LIMIT) \ FN(CPU_BACKLOG) \ FN(XDP) \ FN(TC_INGRESS) \ @@ -311,6 +314,21 @@ enum skb_drop_reason { * failed to enqueue to current qdisc) */ SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band + * limit is reached. + */ + SKB_DROP_REASON_FQ_BAND_LIMIT, + /** + * @SKB_DROP_REASON_FQ_HORIZON_LIMIT: dropped by fq qdisc when packet + * timestamp is too far in the future. + */ + SKB_DROP_REASON_FQ_HORIZON_LIMIT, + /** + * @SKB_DROP_REASON_FQ_FLOW_LIMIT: dropped by fq qdisc when a flow + * exceeds its limits. + */ + SKB_DROP_REASON_FQ_FLOW_LIMIT, /** * @SKB_DROP_REASON_CPU_BACKLOG: failed to enqueue the skb to the per CPU * backlog queue. This can be caused by backlog queue full (see diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5d74fa7e694c..8074322dd636 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -1245,6 +1245,14 @@ static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_DROP; } +static inline int qdisc_drop_reason(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free, + enum skb_drop_reason reason) +{ + tcf_set_drop_reason(skb, reason); + return qdisc_drop(skb, sch, to_free); +} + static inline int qdisc_drop_all(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a5e87f9ea986..2ca5332cfcc5 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -537,6 +537,8 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } +#define FQDR(reason) SKB_DROP_REASON_FQ_##reason + static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -548,7 +550,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); if (unlikely(q->band_pkt_count[band] >= sch->limit)) { q->stat_band_drops[band]++; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, + FQDR(BAND_LIMIT)); } now = ktime_get_ns(); @@ -558,8 +561,9 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Check if packet timestamp is too far in the future. */ if (fq_packet_beyond_horizon(skb, q, now)) { if (q->horizon_drop) { - q->stat_horizon_drops++; - return qdisc_drop(skb, sch, to_free); + q->stat_horizon_drops++; + return qdisc_drop_reason(skb, sch, to_free, + FQDR(HORIZON_LIMIT)); } q->stat_horizon_caps++; skb->tstamp = now + q->horizon; @@ -572,7 +576,8 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (f != &q->internal) { if (unlikely(f->qlen >= q->flow_plimit)) { q->stat_flows_plimit++; - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, + FQDR(FLOW_LIMIT)); } if (fq_flow_is_detached(f)) { @@ -597,6 +602,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } +#undef FQDR static void fq_check_throttled(struct fq_sched_data *q, u64 now) { -- cgit v1.2.3 From ca5c94949facce1f67a4a9a9528a27f635ff3e78 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:24 +0100 Subject: xsk: align &xdp_buff_xsk harder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the series "XSk buff on a diet" by Maciej, the greatest pow-2 which &xdp_buff_xsk can be divided got reduced from 16 to 8 on x86_64. Also, sizeof(xdp_buff_xsk) now is 120 bytes, which, taking the previous sentence into account, leads to that it leaves 8 bytes at the end of cacheline, which means an array of buffs will have its elements messed between the cachelines chaotically. Use __aligned_largest for this struct. This alignment is usually 16 bytes, which makes it fill two full cachelines and align an array nicely. ___cacheline_aligned may be excessive here, especially on arches with 128-256 byte CLs, as well as 32-bit arches (76 -> 96 bytes on MIPS32R2), while not doing better than _largest. Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241203173733.3181246-2-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xsk_buff_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index bb03cee716b3..7637799b6c19 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -29,7 +29,7 @@ struct xdp_buff_xsk { dma_addr_t frame_dma; struct xsk_buff_pool *pool; struct list_head list_node; -}; +} __aligned_largest; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) #define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t)) -- cgit v1.2.3 From 7cd1107f48e2a246c6a628c2381e1b8aafa4675a Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:25 +0100 Subject: bpf, xdp: constify some bpf_prog * function arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In lots of places, bpf_prog pointer is used only for tracing or other stuff that doesn't modify the structure itself. Same for net_device. Address at least some of them and add `const` attributes there. The object code didn't change, but that may prevent unwanted data modifications and also allow more helpers to have const arguments. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Signed-off-by: Jakub Kicinski --- include/linux/bpf.h | 12 ++++++------ include/linux/filter.h | 9 +++++---- include/linux/netdevice.h | 6 +++--- include/linux/skbuff.h | 2 +- kernel/bpf/devmap.c | 8 ++++---- net/core/dev.c | 10 +++++----- net/core/filter.c | 29 ++++++++++++++++------------- net/core/skbuff.c | 2 +- 8 files changed, 41 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eaee2a819f4c..ec3acb16359e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2591,10 +2591,10 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_frame *xdpf, int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress); int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog); + const struct bpf_prog *xdp_prog); int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress); + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress); void __cpu_map_flush(struct list_head *flush_list); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf, @@ -2864,15 +2864,15 @@ struct sk_buff; static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { return 0; } static inline int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress) + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress) { return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 3a21947f2fd4..9a5d23ae3855 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1179,17 +1179,18 @@ static inline int xdp_ok_fwd_dev(const struct net_device *fwd, * This does not appear to be a real limitation for existing software. */ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct xdp_buff *xdp, struct bpf_prog *prog); + struct xdp_buff *xdp, const struct bpf_prog *prog); int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *prog); + const struct bpf_prog *prog); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, struct xdp_frame *xdpf, - struct bpf_prog *prog); + const struct bpf_prog *prog); void xdp_do_flush(void); -void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act); +void bpf_warn_invalid_xdp_action(const struct net_device *dev, + const struct bpf_prog *prog, u32 act); #ifdef CONFIG_INET struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ecc686409161..ecca21387a68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3958,9 +3958,9 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog); -void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb); + const struct bpf_prog *xdp_prog); +void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog); +int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb); int netif_rx(struct sk_buff *skb); int __netif_rx(struct sk_buff *skb); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 58009fa66102..95452d1a07fc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3627,7 +3627,7 @@ static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, - struct bpf_prog *prog); + const struct bpf_prog *prog); /** * skb_frag_address - gets the address of the data contained in a paged fragment diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 7878be18e9d2..effde52bc857 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -678,7 +678,7 @@ int dev_map_enqueue_multi(struct xdp_frame *xdpf, struct net_device *dev_rx, } int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { int err; @@ -701,7 +701,7 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst, struct sk_buff *skb, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { struct sk_buff *nskb; int err; @@ -720,8 +720,8 @@ static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst, } int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, - struct bpf_prog *xdp_prog, struct bpf_map *map, - bool exclude_ingress) + const struct bpf_prog *xdp_prog, + struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dst, *last_dst = NULL; diff --git a/net/core/dev.c b/net/core/dev.c index 45a8c3dd4a64..40a2332e3fa0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4931,7 +4931,7 @@ static struct netdev_rx_queue *netif_get_rxqueue(struct sk_buff *skb) } u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { void *orig_data, *orig_data_end, *hard_start; struct netdev_rx_queue *rxqueue; @@ -5033,7 +5033,7 @@ u32 bpf_prog_run_generic_xdp(struct sk_buff *skb, struct xdp_buff *xdp, } static int -netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog) +netif_skb_check_for_xdp(struct sk_buff **pskb, const struct bpf_prog *prog) { struct sk_buff *skb = *pskb; int err, hroom, troom; @@ -5057,7 +5057,7 @@ netif_skb_check_for_xdp(struct sk_buff **pskb, struct bpf_prog *prog) static u32 netif_receive_generic_xdp(struct sk_buff **pskb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { struct sk_buff *skb = *pskb; u32 mac_len, act = XDP_DROP; @@ -5110,7 +5110,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff **pskb, * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX * queues, so they do not have this starvation issue. */ -void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) +void generic_xdp_tx(struct sk_buff *skb, const struct bpf_prog *xdp_prog) { struct net_device *dev = skb->dev; struct netdev_queue *txq; @@ -5135,7 +5135,7 @@ void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) static DEFINE_STATIC_KEY_FALSE(generic_xdp_needed_key); -int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) +int do_xdp_generic(const struct bpf_prog *xdp_prog, struct sk_buff **pskb) { struct bpf_net_context __bpf_net_ctx, *bpf_net_ctx; diff --git a/net/core/filter.c b/net/core/filter.c index 6625b3f563a4..fac245065b0a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4348,9 +4348,9 @@ u32 xdp_master_redirect(struct xdp_buff *xdp) EXPORT_SYMBOL_GPL(xdp_master_redirect); static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, - struct net_device *dev, + const struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; @@ -4371,10 +4371,10 @@ err: return err; } -static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, - struct net_device *dev, - struct xdp_frame *xdpf, - struct bpf_prog *xdp_prog) +static __always_inline int +__xdp_do_redirect_frame(struct bpf_redirect_info *ri, struct net_device *dev, + struct xdp_frame *xdpf, + const struct bpf_prog *xdp_prog) { enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; @@ -4443,7 +4443,7 @@ err: } int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + const struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; @@ -4457,7 +4457,8 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, EXPORT_SYMBOL_GPL(xdp_do_redirect); int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, - struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) + struct xdp_frame *xdpf, + const struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; @@ -4472,9 +4473,9 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, void *fwd, - enum bpf_map_type map_type, u32 map_id, - u32 flags) + const struct bpf_prog *xdp_prog, + void *fwd, enum bpf_map_type map_type, + u32 map_id, u32 flags) { struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); struct bpf_map *map; @@ -4528,7 +4529,8 @@ err: } int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, - struct xdp_buff *xdp, struct bpf_prog *xdp_prog) + struct xdp_buff *xdp, + const struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = bpf_net_ctx_get_ri(); enum bpf_map_type map_type = ri->map_type; @@ -9075,7 +9077,8 @@ static bool xdp_is_valid_access(int off, int size, return __is_valid_xdp_access(off, size); } -void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act) +void bpf_warn_invalid_xdp_action(const struct net_device *dev, + const struct bpf_prog *prog, u32 act) { const u32 act_max = XDP_REDIRECT; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6841e61a6bd0..a441613a1e6c 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1009,7 +1009,7 @@ int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, EXPORT_SYMBOL(skb_pp_cow_data); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, - struct bpf_prog *prog) + const struct bpf_prog *prog) { if (!prog->aux->xdp_has_frags) return -EINVAL; -- cgit v1.2.3 From dcf3827cde8621d2317a7f98e069adbdc2112982 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:26 +0100 Subject: xdp, xsk: constify read-only arguments of some static inline helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lots of read-only helpers for &xdp_buff and &xdp_frame, such as getting the frame length, skb_shared_info etc., don't have their arguments marked with `const` for no reason. Add the missing annotations to leave less place for mistakes and more for optimization. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241203173733.3181246-4-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 29 +++++++++++++++++------------ include/net/xdp_sock_drv.h | 11 ++++++----- include/net/xsk_buff_pool.h | 2 +- 3 files changed, 24 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index e6770dd40c91..197808df1ee1 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -88,7 +88,7 @@ struct xdp_buff { u32 flags; /* supported values defined in xdp_buff_flags */ }; -static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp) +static __always_inline bool xdp_buff_has_frags(const struct xdp_buff *xdp) { return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS); } @@ -103,7 +103,8 @@ static __always_inline void xdp_buff_clear_frags_flag(struct xdp_buff *xdp) xdp->flags &= ~XDP_FLAGS_HAS_FRAGS; } -static __always_inline bool xdp_buff_is_frag_pfmemalloc(struct xdp_buff *xdp) +static __always_inline bool +xdp_buff_is_frag_pfmemalloc(const struct xdp_buff *xdp) { return !!(xdp->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); } @@ -144,15 +145,16 @@ xdp_prepare_buff(struct xdp_buff *xdp, unsigned char *hard_start, SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) static inline struct skb_shared_info * -xdp_get_shared_info_from_buff(struct xdp_buff *xdp) +xdp_get_shared_info_from_buff(const struct xdp_buff *xdp) { return (struct skb_shared_info *)xdp_data_hard_end(xdp); } -static __always_inline unsigned int xdp_get_buff_len(struct xdp_buff *xdp) +static __always_inline unsigned int +xdp_get_buff_len(const struct xdp_buff *xdp) { unsigned int len = xdp->data_end - xdp->data; - struct skb_shared_info *sinfo; + const struct skb_shared_info *sinfo; if (likely(!xdp_buff_has_frags(xdp))) goto out; @@ -177,12 +179,13 @@ struct xdp_frame { u32 flags; /* supported values defined in xdp_buff_flags */ }; -static __always_inline bool xdp_frame_has_frags(struct xdp_frame *frame) +static __always_inline bool xdp_frame_has_frags(const struct xdp_frame *frame) { return !!(frame->flags & XDP_FLAGS_HAS_FRAGS); } -static __always_inline bool xdp_frame_is_frag_pfmemalloc(struct xdp_frame *frame) +static __always_inline bool +xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) { return !!(frame->flags & XDP_FLAGS_FRAGS_PF_MEMALLOC); } @@ -201,7 +204,7 @@ static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) } static inline struct skb_shared_info * -xdp_get_shared_info_from_frame(struct xdp_frame *frame) +xdp_get_shared_info_from_frame(const struct xdp_frame *frame) { void *data_hard_start = frame->data - frame->headroom - sizeof(*frame); @@ -249,7 +252,8 @@ int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp); struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf); static inline -void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) +void xdp_convert_frame_to_buff(const struct xdp_frame *frame, + struct xdp_buff *xdp) { xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame); xdp->data = frame->data; @@ -260,7 +264,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp) } static inline -int xdp_update_frame_from_buff(struct xdp_buff *xdp, +int xdp_update_frame_from_buff(const struct xdp_buff *xdp, struct xdp_frame *xdp_frame) { int metasize, headroom; @@ -317,9 +321,10 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); -static __always_inline unsigned int xdp_get_frame_len(struct xdp_frame *xdpf) +static __always_inline unsigned int +xdp_get_frame_len(const struct xdp_frame *xdpf) { - struct skb_shared_info *sinfo; + const struct skb_shared_info *sinfo; unsigned int len = xdpf->len; if (likely(!xdp_frame_has_frags(xdpf))) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 40085afd9160..f3175a5d28f7 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -101,7 +101,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) return xp_alloc(pool); } -static inline bool xsk_is_eop_desc(struct xdp_desc *desc) +static inline bool xsk_is_eop_desc(const struct xdp_desc *desc) { return !xp_mb_desc(desc); } @@ -143,7 +143,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp) list_add_tail(&frag->list_node, &frag->pool->xskb_list); } -static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) +static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) { struct xdp_buff_xsk *xskb = container_of(first, struct xdp_buff_xsk, xdp); struct xdp_buff *ret = NULL; @@ -200,7 +200,8 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) XDP_TXMD_FLAGS_CHECKSUM | \ 0) -static inline bool xsk_buff_valid_tx_metadata(struct xsk_tx_metadata *meta) +static inline bool +xsk_buff_valid_tx_metadata(const struct xsk_tx_metadata *meta) { return !(meta->flags & ~XDP_TXMD_FLAGS_VALID); } @@ -337,7 +338,7 @@ static inline struct xdp_buff *xsk_buff_alloc(struct xsk_buff_pool *pool) return NULL; } -static inline bool xsk_is_eop_desc(struct xdp_desc *desc) +static inline bool xsk_is_eop_desc(const struct xdp_desc *desc) { return false; } @@ -360,7 +361,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp) { } -static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first) +static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) { return NULL; } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 7637799b6c19..50779406bc2d 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -183,7 +183,7 @@ static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool, !(pool->dma_pages[addr >> PAGE_SHIFT] & XSK_NEXT_PG_CONTIG_MASK); } -static inline bool xp_mb_desc(struct xdp_desc *desc) +static inline bool xp_mb_desc(const struct xdp_desc *desc) { return desc->options & XDP_PKT_CONTD; } -- cgit v1.2.3 From f65966fe0178c06065d354c22fb456fc4370b527 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:27 +0100 Subject: xdp: allow attaching already registered memory model to xdp_rxq_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One may need to register memory model separately from xdp_rxq_info. One simple example may be XDP test run code, but in general, it might be useful when memory model registering is managed by one layer and then XDP RxQ info by a different one. Allow such scenarios by adding a simple helper which "attaches" already registered memory model to the desired xdp_rxq_info. As this is mostly needed for Page Pool, add a special function to do that for a &page_pool pointer. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241203173733.3181246-5-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 32 +++++++++++++++++++++++++++++++ net/core/xdp.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 88 insertions(+) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 197808df1ee1..1253fe21ede7 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -356,6 +356,38 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); int xdp_reg_mem_model(struct xdp_mem_info *mem, enum xdp_mem_type type, void *allocator); void xdp_unreg_mem_model(struct xdp_mem_info *mem); +int xdp_reg_page_pool(struct page_pool *pool); +void xdp_unreg_page_pool(const struct page_pool *pool); +void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq, + const struct page_pool *pool); + +/** + * xdp_rxq_info_attach_mem_model - attach registered mem info to RxQ info + * @xdp_rxq: XDP RxQ info to attach the memory info to + * @mem: already registered memory info + * + * If the driver registers its memory providers manually, it must use this + * function instead of xdp_rxq_info_reg_mem_model(). + */ +static inline void +xdp_rxq_info_attach_mem_model(struct xdp_rxq_info *xdp_rxq, + const struct xdp_mem_info *mem) +{ + xdp_rxq->mem = *mem; +} + +/** + * xdp_rxq_info_detach_mem_model - detach registered mem info from RxQ info + * @xdp_rxq: XDP RxQ info to detach the memory info from + * + * If the driver registers its memory providers manually and then attaches it + * via xdp_rxq_info_attach_mem_model(), it must call this function before + * xdp_rxq_info_unreg(). + */ +static inline void xdp_rxq_info_detach_mem_model(struct xdp_rxq_info *xdp_rxq) +{ + xdp_rxq->mem = (struct xdp_mem_info){ }; +} /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result. diff --git a/net/core/xdp.c b/net/core/xdp.c index bcc5551c6424..885a2a664bce 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -365,6 +365,62 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); +/** + * xdp_reg_page_pool - register &page_pool as a memory provider for XDP + * @pool: &page_pool to register + * + * Can be used to register pools manually without connecting to any XDP RxQ + * info, so that the XDP layer will be aware of them. Then, they can be + * attached to an RxQ info manually via xdp_rxq_info_attach_page_pool(). + * + * Return: %0 on success, -errno on error. + */ +int xdp_reg_page_pool(struct page_pool *pool) +{ + struct xdp_mem_info mem; + + return xdp_reg_mem_model(&mem, MEM_TYPE_PAGE_POOL, pool); +} +EXPORT_SYMBOL_GPL(xdp_reg_page_pool); + +/** + * xdp_unreg_page_pool - unregister &page_pool from the memory providers list + * @pool: &page_pool to unregister + * + * A shorthand for manual unregistering page pools. If the pool was previously + * attached to an RxQ info, it must be detached first. + */ +void xdp_unreg_page_pool(const struct page_pool *pool) +{ + struct xdp_mem_info mem = { + .type = MEM_TYPE_PAGE_POOL, + .id = pool->xdp_mem_id, + }; + + xdp_unreg_mem_model(&mem); +} +EXPORT_SYMBOL_GPL(xdp_unreg_page_pool); + +/** + * xdp_rxq_info_attach_page_pool - attach registered pool to RxQ info + * @xdp_rxq: XDP RxQ info to attach the pool to + * @pool: pool to attach + * + * If the pool was registered manually, this function must be called instead + * of xdp_rxq_info_reg_mem_model() to connect it to the RxQ info. + */ +void xdp_rxq_info_attach_page_pool(struct xdp_rxq_info *xdp_rxq, + const struct page_pool *pool) +{ + struct xdp_mem_info mem = { + .type = MEM_TYPE_PAGE_POOL, + .id = pool->xdp_mem_id, + }; + + xdp_rxq_info_attach_mem_model(xdp_rxq, &mem); +} +EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool); + /* XDP RX runs under NAPI protection, and in different delivery error * scenarios (e.g. queue full), it is possible to return the xdp_frame * while still leveraging this protection. The @napi_direct boolean -- cgit v1.2.3 From e77d9aee951341119be16a991fcfc76d1154d22a Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Tue, 3 Dec 2024 18:37:29 +0100 Subject: xdp: register system page pool as an XDP memory model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make the system page pool usable as a source for allocating XDP frames, we need to register it with xdp_reg_mem_model(), so that page return works correctly. This is done in preparation for using the system page_pool to convert XDP_PASS XSk frames to skbs; for the same reason, make the per-cpu variable non-static so we can access it from other source files as well (but w/o exporting). Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241203173733.3181246-7-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ecca21387a68..d1a8d98b132c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3322,6 +3322,7 @@ struct softnet_data { }; DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); +DECLARE_PER_CPU(struct page_pool *, system_page_pool); #ifndef CONFIG_PREEMPT_RT static inline int dev_recursion_level(void) diff --git a/net/core/dev.c b/net/core/dev.c index 40a2332e3fa0..c7f3dea3e0eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -460,7 +460,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data); * PP consumers must pay attention to run APIs in the appropriate context * (e.g. NAPI context). */ -static DEFINE_PER_CPU(struct page_pool *, system_page_pool); +DEFINE_PER_CPU(struct page_pool *, system_page_pool); #ifdef CONFIG_LOCKDEP /* @@ -12152,11 +12152,18 @@ static int net_page_pool_create(int cpuid) .nid = cpu_to_mem(cpuid), }; struct page_pool *pp_ptr; + int err; pp_ptr = page_pool_create_percpu(&page_pool_params, cpuid); if (IS_ERR(pp_ptr)) return -ENOMEM; + err = xdp_reg_page_pool(pp_ptr); + if (err) { + page_pool_destroy(pp_ptr); + return err; + } + per_cpu(system_page_pool, cpuid) = pp_ptr; #endif return 0; @@ -12290,6 +12297,7 @@ out: if (!pp_ptr) continue; + xdp_unreg_page_pool(pp_ptr); page_pool_destroy(pp_ptr); per_cpu(system_page_pool, i) = NULL; } -- cgit v1.2.3 From 9bd9f72a74344b54cfb6fcabf1173e6c6e5c6952 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:30 +0100 Subject: netmem: add a couple of page helper wrappers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the following netmem counterparts: * virt_to_netmem() -- simple page_to_netmem(virt_to_page()) wrapper; * netmem_is_pfmemalloc() -- page_is_pfmemalloc() for page-backed netmems, false otherwise; and the following "unsafe" versions: * __netmem_to_page() * __netmem_get_pp() * __netmem_address() They do the same as their non-underscored buddies, but assume the netmem is always page-backed. When working with header &page_pools, you don't need to check whether netmem belongs to the host memory and you can never get NULL instead of &page. Checks for the LSB, clearing the LSB, branches take cycles and increase object code size, sometimes significantly. When you're sure your PP is always host, you can avoid this by using the underscored counterparts. Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241203173733.3181246-8-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/netmem.h | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/netmem.h b/include/net/netmem.h index 8a6e20be4b9d..1b58faa4f20f 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -72,6 +72,22 @@ static inline bool netmem_is_net_iov(const netmem_ref netmem) return (__force unsigned long)netmem & NET_IOV; } +/** + * __netmem_to_page - unsafely get pointer to the &page backing @netmem + * @netmem: netmem reference to convert + * + * Unsafe version of netmem_to_page(). When @netmem is always page-backed, + * e.g. when it's a header buffer, performs faster and generates smaller + * object code (no check for the LSB, no WARN). When @netmem points to IOV, + * provokes undefined behaviour. + * + * Return: pointer to the &page (garbage if @netmem is not page-backed). + */ +static inline struct page *__netmem_to_page(netmem_ref netmem) +{ + return (__force struct page *)netmem; +} + /* This conversion fails (returns NULL) if the netmem_ref is not struct page * backed. */ @@ -80,7 +96,7 @@ static inline struct page *netmem_to_page(netmem_ref netmem) if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) return NULL; - return (__force struct page *)netmem; + return __netmem_to_page(netmem); } static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem) @@ -103,6 +119,17 @@ static inline netmem_ref page_to_netmem(struct page *page) return (__force netmem_ref)page; } +/** + * virt_to_netmem - convert virtual memory pointer to a netmem reference + * @data: host memory pointer to convert + * + * Return: netmem reference to the &page backing this virtual address. + */ +static inline netmem_ref virt_to_netmem(const void *data) +{ + return page_to_netmem(virt_to_page(data)); +} + static inline int netmem_ref_count(netmem_ref netmem) { /* The non-pp refcount of net_iov is always 1. On net_iov, we only @@ -127,6 +154,22 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem) return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); } +/** + * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem + * @netmem: netmem reference to get the pointer from + * + * Unsafe version of netmem_get_pp(). When @netmem is always page-backed, + * e.g. when it's a header buffer, performs faster and generates smaller + * object code (avoids clearing the LSB). When @netmem points to IOV, + * provokes invalid memory access. + * + * Return: pointer to the &page_pool (garbage if @netmem is not page-backed). + */ +static inline struct page_pool *__netmem_get_pp(netmem_ref netmem) +{ + return __netmem_to_page(netmem)->pp; +} + static inline struct page_pool *netmem_get_pp(netmem_ref netmem) { return __netmem_clear_lsb(netmem)->pp; @@ -158,12 +201,43 @@ static inline netmem_ref netmem_compound_head(netmem_ref netmem) return page_to_netmem(compound_head(netmem_to_page(netmem))); } +/** + * __netmem_address - unsafely get pointer to the memory backing @netmem + * @netmem: netmem reference to get the pointer for + * + * Unsafe version of netmem_address(). When @netmem is always page-backed, + * e.g. when it's a header buffer, performs faster and generates smaller + * object code (no check for the LSB). When @netmem points to IOV, provokes + * undefined behaviour. + * + * Return: pointer to the memory (garbage if @netmem is not page-backed). + */ +static inline void *__netmem_address(netmem_ref netmem) +{ + return page_address(__netmem_to_page(netmem)); +} + static inline void *netmem_address(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return NULL; - return page_address(netmem_to_page(netmem)); + return __netmem_address(netmem); +} + +/** + * netmem_is_pfmemalloc - check if @netmem was allocated under memory pressure + * @netmem: netmem reference to check + * + * Return: true if @netmem is page-backed and the page was allocated under + * memory pressure, false otherwise. + */ +static inline bool netmem_is_pfmemalloc(netmem_ref netmem) +{ + if (netmem_is_net_iov(netmem)) + return false; + + return page_is_pfmemalloc(netmem_to_page(netmem)); } static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) -- cgit v1.2.3 From 024bfd2e9d80d7131f1178eb2235030b96f7ef0e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Tue, 3 Dec 2024 18:37:31 +0100 Subject: page_pool: make page_pool_put_page_bulk() handle array of netmems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, page_pool_put_page_bulk() indeed takes an array of pointers to the data, not pages, despite the name. As one side effect, when you're freeing frags from &skb_shared_info, xdp_return_frame_bulk() converts page pointers to virtual addresses and then page_pool_put_page_bulk() converts them back. Moreover, data pointers assume every frag is placed in the host memory, making this function non-universal. Make page_pool_put_page_bulk() handle array of netmems. Pass frag netmems directly and use virt_to_netmem() when freeing xdpf->data, so that the PP core will then get the compound netmem and take care of the rest. Signed-off-by: Alexander Lobakin Reviewed-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241203173733.3181246-9-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 8 ++++---- include/net/xdp.h | 2 +- net/core/page_pool.c | 30 +++++++++++++++--------------- net/core/xdp.c | 6 +++--- 4 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index c022c410abe3..1ea16b0e9c79 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -259,8 +259,8 @@ void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem); -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count); +void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, + u32 count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -272,8 +272,8 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool, { } -static inline void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count) +static inline void page_pool_put_netmem_bulk(struct page_pool *pool, + netmem_ref *data, u32 count) { } #endif diff --git a/include/net/xdp.h b/include/net/xdp.h index 1253fe21ede7..f4020b29122f 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -194,7 +194,7 @@ xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) struct xdp_frame_bulk { int count; void *xa; - void *q[XDP_BULK_QUEUE_SIZE]; + netmem_ref q[XDP_BULK_QUEUE_SIZE]; }; static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index f89cf93f6eb4..4c85b77cfdac 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -840,22 +840,22 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, EXPORT_SYMBOL(page_pool_put_unrefed_page); /** - * page_pool_put_page_bulk() - release references on multiple pages + * page_pool_put_netmem_bulk() - release references on multiple netmems * @pool: pool from which pages were allocated - * @data: array holding page pointers - * @count: number of pages in @data + * @data: array holding netmem references + * @count: number of entries in @data * - * Tries to refill a number of pages into the ptr_ring cache holding ptr_ring - * producer lock. If the ptr_ring is full, page_pool_put_page_bulk() - * will release leftover pages to the page allocator. - * page_pool_put_page_bulk() is suitable to be run inside the driver NAPI tx + * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring + * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk() + * will release leftover netmems to the memory provider. + * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx * completion loop for the XDP_REDIRECT use case. * * Please note the caller must not use data area after running - * page_pool_put_page_bulk(), as this function overwrites it. + * page_pool_put_netmem_bulk(), as this function overwrites it. */ -void page_pool_put_page_bulk(struct page_pool *pool, void **data, - int count) +void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, + u32 count) { int i, bulk_len = 0; bool allow_direct; @@ -864,7 +864,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, allow_direct = page_pool_napi_local(pool); for (i = 0; i < count; i++) { - netmem_ref netmem = page_to_netmem(virt_to_head_page(data[i])); + netmem_ref netmem = netmem_compound_head(data[i]); /* It is not the last user for the page frag case */ if (!page_pool_is_last_ref(netmem)) @@ -873,7 +873,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); /* Approved for bulk recycling in ptr_ring cache */ if (netmem) - data[bulk_len++] = (__force void *)netmem; + data[bulk_len++] = netmem; } if (!bulk_len) @@ -882,7 +882,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, /* Bulk producer into ptr_ring page_pool cache */ in_softirq = page_pool_producer_lock(pool); for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, data[i])) { + if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) { /* ring full */ recycle_stat_inc(pool, ring_full); break; @@ -899,9 +899,9 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data, * since put_page() with refcnt == 1 can be an expensive operation */ for (; i < bulk_len; i++) - page_pool_return_page(pool, (__force netmem_ref)data[i]); + page_pool_return_page(pool, data[i]); } -EXPORT_SYMBOL(page_pool_put_page_bulk); +EXPORT_SYMBOL(page_pool_put_netmem_bulk); static netmem_ref page_pool_drain_frag(struct page_pool *pool, netmem_ref netmem) diff --git a/net/core/xdp.c b/net/core/xdp.c index de1e9cb78718..938ad15c9857 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -518,7 +518,7 @@ void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) if (unlikely(!xa || !bq->count)) return; - page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count); + page_pool_put_netmem_bulk(xa->page_pool, bq->q, bq->count); /* bq->xa is not cleared to save lookup, if mem.id same in next bulk */ bq->count = 0; } @@ -559,12 +559,12 @@ void xdp_return_frame_bulk(struct xdp_frame *xdpf, for (i = 0; i < sinfo->nr_frags; i++) { skb_frag_t *frag = &sinfo->frags[i]; - bq->q[bq->count++] = skb_frag_address(frag); + bq->q[bq->count++] = skb_frag_netmem(frag); if (bq->count == XDP_BULK_QUEUE_SIZE) xdp_flush_frame_bulk(bq); } } - bq->q[bq->count++] = xdpf->data; + bq->q[bq->count++] = virt_to_netmem(xdpf->data); } EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); -- cgit v1.2.3 From 8f1c716090a7ed20fea802b63b37758169d59b81 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 5 Dec 2024 10:42:10 +0000 Subject: net: phy: remove genphy_c45_eee_is_active()'s is_enabled arg All callers to genphy_c45_eee_is_active() now pass NULL as the is_enabled argument, which means we never use the value computed in this function. Remove the argument and clean up this function. Signed-off-by: Russell King (Oracle) Reviewed-by: Heiner Kallweit Link: https://patch.msgid.link/E1tJ9JC-006LIt-Ne@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- drivers/net/phy/phy-c45.c | 12 ++++-------- drivers/net/phy/phy.c | 5 ++--- include/linux/phy.h | 2 +- 3 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c index d162f78bc68d..0dac08e85304 100644 --- a/drivers/net/phy/phy-c45.c +++ b/drivers/net/phy/phy-c45.c @@ -1469,18 +1469,17 @@ EXPORT_SYMBOL_GPL(genphy_c45_plca_get_status); * @phydev: target phy_device struct * @adv: variable to store advertised linkmodes * @lp: variable to store LP advertised linkmodes - * @is_enabled: variable to store EEE enabled/disabled configuration value * * Description: this function will read local and link partner PHY * advertisements. Compare them return current EEE state. */ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, - unsigned long *lp, bool *is_enabled) + unsigned long *lp) { __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_adv) = {}; __ETHTOOL_DECLARE_LINK_MODE_MASK(tmp_lp) = {}; __ETHTOOL_DECLARE_LINK_MODE_MASK(common); - bool eee_enabled, eee_active; + bool eee_active; int ret; ret = genphy_c45_read_eee_adv(phydev, tmp_adv); @@ -1491,9 +1490,8 @@ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, if (ret) return ret; - eee_enabled = !linkmode_empty(tmp_adv); linkmode_and(common, tmp_adv, tmp_lp); - if (eee_enabled && !linkmode_empty(common)) + if (!linkmode_empty(tmp_adv) && !linkmode_empty(common)) eee_active = phy_check_valid(phydev->speed, phydev->duplex, common); else @@ -1503,8 +1501,6 @@ int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, linkmode_copy(adv, tmp_adv); if (lp) linkmode_copy(lp, tmp_lp); - if (is_enabled) - *is_enabled = eee_enabled; return eee_active; } @@ -1524,7 +1520,7 @@ int genphy_c45_ethtool_get_eee(struct phy_device *phydev, int ret; ret = genphy_c45_eee_is_active(phydev, data->advertised, - data->lp_advertised, NULL); + data->lp_advertised); if (ret < 0) return ret; diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 0c228aa18019..4cf344254237 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -988,8 +988,7 @@ static int phy_check_link_status(struct phy_device *phydev) if (phydev->link && phydev->state != PHY_RUNNING) { phy_check_downshift(phydev); phydev->state = PHY_RUNNING; - err = genphy_c45_eee_is_active(phydev, - NULL, NULL, NULL); + err = genphy_c45_eee_is_active(phydev, NULL, NULL); phydev->eee_active = err > 0; phydev->enable_tx_lpi = phydev->eee_cfg.tx_lpi_enabled && phydev->eee_active; @@ -1658,7 +1657,7 @@ int phy_init_eee(struct phy_device *phydev, bool clk_stop_enable) if (!phydev->drv) return -EIO; - ret = genphy_c45_eee_is_active(phydev, NULL, NULL, NULL); + ret = genphy_c45_eee_is_active(phydev, NULL, NULL); if (ret < 0) return ret; if (!ret) diff --git a/include/linux/phy.h b/include/linux/phy.h index 61a1bc81f597..bb157136351e 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -1991,7 +1991,7 @@ int genphy_c45_plca_set_cfg(struct phy_device *phydev, int genphy_c45_plca_get_status(struct phy_device *phydev, struct phy_plca_status *plca_st); int genphy_c45_eee_is_active(struct phy_device *phydev, unsigned long *adv, - unsigned long *lp, bool *is_enabled); + unsigned long *lp); int genphy_c45_ethtool_get_eee(struct phy_device *phydev, struct ethtool_keee *data); int genphy_c45_ethtool_set_eee(struct phy_device *phydev, -- cgit v1.2.3 From 18eabadd73ae60023ab05e376246bd725fb0c113 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 4 Dec 2024 13:11:21 +0100 Subject: vrf: Make pcpu_dstats update functions available to other modules. Currently vrf is the only module that uses NETDEV_PCPU_STAT_DSTATS. In order to make this kind of statistics available to other modules, we need to define the update functions in netdevice.h. Therefore, let's define dev_dstats_*() functions for RX and TX packet updates (packets, bytes and drops). Use these new functions in vrf.c instead of vrf_rx_stats() and the other manual counter updates. While there, update the type of the "len" variables to "unsigned int", so that there're aligned with both skb->len and the new dstats update functions. Signed-off-by: Guillaume Nault Link: https://patch.msgid.link/d7a552ee382c79f4854e7fcc224cf176cd21150d.1733313925.git.gnault@redhat.com Signed-off-by: Jakub Kicinski --- drivers/net/vrf.c | 49 ++++++++++++++--------------------------------- include/linux/netdevice.h | 40 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 67d25f4f94ef..ca81b212a246 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -122,16 +122,6 @@ struct net_vrf { int ifindex; }; -static void vrf_rx_stats(struct net_device *dev, int len) -{ - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - u64_stats_update_begin(&dstats->syncp); - u64_stats_inc(&dstats->rx_packets); - u64_stats_add(&dstats->rx_bytes, len); - u64_stats_update_end(&dstats->syncp); -} - static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; @@ -369,7 +359,7 @@ static bool qdisc_tx_is_default(const struct net_device *dev) static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst) { - int len = skb->len; + unsigned int len = skb->len; skb_orphan(skb); @@ -382,15 +372,10 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, skb->protocol = eth_type_trans(skb, dev); - if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { - vrf_rx_stats(dev, len); - } else { - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - u64_stats_update_begin(&dstats->syncp); - u64_stats_inc(&dstats->rx_drops); - u64_stats_update_end(&dstats->syncp); - } + if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) + dev_dstats_rx_add(dev, len); + else + dev_dstats_rx_dropped(dev); return NETDEV_TX_OK; } @@ -578,20 +563,14 @@ static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { - struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); - - int len = skb->len; - netdev_tx_t ret = is_ip_tx_frame(skb, dev); - - u64_stats_update_begin(&dstats->syncp); - if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { + unsigned int len = skb->len; + netdev_tx_t ret; - u64_stats_inc(&dstats->tx_packets); - u64_stats_add(&dstats->tx_bytes, len); - } else { - u64_stats_inc(&dstats->tx_drops); - } - u64_stats_update_end(&dstats->syncp); + ret = is_ip_tx_frame(skb, dev); + if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) + dev_dstats_tx_add(dev, len); + else + dev_dstats_tx_dropped(dev); return ret; } @@ -1364,7 +1343,7 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, if (!is_ndisc) { struct net_device *orig_dev = skb->dev; - vrf_rx_stats(vrf_dev, skb->len); + dev_dstats_rx_add(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -1420,7 +1399,7 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, goto out; } - vrf_rx_stats(vrf_dev, skb->len); + dev_dstats_rx_add(vrf_dev, skb->len); if (!list_empty(&vrf_dev->ptype_all)) { int err; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d1a8d98b132c..135105441681 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2854,6 +2854,46 @@ static inline void dev_lstats_add(struct net_device *dev, unsigned int len) u64_stats_update_end(&lstats->syncp); } +static inline void dev_dstats_rx_add(struct net_device *dev, + unsigned int len) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_packets); + u64_stats_add(&dstats->rx_bytes, len); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_rx_dropped(struct net_device *dev) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->rx_drops); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_tx_add(struct net_device *dev, + unsigned int len) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->tx_packets); + u64_stats_add(&dstats->tx_bytes, len); + u64_stats_update_end(&dstats->syncp); +} + +static inline void dev_dstats_tx_dropped(struct net_device *dev) +{ + struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); + + u64_stats_update_begin(&dstats->syncp); + u64_stats_inc(&dstats->tx_drops); + u64_stats_update_end(&dstats->syncp); +} + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ -- cgit v1.2.3 From 435004291c9aebeb940c4bb5b0f5764c012ea5ff Mon Sep 17 00:00:00 2001 From: David Lechner Date: Wed, 13 Nov 2024 10:55:19 -0600 Subject: iio: adc: ad4695: move dt-bindings header Move the dt-bindings header file to the include/dt-bindings/iio/adc/ directory. ad4695 is an ADC driver, so it should be in the adc/ subdirectory for better organization. Previously, it was in the iio/ subdirectory. Signed-off-by: David Lechner Link: https://patch.msgid.link/20241113-iio-adc-ad4695-move-dt-bindings-header-v1-1-aba1f0f9b628@baylibre.com Signed-off-by: Jonathan Cameron --- Documentation/iio/ad4695.rst | 2 +- MAINTAINERS | 2 +- drivers/iio/adc/ad4695.c | 2 +- include/dt-bindings/iio/adc/adi,ad4695.h | 9 +++++++++ include/dt-bindings/iio/adi,ad4695.h | 9 --------- 5 files changed, 12 insertions(+), 12 deletions(-) create mode 100644 include/dt-bindings/iio/adc/adi,ad4695.h delete mode 100644 include/dt-bindings/iio/adi,ad4695.h (limited to 'include') diff --git a/Documentation/iio/ad4695.rst b/Documentation/iio/ad4695.rst index 33ed29b7c98a..9ec8bf466c15 100644 --- a/Documentation/iio/ad4695.rst +++ b/Documentation/iio/ad4695.rst @@ -101,7 +101,7 @@ The macro comes from: .. code-block:: - #include + #include Pairing two INx pins ^^^^^^^^^^^^^^^^^^^^ diff --git a/MAINTAINERS b/MAINTAINERS index 1e930c7a58b1..a32dceeba40b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1292,7 +1292,7 @@ W: https://ez.analog.com/linux-software-drivers F: Documentation/devicetree/bindings/iio/adc/adi,ad4695.yaml F: Documentation/iio/ad4695.rst F: drivers/iio/adc/ad4695.c -F: include/dt-bindings/iio/adi,ad4695.h +F: include/dt-bindings/iio/adc/adi,ad4695.h ANALOG DEVICES INC AD7091R DRIVER M: Marcelo Schmitt diff --git a/drivers/iio/adc/ad4695.c b/drivers/iio/adc/ad4695.c index 595ec4158e73..3c2c01289fda 100644 --- a/drivers/iio/adc/ad4695.c +++ b/drivers/iio/adc/ad4695.c @@ -30,7 +30,7 @@ #include #include -#include +#include /* AD4695 registers */ #define AD4695_REG_SPI_CONFIG_A 0x0000 diff --git a/include/dt-bindings/iio/adc/adi,ad4695.h b/include/dt-bindings/iio/adc/adi,ad4695.h new file mode 100644 index 000000000000..9fbef542bf67 --- /dev/null +++ b/include/dt-bindings/iio/adc/adi,ad4695.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_ADI_AD4695_H +#define _DT_BINDINGS_ADI_AD4695_H + +#define AD4695_COMMON_MODE_REFGND 0xFF +#define AD4695_COMMON_MODE_COM 0xFE + +#endif /* _DT_BINDINGS_ADI_AD4695_H */ diff --git a/include/dt-bindings/iio/adi,ad4695.h b/include/dt-bindings/iio/adi,ad4695.h deleted file mode 100644 index 9fbef542bf67..000000000000 --- a/include/dt-bindings/iio/adi,ad4695.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ - -#ifndef _DT_BINDINGS_ADI_AD4695_H -#define _DT_BINDINGS_ADI_AD4695_H - -#define AD4695_COMMON_MODE_REFGND 0xFF -#define AD4695_COMMON_MODE_COM 0xFE - -#endif /* _DT_BINDINGS_ADI_AD4695_H */ -- cgit v1.2.3 From cb3e9a446763da8850c8280be009d95f61e11a94 Mon Sep 17 00:00:00 2001 From: David Lechner Date: Fri, 22 Nov 2024 11:42:48 -0600 Subject: iio: adc: ad_sigma_delta: add tab to align irq_line Align the irq_line field in struct ad_sigma_delta with the other fields. Signed-off-by: David Lechner Link: https://patch.msgid.link/20241122-iio-adc-ad_signal_delta-fix-align-v1-1-d0a071d2dc83@baylibre.com Signed-off-by: Jonathan Cameron --- include/linux/iio/adc/ad_sigma_delta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index f8c1d2505940..1851f8fed3a4 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -96,7 +96,7 @@ struct ad_sigma_delta { unsigned int active_slots; unsigned int current_slot; unsigned int num_slots; - int irq_line; + int irq_line; bool status_appended; /* map slots to channels in order to know what to expect from devices */ unsigned int *slots; -- cgit v1.2.3 From 0c39208bc3af6f7afcbcdb675ad8fbe6b3022865 Mon Sep 17 00:00:00 2001 From: Robert Budai Date: Mon, 25 Nov 2024 15:35:08 +0200 Subject: iio: imu: adis: Remove documented not used elements This patch removes elements from adis.h that are documented but not used anymore. Signed-off-by: Robert Budai Link: https://patch.msgid.link/20241125133520.24328-2-robert.budai@analog.com Signed-off-by: Jonathan Cameron --- include/linux/iio/imu/adis.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h index e6a75356567a..4bb98d9731de 100644 --- a/include/linux/iio/imu/adis.h +++ b/include/linux/iio/imu/adis.h @@ -99,7 +99,6 @@ struct adis_data { * @spi: Reference to SPI device which owns this ADIS IIO device * @trig: IIO trigger object data * @data: ADIS chip variant specific data - * @burst: ADIS burst transfer information * @burst_extra_len: Burst extra length. Should only be used by devices that can * dynamically change their burst mode length. * @state_lock: Lock used by the device to protect state -- cgit v1.2.3 From e895f2edfe4820b671c700ccc17be35b1de3d295 Mon Sep 17 00:00:00 2001 From: Javier Carrasco Date: Mon, 25 Nov 2024 22:16:19 +0100 Subject: iio: core: fix doc reference to iio_push_to_buffers_with_ts_unaligned Use the right name of the function, which is defined in drivers/iio/industrialio-buffer.c Signed-off-by: Javier Carrasco Link: https://patch.msgid.link/20241125-iio_memset_scan_holes-v1-11-0cb6e98d895c@gmail.com Signed-off-by: Jonathan Cameron --- include/linux/iio/iio-opaque.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h index a89e7e43e441..4247497f3f8b 100644 --- a/include/linux/iio/iio-opaque.h +++ b/include/linux/iio/iio-opaque.h @@ -28,7 +28,7 @@ * @groupcounter: index of next attribute group * @legacy_scan_el_group: attribute group for legacy scan elements attribute group * @legacy_buffer_group: attribute group for legacy buffer attributes group - * @bounce_buffer: for devices that call iio_push_to_buffers_with_timestamp_unaligned() + * @bounce_buffer: for devices that call iio_push_to_buffers_with_ts_unaligned() * @bounce_buffer_size: size of currently allocate bounce buffer * @scan_index_timestamp: cache of the index to the timestamp * @clock_id: timestamping clock posix identifier -- cgit v1.2.3 From d87daa18535d38385f1da460c557c004e0b66347 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Mon, 26 Aug 2024 20:31:14 +0300 Subject: dt-bindings: clk: at91: Add clock IDs for the slow clock controller Add clock IDs for the slow clock controller. Previously, raw numbers were used (0 or 1) for clocks generated by the slow clock controller. This leads to confusion and wrong IDs were used on few device trees. To avoid this add macros. Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20240826173116.3628337-2-claudiu.beznea@tuxon.dev Signed-off-by: Claudiu Beznea --- include/dt-bindings/clock/at91.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index 6ede88c3992d..99f4767ff6bb 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -55,4 +55,8 @@ #define AT91_PMC_GCKRDY 24 /* Generated Clocks */ #endif +/* Slow clock. */ +#define SCKC_MD_SLCK 0 +#define SCKC_TD_SLCK 1 + #endif -- cgit v1.2.3 From ebe559609d7829b52c6642b581860760984faf9d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Nov 2024 10:30:14 -0500 Subject: fs: get rid of __FMODE_NONOTIFY kludge All it takes to get rid of the __FMODE_NONOTIFY kludge is switching fanotify from anon_inode_getfd() to anon_inode_getfile_fmode() and adding a dentry_open_nonotify() helper to be used by fanotify on the other path. That's it - no more weird shit in OPEN_FMODE(), etc. Signed-off-by: Al Viro Link: https://lore.kernel.org/linux-fsdevel/20241113043003.GH3387508@ZenIV/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/d1231137e7b661a382459e79a764259509a4115d.1731684329.git.josef@toxicpanda.com --- fs/fcntl.c | 4 ++-- fs/notify/fanotify/fanotify_user.c | 25 ++++++++++++++++--------- fs/open.c | 23 +++++++++++++++++++---- include/linux/fs.h | 6 +++--- include/uapi/asm-generic/fcntl.h | 1 - 5 files changed, 40 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/fs/fcntl.c b/fs/fcntl.c index 49884fa3c81d..5598e4d57422 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -1158,10 +1158,10 @@ static int __init fcntl_init(void) * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY * is defined as O_NONBLOCK on some platforms and not on others. */ - BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != + BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32( (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) | - __FMODE_EXEC | __FMODE_NONOTIFY)); + __FMODE_EXEC)); fasync_cache = kmem_cache_create("fasync_cache", sizeof(struct fasync_struct), 0, diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 2d85c71717d6..919ff59cb802 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -100,8 +100,7 @@ static void __init fanotify_sysctls_init(void) * * Internal and external open flags are stored together in field f_flags of * struct file. Only external open flags shall be allowed in event_f_flags. - * Internal flags like FMODE_NONOTIFY, FMODE_EXEC, FMODE_NOCMTIME shall be - * excluded. + * Internal flags like FMODE_EXEC shall be excluded. */ #define FANOTIFY_INIT_ALL_EVENT_F_BITS ( \ O_ACCMODE | O_APPEND | O_NONBLOCK | \ @@ -258,12 +257,11 @@ static int create_fd(struct fsnotify_group *group, const struct path *path, return client_fd; /* - * we need a new file handle for the userspace program so it can read even if it was - * originally opened O_WRONLY. + * We provide an fd for the userspace program, so it could access the + * file without generating fanotify events itself. */ - new_file = dentry_open(path, - group->fanotify_data.f_flags | __FMODE_NONOTIFY, - current_cred()); + new_file = dentry_open_nonotify(path, group->fanotify_data.f_flags, + current_cred()); if (IS_ERR(new_file)) { put_unused_fd(client_fd); client_fd = PTR_ERR(new_file); @@ -1409,6 +1407,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) unsigned int fid_mode = flags & FANOTIFY_FID_BITS; unsigned int class = flags & FANOTIFY_CLASS_BITS; unsigned int internal_flags = 0; + struct file *file; pr_debug("%s: flags=%x event_f_flags=%x\n", __func__, flags, event_f_flags); @@ -1477,7 +1476,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) (!(fid_mode & FAN_REPORT_NAME) || !(fid_mode & FAN_REPORT_FID))) return -EINVAL; - f_flags = O_RDWR | __FMODE_NONOTIFY; + f_flags = O_RDWR; if (flags & FAN_CLOEXEC) f_flags |= O_CLOEXEC; if (flags & FAN_NONBLOCK) @@ -1555,10 +1554,18 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) goto out_destroy_group; } - fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags); + fd = get_unused_fd_flags(f_flags); if (fd < 0) goto out_destroy_group; + file = anon_inode_getfile_fmode("[fanotify]", &fanotify_fops, group, + f_flags, FMODE_NONOTIFY); + if (IS_ERR(file)) { + fd = PTR_ERR(file); + put_unused_fd(fd); + goto out_destroy_group; + } + fd_install(fd, file); return fd; out_destroy_group: diff --git a/fs/open.c b/fs/open.c index e6911101fe71..c3490286092e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1105,6 +1105,23 @@ struct file *dentry_open(const struct path *path, int flags, } EXPORT_SYMBOL(dentry_open); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred) +{ + struct file *f = alloc_empty_file(flags, cred); + if (!IS_ERR(f)) { + int error; + + f->f_mode |= FMODE_NONOTIFY; + error = vfs_open(path, f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } + return f; +} + /** * dentry_create - Create and open a file * @path: path to create @@ -1202,7 +1219,7 @@ inline struct open_how build_open_how(int flags, umode_t mode) inline int build_open_flags(const struct open_how *how, struct open_flags *op) { u64 flags = how->flags; - u64 strip = __FMODE_NONOTIFY | O_CLOEXEC; + u64 strip = O_CLOEXEC; int lookup_flags = 0; int acc_mode = ACC_MODE(flags); @@ -1210,9 +1227,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op) "struct open_flags doesn't yet handle flags > 32 bits"); /* - * Strip flags that either shouldn't be set by userspace like - * FMODE_NONOTIFY or that aren't relevant in determining struct - * open_flags like O_CLOEXEC. + * Strip flags that aren't relevant in determining struct open_flags. */ flags &= ~strip; diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e29433c5ecc..93c2b720271e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2751,6 +2751,8 @@ static inline struct file *file_open_root_mnt(struct vfsmount *mnt, } struct file *dentry_open(const struct path *path, int flags, const struct cred *creds); +struct file *dentry_open_nonotify(const struct path *path, int flags, + const struct cred *cred); struct file *dentry_create(const struct path *path, int flags, umode_t mode, const struct cred *cred); struct path *backing_file_user_path(struct file *f); @@ -3707,11 +3709,9 @@ struct ctl_table; int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) -#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) -#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ - (flag & __FMODE_NONOTIFY))) +#define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) static inline bool is_sxid(umode_t mode) { diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 80f37a0d40d7..613475285643 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -6,7 +6,6 @@ /* * FMODE_EXEC is 0x20 - * FMODE_NONOTIFY is 0x4000000 * These cannot be used by userspace O_* until internal and external open * flags are split. * -Eric Paris -- cgit v1.2.3 From e419ddeabe7edd89650a19f411f928eea12b35b1 Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 13 Nov 2023 23:32:09 +1000 Subject: m68k: Use kernel's generic muldi3 libgcc function Use the kernels own generic lib/muldi3.c implementation of muldi3 for 68K machines. Some 68K CPUs support 64bit multiplies so move the arch specific umul_ppmm() macro into a header file that is included by lib/muldi3.c. That way it can take advantage of the single instruction when available. There does not appear to be any existing mechanism for the generic lib/muldi3.c code to pick up an external arch definition of umul_ppmm(). Create an arch specific libgcc.h that can optionally be included by the system include/linux/libgcc.h to allow for this. Somewhat oddly there is also a similar definition of umul_ppmm() in the non-architecture code in lib/crypto/mpi/longlong.h for a wide range or machines. Its presence ends up complicating the include setup and means not being able to use something like compiler.h instead. Actually there is a few other defines of umul_ppmm() macros spread around in various architectures, but not directly usable for the m68k case. Signed-off-by: Greg Ungerer Link: https://lore.kernel.org/20231113133209.1367286-1-gerg@linux-m68k.org Reviewed-by: Geert Uytterhoeven Reviewed-by: Arnd Bergmann Signed-off-by: Geert Uytterhoeven --- arch/Kconfig | 8 ++++ arch/m68k/Kconfig | 2 + arch/m68k/include/asm/libgcc.h | 20 +++++++++ arch/m68k/lib/Makefile | 2 +- arch/m68k/lib/muldi3.c | 97 ------------------------------------------ include/linux/libgcc.h | 4 ++ 6 files changed, 35 insertions(+), 98 deletions(-) create mode 100644 arch/m68k/include/asm/libgcc.h delete mode 100644 arch/m68k/lib/muldi3.c (limited to 'include') diff --git a/arch/Kconfig b/arch/Kconfig index 6682b2a53e34..b8a4ff365582 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1526,6 +1526,14 @@ config HAVE_ARCH_COMPILER_H linux/compiler-*.h in order to override macro definitions that those headers generally provide. +config HAVE_ARCH_LIBGCC_H + bool + help + An architecture can select this if it provides an + asm/libgcc.h header that should be included after + linux/libgcc.h in order to override macro definitions that + header generally provides. + config HAVE_ARCH_PREL32_RELOCATIONS bool help diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 7c4f7bcc89d7..b2ed0308c0ea 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -23,7 +23,9 @@ config M68K select GENERIC_LIB_ASHLDI3 select GENERIC_LIB_ASHRDI3 select GENERIC_LIB_LSHRDI3 + select GENERIC_LIB_MULDI3 select HAS_IOPORT if PCI || ISA || ATARI_ROM_ISA + select HAVE_ARCH_LIBGCC_H select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER select HAVE_ASM_MODVERSIONS diff --git a/arch/m68k/include/asm/libgcc.h b/arch/m68k/include/asm/libgcc.h new file mode 100644 index 000000000000..1cce6d130d80 --- /dev/null +++ b/arch/m68k/include/asm/libgcc.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_M68K_LIBGCC_H +#define __ASM_M68K_LIBGCC_H + +#ifndef CONFIG_CPU_HAS_NO_MULDIV64 +/* + * For those 68K CPUs that support 64bit multiply define umul_ppm() + * for the common muldi3 libgcc helper function (in lib/muldi3.c). + * CPUs that don't have it (like the original 68000 and ColdFire) + * will fallback to using the C-coded version of umul_ppmm(). + */ +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulu%.l %3,%1:%0" \ + : "=d" ((unsigned long)(w0)), \ + "=d" ((unsigned long)(w1)) \ + : "%0" ((unsigned long)(u)), \ + "dmi" ((unsigned long)(v))) +#endif /* !CONFIG_CPU_HAS_NO_MULDIV64 */ + +#endif /* __ASM_M68K_LIBGCC_H */ diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile index 9158688e6cc6..15278a95259e 100644 --- a/arch/m68k/lib/Makefile +++ b/arch/m68k/lib/Makefile @@ -4,7 +4,7 @@ # Makefile for m68k-specific library files.. # -lib-y := muldi3.o memcpy.o memset.o memmove.o +lib-y := memcpy.o memset.o memmove.o lib-$(CONFIG_MMU) += uaccess.o lib-$(CONFIG_CPU_HAS_NO_MULDIV64) += mulsi3.o divsi3.o udivsi3.o diff --git a/arch/m68k/lib/muldi3.c b/arch/m68k/lib/muldi3.c deleted file mode 100644 index 5012a9b218c7..000000000000 --- a/arch/m68k/lib/muldi3.c +++ /dev/null @@ -1,97 +0,0 @@ -/* muldi3.c extracted from gcc-2.7.2.3/libgcc2.c and - gcc-2.7.2.3/longlong.h which is: */ -/* Copyright (C) 1989, 1992, 1993, 1994, 1995 Free Software Foundation, Inc. - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. */ - -#include -#include -#include - -#ifdef CONFIG_CPU_HAS_NO_MULDIV64 - -#define SI_TYPE_SIZE 32 -#define __BITS4 (SI_TYPE_SIZE / 4) -#define __ll_B (1L << (SI_TYPE_SIZE / 2)) -#define __ll_lowpart(t) ((USItype) (t) % __ll_B) -#define __ll_highpart(t) ((USItype) (t) / __ll_B) - -#define umul_ppmm(w1, w0, u, v) \ - do { \ - USItype __x0, __x1, __x2, __x3; \ - USItype __ul, __vl, __uh, __vh; \ - \ - __ul = __ll_lowpart (u); \ - __uh = __ll_highpart (u); \ - __vl = __ll_lowpart (v); \ - __vh = __ll_highpart (v); \ - \ - __x0 = (USItype) __ul * __vl; \ - __x1 = (USItype) __ul * __vh; \ - __x2 = (USItype) __uh * __vl; \ - __x3 = (USItype) __uh * __vh; \ - \ - __x1 += __ll_highpart (__x0);/* this can't give carry */ \ - __x1 += __x2; /* but this indeed can */ \ - if (__x1 < __x2) /* did we get it? */ \ - __x3 += __ll_B; /* yes, add it in the proper pos. */ \ - \ - (w1) = __x3 + __ll_highpart (__x1); \ - (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ - } while (0) - -#else - -#define umul_ppmm(w1, w0, u, v) \ - __asm__ ("mulu%.l %3,%1:%0" \ - : "=d" ((USItype)(w0)), \ - "=d" ((USItype)(w1)) \ - : "%0" ((USItype)(u)), \ - "dmi" ((USItype)(v))) - -#endif - -#define __umulsidi3(u, v) \ - ({DIunion __w; \ - umul_ppmm (__w.s.high, __w.s.low, u, v); \ - __w.ll; }) - -typedef int SItype __mode(SI); -typedef unsigned int USItype __mode(SI); -typedef int DItype __mode(DI); -typedef int word_type __mode(__word__); - -struct DIstruct {SItype high, low;}; - -typedef union -{ - struct DIstruct s; - DItype ll; -} DIunion; - -DItype -__muldi3 (DItype u, DItype v) -{ - DIunion w; - DIunion uu, vv; - - uu.ll = u; - vv.ll = v; - - w.ll = __umulsidi3 (uu.s.low, vv.s.low); - w.s.high += ((USItype) uu.s.low * (USItype) vv.s.high - + (USItype) uu.s.high * (USItype) vv.s.low); - - return w.ll; -} -EXPORT_SYMBOL(__muldi3); diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h index fc388da6a027..0d68f9d6a6a7 100644 --- a/include/linux/libgcc.h +++ b/include/linux/libgcc.h @@ -34,4 +34,8 @@ long long notrace __lshrdi3(long long u, word_type b); long long notrace __muldi3(long long u, long long v); word_type notrace __ucmpdi2(unsigned long long a, unsigned long long b); +#ifdef CONFIG_HAVE_ARCH_LIBGCC_H +#include +#endif + #endif /* __ASM_LIBGCC_H */ -- cgit v1.2.3 From e9705da8472f306b44cbe1992ea2161bb96ece6e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 9 Dec 2024 10:44:42 +0100 Subject: ASoC: dt-bindings: qcom,wcd9335: Drop number of DAIs from the header Number of DAIs in the codec is not really a binding constant, because it could grow, e.g. when we implement missing features. Signed-off-by: Krzysztof Kozlowski Link: https://patch.msgid.link/20241209094442.38900-2-krzysztof.kozlowski@linaro.org Signed-off-by: Mark Brown --- include/dt-bindings/sound/qcom,wcd9335.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/dt-bindings/sound/qcom,wcd9335.h b/include/dt-bindings/sound/qcom,wcd9335.h index f5e9f1db091e..4fc68aeb9e04 100644 --- a/include/dt-bindings/sound/qcom,wcd9335.h +++ b/include/dt-bindings/sound/qcom,wcd9335.h @@ -10,6 +10,5 @@ #define AIF3_PB 4 #define AIF3_CAP 5 #define AIF4_PB 6 -#define NUM_CODEC_DAIS 7 #endif -- cgit v1.2.3 From 017b76fb8e5b6066f6791e7ad2387deb2c9c9a14 Mon Sep 17 00:00:00 2001 From: Joy Zou Date: Thu, 5 Dec 2024 11:51:12 -0500 Subject: regulator: pca9450: Add PMIC pca9452 support Add the PMIC pca9452 support, which add ldo3 compared with pca9451a. Signed-off-by: Joy Zou Signed-off-by: Frank Li Link: https://patch.msgid.link/20241205-pca9450-v1-4-aab448b74e78@nxp.com Signed-off-by: Mark Brown --- drivers/regulator/pca9450-regulator.c | 29 ++++++++++++++++++++++++++++- include/linux/regulator/pca9450.h | 1 + 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c index 0b1f099fdd5e..faa6b79c27d7 100644 --- a/drivers/regulator/pca9450-regulator.c +++ b/drivers/regulator/pca9450-regulator.c @@ -815,6 +815,24 @@ static const struct pca9450_regulator_desc pca9451a_regulators[] = { .owner = THIS_MODULE, }, }, + { + .desc = { + .name = "ldo3", + .of_match = of_match_ptr("LDO3"), + .regulators_node = of_match_ptr("regulators"), + .id = PCA9450_LDO3, + .ops = &pca9450_ldo_regulator_ops, + .type = REGULATOR_VOLTAGE, + .n_voltages = PCA9450_LDO3_VOLTAGE_NUM, + .linear_ranges = pca9450_ldo34_volts, + .n_linear_ranges = ARRAY_SIZE(pca9450_ldo34_volts), + .vsel_reg = PCA9450_REG_LDO3CTRL, + .vsel_mask = LDO3OUT_MASK, + .enable_reg = PCA9450_REG_LDO3CTRL, + .enable_mask = LDO3_EN_MASK, + .owner = THIS_MODULE, + }, + }, { .desc = { .name = "ldo4", @@ -916,6 +934,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) pca9450->rcnt = ARRAY_SIZE(pca9450bc_regulators); break; case PCA9450_TYPE_PCA9451A: + case PCA9450_TYPE_PCA9452: regulator_desc = pca9451a_regulators; pca9450->rcnt = ARRAY_SIZE(pca9451a_regulators); break; @@ -943,7 +962,8 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) /* Check your board and dts for match the right pmic */ if (((device_id >> 4) != 0x1 && type == PCA9450_TYPE_PCA9450A) || ((device_id >> 4) != 0x3 && type == PCA9450_TYPE_PCA9450BC) || - ((device_id >> 4) != 0x9 && type == PCA9450_TYPE_PCA9451A)) + ((device_id >> 4) != 0x9 && type == PCA9450_TYPE_PCA9451A) || + ((device_id >> 4) != 0x9 && type == PCA9450_TYPE_PCA9452)) return dev_err_probe(&i2c->dev, -EINVAL, "Device id(%x) mismatched\n", device_id >> 4); @@ -955,6 +975,9 @@ static int pca9450_i2c_probe(struct i2c_client *i2c) r = ®ulator_desc[i]; desc = &r->desc; + if (type == PCA9450_TYPE_PCA9451A && !strcmp(desc->name, "ldo3")) + continue; + config.regmap = pca9450->regmap; config.dev = pca9450->dev; @@ -1043,6 +1066,10 @@ static const struct of_device_id pca9450_of_match[] = { .compatible = "nxp,pca9451a", .data = (void *)PCA9450_TYPE_PCA9451A, }, + { + .compatible = "nxp,pca9452", + .data = (void *)PCA9450_TYPE_PCA9452, + }, { } }; MODULE_DEVICE_TABLE(of, pca9450_of_match); diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index 243633c8dceb..b427b5873de1 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -10,6 +10,7 @@ enum pca9450_chip_type { PCA9450_TYPE_PCA9450A = 0, PCA9450_TYPE_PCA9450BC, PCA9450_TYPE_PCA9451A, + PCA9450_TYPE_PCA9452, PCA9450_TYPE_AMOUNT, }; -- cgit v1.2.3 From 2ff913ab3f472321ac1931b663314edd6c211a0c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 5 Dec 2024 16:24:14 -0800 Subject: uprobes: Simplify session consumer tracking In practice, each return_instance will typically contain either zero or one return_consumer, depending on whether it has any uprobe session consumer attached or not. It's highly unlikely that more than one uprobe session consumers will be attached to any given uprobe, so there is no need to optimize for that case. But the way we currently do memory allocation and accounting is by pre-allocating the space for 4 session consumers in contiguous block of memory next to struct return_instance fixed part. This is unnecessarily wasteful. This patch changes this to keep struct return_instance fixed-sized with one pre-allocated return_consumer, while (in a highly unlikely scenario) allowing for more session consumers in a separate dynamically allocated and reallocated array. We also simplify accounting a bit by not maintaining a separate temporary capacity for consumers array, and, instead, relying on krealloc() to be a no-op if underlying memory can accommodate a slightly bigger allocation (but again, it's very uncommon scenario to even have to do this reallocation). All this gets rid of ri_size(), simplifies push_consumer() and removes confusing ri->consumers_cnt re-assignment, while containing this singular preallocated consumer logic contained within a few simple preexisting helpers. Having fixed-sized struct return_instance simplifies and speeds up return_instance reuse that we ultimately add later in this patch set, see follow up patches. Signed-off-by: Andrii Nakryiko Signed-off-by: Ingo Molnar Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Oleg Nesterov Link: https://lore.kernel.org/r/20241206002417.3295533-2-andrii@kernel.org --- include/linux/uprobes.h | 10 +++++-- kernel/events/uprobes.c | 72 +++++++++++++++++++++++++------------------------ 2 files changed, 45 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index e0a4c2082245..1d449978558d 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -154,12 +154,18 @@ struct return_instance { unsigned long stack; /* stack pointer */ unsigned long orig_ret_vaddr; /* original return address */ bool chained; /* true, if instance is nested */ - int consumers_cnt; + int cons_cnt; /* total number of session consumers */ struct return_instance *next; /* keep as stack */ struct rcu_head rcu; - struct return_consumer consumers[] __counted_by(consumers_cnt); + /* singular pre-allocated return_consumer instance for common case */ + struct return_consumer consumer; + /* + * extra return_consumer instances for rare cases of multiple session consumers, + * contains (cons_cnt - 1) elements + */ + struct return_consumer *extra_consumers; } ____cacheline_aligned; enum rp_check { diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index daf4314961ab..6beac52239be 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1899,6 +1899,7 @@ static struct return_instance *free_ret_instance(struct return_instance *ri, boo hprobe_finalize(&ri->hprobe, hstate); } + kfree(ri->extra_consumers); kfree_rcu(ri, rcu); return next; } @@ -1974,32 +1975,34 @@ static struct uprobe_task *get_utask(void) return current->utask; } -static size_t ri_size(int consumers_cnt) -{ - struct return_instance *ri; - - return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt; -} - -#define DEF_CNT 4 - static struct return_instance *alloc_return_instance(void) { struct return_instance *ri; - ri = kzalloc(ri_size(DEF_CNT), GFP_KERNEL); + ri = kzalloc(sizeof(*ri), GFP_KERNEL); if (!ri) return ZERO_SIZE_PTR; - ri->consumers_cnt = DEF_CNT; return ri; } static struct return_instance *dup_return_instance(struct return_instance *old) { - size_t size = ri_size(old->consumers_cnt); + struct return_instance *ri; + + ri = kmemdup(old, sizeof(*ri), GFP_KERNEL); + + if (unlikely(old->cons_cnt > 1)) { + ri->extra_consumers = kmemdup(old->extra_consumers, + sizeof(ri->extra_consumers[0]) * (old->cons_cnt - 1), + GFP_KERNEL); + if (!ri->extra_consumers) { + kfree(ri); + return NULL; + } + } - return kmemdup(old, size, GFP_KERNEL); + return ri; } static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask) @@ -2369,25 +2372,28 @@ static struct uprobe *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb return uprobe; } -static struct return_instance* -push_consumer(struct return_instance *ri, int idx, __u64 id, __u64 cookie) +static struct return_instance *push_consumer(struct return_instance *ri, __u64 id, __u64 cookie) { + struct return_consumer *ric; + if (unlikely(ri == ZERO_SIZE_PTR)) return ri; - if (unlikely(idx >= ri->consumers_cnt)) { - struct return_instance *old_ri = ri; - - ri->consumers_cnt += DEF_CNT; - ri = krealloc(old_ri, ri_size(old_ri->consumers_cnt), GFP_KERNEL); - if (!ri) { - kfree(old_ri); + if (unlikely(ri->cons_cnt > 0)) { + ric = krealloc(ri->extra_consumers, sizeof(*ric) * ri->cons_cnt, GFP_KERNEL); + if (!ric) { + kfree(ri->extra_consumers); + kfree_rcu(ri, rcu); return ZERO_SIZE_PTR; } + ri->extra_consumers = ric; } - ri->consumers[idx].id = id; - ri->consumers[idx].cookie = cookie; + ric = likely(ri->cons_cnt == 0) ? &ri->consumer : &ri->extra_consumers[ri->cons_cnt - 1]; + ric->id = id; + ric->cookie = cookie; + + ri->cons_cnt++; return ri; } @@ -2395,14 +2401,17 @@ static struct return_consumer * return_consumer_find(struct return_instance *ri, int *iter, int id) { struct return_consumer *ric; - int idx = *iter; + int idx; - for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) { + for (idx = *iter; idx < ri->cons_cnt; idx++) + { + ric = likely(idx == 0) ? &ri->consumer : &ri->extra_consumers[idx - 1]; if (ric->id == id) { *iter = idx + 1; return ric; } } + return NULL; } @@ -2416,7 +2425,6 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) struct uprobe_consumer *uc; bool has_consumers = false, remove = true; struct return_instance *ri = NULL; - int push_idx = 0; current->utask->auprobe = &uprobe->arch; @@ -2441,18 +2449,12 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) ri = alloc_return_instance(); if (session) - ri = push_consumer(ri, push_idx++, uc->id, cookie); + ri = push_consumer(ri, uc->id, cookie); } current->utask->auprobe = NULL; - if (!ZERO_OR_NULL_PTR(ri)) { - /* - * The push_idx value has the final number of return consumers, - * and ri->consumers_cnt has number of allocated consumers. - */ - ri->consumers_cnt = push_idx; + if (!ZERO_OR_NULL_PTR(ri)) prepare_uretprobe(uprobe, regs, ri); - } if (remove && has_consumers) { down_read(&uprobe->register_rwsem); -- cgit v1.2.3 From 8622e45b5da17e777e0e45f16296072494452318 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 5 Dec 2024 16:24:17 -0800 Subject: uprobes: Reuse return_instances between multiple uretprobes within task MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of constantly allocating and freeing very short-lived struct return_instance, reuse it as much as possible within current task. For that, store a linked list of reusable return_instances within current->utask. The only complication is that ri_timer() might be still processing such return_instance. And so while the main uretprobe processing logic might be already done with return_instance and would be OK to immediately reuse it for the next uretprobe instance, it's not correct to unconditionally reuse it just like that. Instead we make sure that ri_timer() can't possibly be processing it by using seqcount_t, with ri_timer() being "a writer", while free_ret_instance() being "a reader". If, after we unlink return instance from utask->return_instances list, we know that ri_timer() hasn't gotten to processing utask->return_instances yet, then we can be sure that immediate return_instance reuse is OK, and so we put it onto utask->ri_pool for future (potentially, almost immediate) reuse. This change shows improvements both in single CPU performance (by avoiding relatively expensive kmalloc/free combon) and in terms of multi-CPU scalability, where you can see that per-CPU throughput doesn't decline as steeply with increased number of CPUs (which were previously attributed to kmalloc()/free() through profiling): BASELINE (latest perf/core) =========================== uretprobe-nop ( 1 cpus): 1.898 ± 0.002M/s ( 1.898M/s/cpu) uretprobe-nop ( 2 cpus): 3.574 ± 0.011M/s ( 1.787M/s/cpu) uretprobe-nop ( 3 cpus): 5.279 ± 0.066M/s ( 1.760M/s/cpu) uretprobe-nop ( 4 cpus): 6.824 ± 0.047M/s ( 1.706M/s/cpu) uretprobe-nop ( 5 cpus): 8.339 ± 0.060M/s ( 1.668M/s/cpu) uretprobe-nop ( 6 cpus): 9.812 ± 0.047M/s ( 1.635M/s/cpu) uretprobe-nop ( 7 cpus): 11.030 ± 0.048M/s ( 1.576M/s/cpu) uretprobe-nop ( 8 cpus): 12.453 ± 0.126M/s ( 1.557M/s/cpu) uretprobe-nop (10 cpus): 14.838 ± 0.044M/s ( 1.484M/s/cpu) uretprobe-nop (12 cpus): 17.092 ± 0.115M/s ( 1.424M/s/cpu) uretprobe-nop (14 cpus): 19.576 ± 0.022M/s ( 1.398M/s/cpu) uretprobe-nop (16 cpus): 22.264 ± 0.015M/s ( 1.391M/s/cpu) uretprobe-nop (24 cpus): 33.534 ± 0.078M/s ( 1.397M/s/cpu) uretprobe-nop (32 cpus): 43.262 ± 0.127M/s ( 1.352M/s/cpu) uretprobe-nop (40 cpus): 53.252 ± 0.080M/s ( 1.331M/s/cpu) uretprobe-nop (48 cpus): 55.778 ± 0.045M/s ( 1.162M/s/cpu) uretprobe-nop (56 cpus): 56.850 ± 0.227M/s ( 1.015M/s/cpu) uretprobe-nop (64 cpus): 62.005 ± 0.077M/s ( 0.969M/s/cpu) uretprobe-nop (72 cpus): 66.445 ± 0.236M/s ( 0.923M/s/cpu) uretprobe-nop (80 cpus): 68.353 ± 0.180M/s ( 0.854M/s/cpu) THIS PATCHSET (on top of latest perf/core) ========================================== uretprobe-nop ( 1 cpus): 2.253 ± 0.004M/s ( 2.253M/s/cpu) uretprobe-nop ( 2 cpus): 4.281 ± 0.003M/s ( 2.140M/s/cpu) uretprobe-nop ( 3 cpus): 6.389 ± 0.027M/s ( 2.130M/s/cpu) uretprobe-nop ( 4 cpus): 8.328 ± 0.005M/s ( 2.082M/s/cpu) uretprobe-nop ( 5 cpus): 10.353 ± 0.001M/s ( 2.071M/s/cpu) uretprobe-nop ( 6 cpus): 12.513 ± 0.010M/s ( 2.086M/s/cpu) uretprobe-nop ( 7 cpus): 14.525 ± 0.017M/s ( 2.075M/s/cpu) uretprobe-nop ( 8 cpus): 15.633 ± 0.013M/s ( 1.954M/s/cpu) uretprobe-nop (10 cpus): 19.532 ± 0.011M/s ( 1.953M/s/cpu) uretprobe-nop (12 cpus): 21.405 ± 0.009M/s ( 1.784M/s/cpu) uretprobe-nop (14 cpus): 24.857 ± 0.020M/s ( 1.776M/s/cpu) uretprobe-nop (16 cpus): 26.466 ± 0.018M/s ( 1.654M/s/cpu) uretprobe-nop (24 cpus): 40.513 ± 0.222M/s ( 1.688M/s/cpu) uretprobe-nop (32 cpus): 54.180 ± 0.074M/s ( 1.693M/s/cpu) uretprobe-nop (40 cpus): 66.100 ± 0.082M/s ( 1.652M/s/cpu) uretprobe-nop (48 cpus): 70.544 ± 0.068M/s ( 1.470M/s/cpu) uretprobe-nop (56 cpus): 74.494 ± 0.055M/s ( 1.330M/s/cpu) uretprobe-nop (64 cpus): 79.317 ± 0.029M/s ( 1.239M/s/cpu) uretprobe-nop (72 cpus): 84.875 ± 0.020M/s ( 1.179M/s/cpu) uretprobe-nop (80 cpus): 92.318 ± 0.224M/s ( 1.154M/s/cpu) For reference, with uprobe-nop we hit the following throughput: uprobe-nop (80 cpus): 143.485 ± 0.035M/s ( 1.794M/s/cpu) So now uretprobe stays a bit closer to that performance. Signed-off-by: Andrii Nakryiko Signed-off-by: Ingo Molnar Cc: Masami Hiramatsu Cc: Peter Zijlstra Cc: Oleg Nesterov Link: https://lore.kernel.org/r/20241206002417.3295533-5-andrii@kernel.org --- include/linux/uprobes.h | 6 +++- kernel/events/uprobes.c | 83 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 75 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 1d449978558d..b1df7d792fa1 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -16,6 +16,7 @@ #include #include #include +#include struct uprobe; struct vm_area_struct; @@ -124,6 +125,10 @@ struct uprobe_task { unsigned int depth; struct return_instance *return_instances; + struct return_instance *ri_pool; + struct timer_list ri_timer; + seqcount_t ri_seqcount; + union { struct { struct arch_uprobe_task autask; @@ -137,7 +142,6 @@ struct uprobe_task { }; struct uprobe *active_uprobe; - struct timer_list ri_timer; unsigned long xol_vaddr; struct arch_uprobe *auprobe; diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 2345aeb63d3b..1af950208c2b 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1888,8 +1888,34 @@ unsigned long uprobe_get_trap_addr(struct pt_regs *regs) return instruction_pointer(regs); } -static void free_ret_instance(struct return_instance *ri, bool cleanup_hprobe) +static void ri_pool_push(struct uprobe_task *utask, struct return_instance *ri) { + ri->cons_cnt = 0; + ri->next = utask->ri_pool; + utask->ri_pool = ri; +} + +static struct return_instance *ri_pool_pop(struct uprobe_task *utask) +{ + struct return_instance *ri = utask->ri_pool; + + if (likely(ri)) + utask->ri_pool = ri->next; + + return ri; +} + +static void ri_free(struct return_instance *ri) +{ + kfree(ri->extra_consumers); + kfree_rcu(ri, rcu); +} + +static void free_ret_instance(struct uprobe_task *utask, + struct return_instance *ri, bool cleanup_hprobe) +{ + unsigned seq; + if (cleanup_hprobe) { enum hprobe_state hstate; @@ -1897,8 +1923,22 @@ static void free_ret_instance(struct return_instance *ri, bool cleanup_hprobe) hprobe_finalize(&ri->hprobe, hstate); } - kfree(ri->extra_consumers); - kfree_rcu(ri, rcu); + /* + * At this point return_instance is unlinked from utask's + * return_instances list and this has become visible to ri_timer(). + * If seqcount now indicates that ri_timer's return instance + * processing loop isn't active, we can return ri into the pool of + * to-be-reused return instances for future uretprobes. If ri_timer() + * happens to be running right now, though, we fallback to safety and + * just perform RCU-delated freeing of ri. + */ + if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) { + /* immediate reuse of ri without RCU GP is OK */ + ri_pool_push(utask, ri); + } else { + /* we might be racing with ri_timer(), so play it safe */ + ri_free(ri); + } } /* @@ -1920,7 +1960,15 @@ void uprobe_free_utask(struct task_struct *t) ri = utask->return_instances; while (ri) { ri_next = ri->next; - free_ret_instance(ri, true /* cleanup_hprobe */); + free_ret_instance(utask, ri, true /* cleanup_hprobe */); + ri = ri_next; + } + + /* free_ret_instance() above might add to ri_pool, so this loop should come last */ + ri = utask->ri_pool; + while (ri) { + ri_next = ri->next; + ri_free(ri); ri = ri_next; } @@ -1943,8 +1991,12 @@ static void ri_timer(struct timer_list *timer) /* RCU protects return_instance from freeing. */ guard(rcu)(); + write_seqcount_begin(&utask->ri_seqcount); + for_each_ret_instance_rcu(ri, utask->return_instances) hprobe_expire(&ri->hprobe, false); + + write_seqcount_end(&utask->ri_seqcount); } static struct uprobe_task *alloc_utask(void) @@ -1956,6 +2008,7 @@ static struct uprobe_task *alloc_utask(void) return NULL; timer_setup(&utask->ri_timer, ri_timer, 0); + seqcount_init(&utask->ri_seqcount); return utask; } @@ -1975,10 +2028,14 @@ static struct uprobe_task *get_utask(void) return current->utask; } -static struct return_instance *alloc_return_instance(void) +static struct return_instance *alloc_return_instance(struct uprobe_task *utask) { struct return_instance *ri; + ri = ri_pool_pop(utask); + if (ri) + return ri; + ri = kzalloc(sizeof(*ri), GFP_KERNEL); if (!ri) return ZERO_SIZE_PTR; @@ -2119,7 +2176,7 @@ static void cleanup_return_instances(struct uprobe_task *utask, bool chained, rcu_assign_pointer(utask->return_instances, ri_next); utask->depth--; - free_ret_instance(ri, true /* cleanup_hprobe */); + free_ret_instance(utask, ri, true /* cleanup_hprobe */); ri = ri_next; } } @@ -2186,7 +2243,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs, return; free: - kfree(ri); + ri_free(ri); } /* Prepare to single-step probed instruction out of line. */ @@ -2385,8 +2442,7 @@ static struct return_instance *push_consumer(struct return_instance *ri, __u64 i if (unlikely(ri->cons_cnt > 0)) { ric = krealloc(ri->extra_consumers, sizeof(*ric) * ri->cons_cnt, GFP_KERNEL); if (!ric) { - kfree(ri->extra_consumers); - kfree_rcu(ri, rcu); + ri_free(ri); return ZERO_SIZE_PTR; } ri->extra_consumers = ric; @@ -2428,8 +2484,9 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) struct uprobe_consumer *uc; bool has_consumers = false, remove = true; struct return_instance *ri = NULL; + struct uprobe_task *utask = current->utask; - current->utask->auprobe = &uprobe->arch; + utask->auprobe = &uprobe->arch; list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) { bool session = uc->handler && uc->ret_handler; @@ -2449,12 +2506,12 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs) continue; if (!ri) - ri = alloc_return_instance(); + ri = alloc_return_instance(utask); if (session) ri = push_consumer(ri, uc->id, cookie); } - current->utask->auprobe = NULL; + utask->auprobe = NULL; if (!ZERO_OR_NULL_PTR(ri)) prepare_uretprobe(uprobe, regs, ri); @@ -2554,7 +2611,7 @@ void uprobe_handle_trampoline(struct pt_regs *regs) hprobe_finalize(&ri->hprobe, hstate); /* We already took care of hprobe, no need to waste more time on that. */ - free_ret_instance(ri, false /* !cleanup_hprobe */); + free_ret_instance(utask, ri, false /* !cleanup_hprobe */); ri = ri_next; } while (ri != next_chain); } while (!valid); -- cgit v1.2.3 From 6057b90ecc84f232dd32a047a086a4c4c271765f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 3 Dec 2024 10:04:40 -0800 Subject: perf/core: Export perf_exclude_event() While at it, rename the same function in s390 cpum_sf PMU. Signed-off-by: Namhyung Kim Signed-off-by: Ingo Molnar Tested-by: Ravi Bangoria Reviewed-by: Ravi Bangoria Acked-by: Thomas Richter Link: https://lore.kernel.org/r/20241203180441.1634709-2-namhyung@kernel.org --- arch/s390/kernel/perf_cpum_sf.c | 6 +++--- include/linux/perf_event.h | 6 ++++++ kernel/events/core.c | 3 +-- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1e99514fb7ae..5f60248cb468 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -981,7 +981,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } -/* perf_exclude_event() - Filter event +/* perf_event_exclude() - Filter event * @event: The perf event * @regs: pt_regs structure * @sde_regs: Sample-data-entry (sde) regs structure @@ -990,7 +990,7 @@ static void cpumsf_pmu_disable(struct pmu *pmu) * * Return non-zero if the event shall be excluded. */ -static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, +static int perf_event_exclude(struct perf_event *event, struct pt_regs *regs, struct perf_sf_sde_regs *sde_regs) { if (event->attr.exclude_user && user_mode(regs)) @@ -1073,7 +1073,7 @@ static int perf_push_sample(struct perf_event *event, data.tid_entry.pid = basic->hpp & LPP_PID_MASK; overflow = 0; - if (perf_exclude_event(event, ®s, sde_regs)) + if (perf_event_exclude(event, ®s, sde_regs)) goto out; if (perf_event_overflow(event, &data, ®s)) { overflow = 1; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index bf831b1485ff..8333f132f4a9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1690,6 +1690,8 @@ static inline int perf_allow_tracepoint(struct perf_event_attr *attr) return security_perf_event_open(attr, PERF_SECURITY_TRACEPOINT); } +extern int perf_exclude_event(struct perf_event *event, struct pt_regs *regs); + extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, @@ -1895,6 +1897,10 @@ static inline u64 perf_event_pause(struct perf_event *event, bool reset) { return 0; } +static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) +{ + return 0; +} #endif #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) diff --git a/kernel/events/core.c b/kernel/events/core.c index e9f698c08dc1..b2bc67791f84 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -10039,8 +10039,7 @@ static void perf_swevent_event(struct perf_event *event, u64 nr, perf_swevent_overflow(event, 0, data, regs); } -static int perf_exclude_event(struct perf_event *event, - struct pt_regs *regs) +int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { if (event->hw.state & PERF_HES_STOPPED) return 1; -- cgit v1.2.3 From df8e78607d4795806b59564ba7a3e2e125d119fc Mon Sep 17 00:00:00 2001 From: Bastien Curutchet Date: Wed, 4 Dec 2024 10:43:16 +0100 Subject: memory: ti-aemif: Export aemif_*_cs_timings() Export the aemif_set_cs_timing() and aemif_check_cs_timing() symbols so they can be used by other drivers Add a mutex to protect the CS configuration register from concurrent accesses between the AEMIF and its 'children'. Signed-off-by: Bastien Curutchet Reviewed-by: Miquel Raynal Link: https://lore.kernel.org/r/20241204094319.1050826-7-bastien.curutchet@bootlin.com [krzysztof: wrap aemif_set_cs_timings() at 80-char] Signed-off-by: Krzysztof Kozlowski --- drivers/memory/ti-aemif.c | 36 ++++++++++++++---------------------- include/linux/memory/ti-aemif.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 22 deletions(-) create mode 100644 include/linux/memory/ti-aemif.h (limited to 'include') diff --git a/drivers/memory/ti-aemif.c b/drivers/memory/ti-aemif.c index 83fb308a831b..c8b83c9edbd5 100644 --- a/drivers/memory/ti-aemif.c +++ b/drivers/memory/ti-aemif.c @@ -13,7 +13,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -79,26 +81,6 @@ #define CONFIG_MASK (EW(EW_MAX) | SSTROBE(SSTROBE_MAX) | ASIZE_MAX) -/** - * struct aemif_cs_timings: structure to hold CS timings - * @wstrobe: write strobe width, number of cycles - 1 - * @rstrobe: read strobe width, number of cycles - 1 - * @wsetup: write setup width, number of cycles - 1 - * @whold: write hold width, number of cycles - 1 - * @rsetup: read setup width, number of cycles - 1 - * @rhold: read hold width, number of cycles - 1 - * @ta: minimum turn around time, number of cycles - 1 - */ -struct aemif_cs_timings { - u32 wstrobe; - u32 rstrobe; - u32 wsetup; - u32 whold; - u32 rsetup; - u32 rhold; - u32 ta; -}; - /** * struct aemif_cs_data: structure to hold CS parameters * @timings: timings configuration @@ -123,6 +105,7 @@ struct aemif_cs_data { * @num_cs: number of assigned chip-selects * @cs_offset: start number of cs nodes * @cs_data: array of chip-select settings + * @config_cs_lock: lock used to access CS configuration */ struct aemif_device { void __iomem *base; @@ -131,6 +114,7 @@ struct aemif_device { u8 num_cs; int cs_offset; struct aemif_cs_data cs_data[NUM_CS]; + struct mutex config_cs_lock; }; /** @@ -139,7 +123,7 @@ struct aemif_device { * * @return: 0 if the timing configuration is valid, negative error number otherwise. */ -static int aemif_check_cs_timings(struct aemif_cs_timings *timings) +int aemif_check_cs_timings(struct aemif_cs_timings *timings) { if (timings->ta > TA_MAX) return -EINVAL; @@ -164,6 +148,7 @@ static int aemif_check_cs_timings(struct aemif_cs_timings *timings) return 0; } +EXPORT_SYMBOL_GPL(aemif_check_cs_timings); /** * aemif_set_cs_timings() - Set the timing configuration of a given chip select. @@ -173,7 +158,8 @@ static int aemif_check_cs_timings(struct aemif_cs_timings *timings) * * @return: 0 on success, else negative errno. */ -static int aemif_set_cs_timings(struct aemif_device *aemif, u8 cs, struct aemif_cs_timings *timings) +int aemif_set_cs_timings(struct aemif_device *aemif, u8 cs, + struct aemif_cs_timings *timings) { unsigned int offset; u32 val, set; @@ -195,13 +181,16 @@ static int aemif_set_cs_timings(struct aemif_device *aemif, u8 cs, struct aemif_ offset = A1CR_OFFSET + cs * 4; + mutex_lock(&aemif->config_cs_lock); val = readl(aemif->base + offset); val &= ~TIMINGS_MASK; val |= set; writel(val, aemif->base + offset); + mutex_unlock(&aemif->config_cs_lock); return 0; } +EXPORT_SYMBOL_GPL(aemif_set_cs_timings); /** * aemif_calc_rate - calculate timing data. @@ -257,10 +246,12 @@ static int aemif_config_abus(struct platform_device *pdev, int csnum) if (data->enable_ss) set |= ACR_SSTROBE_MASK; + mutex_lock(&aemif->config_cs_lock); val = readl(aemif->base + offset); val &= ~CONFIG_MASK; val |= set; writel(val, aemif->base + offset); + mutex_unlock(&aemif->config_cs_lock); return aemif_set_cs_timings(aemif, data->cs - aemif->cs_offset, &data->timings); } @@ -399,6 +390,7 @@ static int aemif_probe(struct platform_device *pdev) if (IS_ERR(aemif->base)) return PTR_ERR(aemif->base); + mutex_init(&aemif->config_cs_lock); if (np) { /* * For every controller device node, there is a cs device node diff --git a/include/linux/memory/ti-aemif.h b/include/linux/memory/ti-aemif.h new file mode 100644 index 000000000000..da94a9d985e7 --- /dev/null +++ b/include/linux/memory/ti-aemif.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __MEMORY_TI_AEMIF_H +#define __MEMORY_TI_AEMIF_H + +/** + * struct aemif_cs_timings: structure to hold CS timing configuration + * values are expressed in number of clock cycles - 1 + * @ta: minimum turn around time + * @rhold: read hold width + * @rstrobe: read strobe width + * @rsetup: read setup width + * @whold: write hold width + * @wstrobe: write strobe width + * @wsetup: write setup width + */ +struct aemif_cs_timings { + u32 ta; + u32 rhold; + u32 rstrobe; + u32 rsetup; + u32 whold; + u32 wstrobe; + u32 wsetup; +}; + +struct aemif_device; + +int aemif_set_cs_timings(struct aemif_device *aemif, u8 cs, struct aemif_cs_timings *timings); +int aemif_check_cs_timings(struct aemif_cs_timings *timings); + +#endif // __MEMORY_TI_AEMIF_H -- cgit v1.2.3 From d1fd972914239996dbd15c5142d7f6e09d95a002 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:29 +0000 Subject: ktime: Add us_to_ktime() Add a us_to_ktime() helper to go with ms_to_ktime() and ns_to_ktime(). Signed-off-by: David Howells cc: Thomas Gleixner cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-2-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/linux/ktime.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 3a4e723eae0f..383ed9985802 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -222,6 +222,11 @@ static inline ktime_t ns_to_ktime(u64 ns) return ns; } +static inline ktime_t us_to_ktime(u64 us) +{ + return us * NSEC_PER_USEC; +} + static inline ktime_t ms_to_ktime(u64 ms) { return ms * NSEC_PER_MSEC; -- cgit v1.2.3 From 0e56ebde245e4799ce74d38419426f2a80d39950 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:30 +0000 Subject: rxrpc: Fix handling of received connection abort Fix the handling of a connection abort that we've received. Though the abort is at the connection level, it needs propagating to the calls on that connection. Whilst the propagation bit is performed, the calls aren't then woken up to go and process their termination, and as no further input is forthcoming, they just hang. Also add some tracing for the logging of connection aborts. Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-3-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 25 +++++++++++++++++++++++++ net/rxrpc/conn_event.c | 12 ++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d03e0bd8c028..27c23873c881 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -117,6 +117,7 @@ #define rxrpc_call_poke_traces \ EM(rxrpc_call_poke_abort, "Abort") \ EM(rxrpc_call_poke_complete, "Compl") \ + EM(rxrpc_call_poke_conn_abort, "Conn-abort") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ EM(rxrpc_call_poke_set_timeout, "Set-timo") \ @@ -282,6 +283,7 @@ EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ EM(rxrpc_call_see_connected, "SEE connect ") \ + EM(rxrpc_call_see_conn_abort, "SEE conn-abt") \ EM(rxrpc_call_see_disconnected, "SEE disconn ") \ EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ EM(rxrpc_call_see_input, "SEE input ") \ @@ -981,6 +983,29 @@ TRACE_EVENT(rxrpc_rx_abort, __entry->abort_code) ); +TRACE_EVENT(rxrpc_rx_conn_abort, + TP_PROTO(const struct rxrpc_connection *conn, const struct sk_buff *skb), + + TP_ARGS(conn, skb), + + TP_STRUCT__entry( + __field(unsigned int, conn) + __field(rxrpc_serial_t, serial) + __field(u32, abort_code) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->serial = rxrpc_skb(skb)->hdr.serial; + __entry->abort_code = skb->priority; + ), + + TP_printk("C=%08x ABORT %08x ac=%d", + __entry->conn, + __entry->serial, + __entry->abort_code) + ); + TRACE_EVENT(rxrpc_rx_challenge, TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, u32 version, u32 nonce, u32 min_level), diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 598b4ee389fc..2a1396cd892f 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -63,11 +63,12 @@ int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb, /* * Mark a connection as being remotely aborted. */ -static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn, +static void rxrpc_input_conn_abort(struct rxrpc_connection *conn, struct sk_buff *skb) { - return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED, - RXRPC_CALL_REMOTELY_ABORTED); + trace_rxrpc_rx_conn_abort(conn, skb); + rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED, + RXRPC_CALL_REMOTELY_ABORTED); } /* @@ -202,11 +203,14 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) for (i = 0; i < RXRPC_MAXCALLS; i++) { call = conn->channels[i].call; - if (call) + if (call) { + rxrpc_see_call(call, rxrpc_call_see_conn_abort); rxrpc_set_call_completion(call, conn->completion, conn->abort_code, conn->error); + rxrpc_poke_call(call, rxrpc_call_poke_conn_abort); + } } _leave(""); -- cgit v1.2.3 From efa95c32352b2ac7ff09d680144e22c0f25244cb Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:32 +0000 Subject: rxrpc: Clean up Tx header flags generation handling Clean up the generation of the header flags when building packet headers for transmission: (1) Assemble the flags in a local variable rather than in the txb->flags. (2) Do the flags masking and JUMBO-PACKET setting in one bit of code for both the main header and the jumbo headers. (3) Generate the REQUEST-ACK flag afresh each time. There's a possibility we might want to do jumbo retransmission packets in future. (4) Pass the local flags variable to the rxrpc_tx_data tracepoint rather than the combination of the txb flags and the wire header flags (the latter belong only to the first subpacket). Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-5-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 - net/rxrpc/ar-internal.h | 2 +- net/rxrpc/output.c | 18 ++++++++++++------ net/rxrpc/proc.c | 3 +-- 4 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 27c23873c881..62064f63d6eb 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -452,7 +452,6 @@ #define rxrpc_req_ack_traces \ EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \ - EM(rxrpc_reqack_already_on, "ALREADY-ON") \ EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \ EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \ EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index d0fd37bdcfe9..fcdfbc1d5aaf 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -110,7 +110,7 @@ struct rxrpc_net { atomic_t stat_tx_acks[256]; atomic_t stat_rx_acks[256]; - atomic_t stat_why_req_ack[8]; + atomic_t stat_why_req_ack[7]; atomic_t stat_io_loop; }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 85112ea31a39..50d5f2a02458 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -330,6 +330,8 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; + bool last; + u8 flags; _enter("%x,{%d}", txb->seq, txb->len); @@ -339,6 +341,10 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t txb->seq == 1) whdr->userStatus = RXRPC_USERSTATUS_SERVICE_UPGRADE; + txb->flags &= ~RXRPC_REQUEST_ACK; + flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; + last = txb->flags & RXRPC_LAST_PACKET; + /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. * @@ -346,9 +352,7 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t * service call, lest OpenAFS incorrectly send us an ACK with some * soft-ACKs in it and then never follow up with a proper hard ACK. */ - if (txb->flags & RXRPC_REQUEST_ACK) - why = rxrpc_reqack_already_on; - else if ((txb->flags & RXRPC_LAST_PACKET) && rxrpc_sending_to_client(txb)) + if (last && rxrpc_sending_to_client(txb)) why = rxrpc_reqack_no_srv_last; else if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) why = rxrpc_reqack_ack_lost; @@ -367,15 +371,17 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t rxrpc_inc_stat(call->rxnet, stat_why_req_ack[why]); trace_rxrpc_req_ack(call->debug_id, txb->seq, why); - if (why != rxrpc_reqack_no_srv_last) + if (why != rxrpc_reqack_no_srv_last) { txb->flags |= RXRPC_REQUEST_ACK; + flags |= RXRPC_REQUEST_ACK; + } dont_set_request_ack: - whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; + whdr->flags = flags; whdr->serial = htonl(txb->serial); whdr->cksum = txb->cksum; - trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, false); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, false); } /* diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 263a2251e3d2..3b7e34dd4385 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -519,9 +519,8 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DELAY]), atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE])); seq_printf(seq, - "Why-Req-A: acklost=%u already=%u mrtt=%u ortt=%u\n", + "Why-Req-A: acklost=%u mrtt=%u ortt=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]), - atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_already_on]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt])); seq_printf(seq, -- cgit v1.2.3 From 8b5823ea437624b53ecf084b6dd582760f110394 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:35 +0000 Subject: rxrpc: Request an ACK on impending Tx stall Set the REQUEST-ACK flag on the DATA packet we're about to send if we're about to stall transmission because the app layer isn't keeping up supplying us with data to transmit. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-8-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 1 + net/rxrpc/ar-internal.h | 2 +- net/rxrpc/output.c | 7 ++++++- net/rxrpc/proc.c | 5 +++-- 4 files changed, 11 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 62064f63d6eb..d86b5f07d292 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -452,6 +452,7 @@ #define rxrpc_req_ack_traces \ EM(rxrpc_reqack_ack_lost, "ACK-LOST ") \ + EM(rxrpc_reqack_app_stall, "APP-STALL ") \ EM(rxrpc_reqack_more_rtt, "MORE-RTT ") \ EM(rxrpc_reqack_no_srv_last, "NO-SRVLAST") \ EM(rxrpc_reqack_old_rtt, "OLD-RTT ") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index fcdfbc1d5aaf..d0fd37bdcfe9 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -110,7 +110,7 @@ struct rxrpc_net { atomic_t stat_tx_acks[256]; atomic_t stat_rx_acks[256]; - atomic_t stat_why_req_ack[7]; + atomic_t stat_why_req_ack[8]; atomic_t stat_io_loop; }; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 50d5f2a02458..b93a5d50be3e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -330,7 +330,7 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; enum rxrpc_req_ack_trace why; struct rxrpc_connection *conn = call->conn; - bool last; + bool last, more; u8 flags; _enter("%x,{%d}", txb->seq, txb->len); @@ -345,6 +345,9 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; last = txb->flags & RXRPC_LAST_PACKET; + more = (!list_is_last(&txb->call_link, &call->tx_buffer) || + !list_empty(&call->tx_sendmsg)); + /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. * @@ -366,6 +369,8 @@ static void rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_t why = rxrpc_reqack_more_rtt; else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) why = rxrpc_reqack_old_rtt; + else if (!last && !more) + why = rxrpc_reqack_app_stall; else goto dont_set_request_ack; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index cdf32f0d8e0e..ce4d48bdfbe9 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -520,10 +520,11 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]), atomic_read(&rxnet->stat_rx_acks[0])); seq_printf(seq, - "Why-Req-A: acklost=%u mrtt=%u ortt=%u\n", + "Why-Req-A: acklost=%u mrtt=%u ortt=%u stall=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]), atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]), - atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt])); + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]), + atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_app_stall])); seq_printf(seq, "Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n", atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]), -- cgit v1.2.3 From eeaedc5449d9fccf2b56e844a018df9d3720d59e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:37 +0000 Subject: rxrpc: Implement path-MTU probing using padded PING ACKs (RFC8899) Implement path-MTU probing (along the lines of RFC8899) by padding some of the PING ACKs we send. PING ACKs get their own individual responses quite apart from the acking of data (though, as ACKs, they fulfil that role also). The probing concentrates on packet sizes that correspond how many subpackets can be stuffed inside a jumbo packet as jumbo DATA packets are just aggregations of individual DATA packets and can be split easily for retransmission purposes. If we want to perform probing, we advertise this by setting the maximum number of jumbo subpackets to 0 in the ack trailer when we send an ACK and see if the peer is also advertising the service. This is interpreted by non-supporting Rx stacks as an indication that jumbo packets aren't supported. The MTU sizes advertised in the ACK trailer AF_RXRPC transmits are pegged at a maximum of 1444 unless pmtud is supported by both sides. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-10-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 124 +++++++++++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 25 +++++++-- net/rxrpc/call_event.c | 5 ++ net/rxrpc/conn_event.c | 17 +++--- net/rxrpc/conn_object.c | 6 +++ net/rxrpc/input.c | 26 +++++---- net/rxrpc/io_thread.c | 6 +++ net/rxrpc/misc.c | 4 +- net/rxrpc/output.c | 67 ++++++++++++++++++----- net/rxrpc/peer_event.c | 104 ++++++++++++++++++++++++++++++++++-- net/rxrpc/peer_object.c | 24 +++++++-- net/rxrpc/proc.c | 9 ++-- net/rxrpc/protocol.h | 13 +++-- net/rxrpc/sysctl.c | 6 ++- net/rxrpc/txbuf.c | 3 +- 15 files changed, 382 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d86b5f07d292..9dcadad88e76 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -364,6 +364,7 @@ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ EM(rxrpc_propose_ack_ping_for_0_retrans, "0-Retrn") \ + EM(rxrpc_propose_ack_ping_for_mtu_probe, "MTUProb") \ EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \ EM(rxrpc_propose_ack_ping_for_params, "Params ") \ EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \ @@ -478,6 +479,11 @@ EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") +#define rxrpc_pmtud_reduce_traces \ + EM(rxrpc_pmtud_reduce_ack, "Ack ") \ + EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \ + E_(rxrpc_pmtud_reduce_route, "Route") + /* * Generate enums for tracing information. */ @@ -498,6 +504,7 @@ enum rxrpc_congest_change { rxrpc_congest_changes } __mode(byte); enum rxrpc_conn_trace { rxrpc_conn_traces } __mode(byte); enum rxrpc_local_trace { rxrpc_local_traces } __mode(byte); enum rxrpc_peer_trace { rxrpc_peer_traces } __mode(byte); +enum rxrpc_pmtud_reduce_trace { rxrpc_pmtud_reduce_traces } __mode(byte); enum rxrpc_propose_ack_outcome { rxrpc_propose_ack_outcomes } __mode(byte); enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte); enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte); @@ -534,6 +541,7 @@ rxrpc_congest_changes; rxrpc_congest_modes; rxrpc_conn_traces; rxrpc_local_traces; +rxrpc_pmtud_reduce_traces; rxrpc_propose_ack_traces; rxrpc_receive_traces; rxrpc_recvmsg_traces; @@ -2040,6 +2048,122 @@ TRACE_EVENT(rxrpc_sack, __entry->sack) ); +TRACE_EVENT(rxrpc_pmtud_tx, + TP_PROTO(struct rxrpc_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(unsigned int, peer_debug_id) + __field(unsigned int, call_debug_id) + __field(rxrpc_serial_t, ping_serial) + __field(unsigned short, pmtud_trial) + __field(unsigned short, pmtud_good) + __field(unsigned short, pmtud_bad) + ), + + TP_fast_assign( + __entry->peer_debug_id = call->peer->debug_id; + __entry->call_debug_id = call->debug_id; + __entry->ping_serial = call->conn->pmtud_probe; + __entry->pmtud_trial = call->peer->pmtud_trial; + __entry->pmtud_good = call->peer->pmtud_good; + __entry->pmtud_bad = call->peer->pmtud_bad; + ), + + TP_printk("P=%08x c=%08x pr=%08x %u-%u-%u", + __entry->peer_debug_id, + __entry->call_debug_id, + __entry->ping_serial, + __entry->pmtud_good, + __entry->pmtud_trial, + __entry->pmtud_bad) + ); + +TRACE_EVENT(rxrpc_pmtud_rx, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t resp_serial), + + TP_ARGS(conn, resp_serial), + + TP_STRUCT__entry( + __field(unsigned int, peer_debug_id) + __field(unsigned int, call_debug_id) + __field(rxrpc_serial_t, ping_serial) + __field(rxrpc_serial_t, resp_serial) + __field(unsigned short, max_data) + __field(u8, jumbo_max) + ), + + TP_fast_assign( + __entry->peer_debug_id = conn->peer->debug_id; + __entry->call_debug_id = conn->pmtud_call; + __entry->ping_serial = conn->pmtud_probe; + __entry->resp_serial = resp_serial; + __entry->max_data = conn->peer->max_data; + __entry->jumbo_max = conn->peer->pmtud_jumbo; + ), + + TP_printk("P=%08x c=%08x pr=%08x rr=%08x max=%u jm=%u", + __entry->peer_debug_id, + __entry->call_debug_id, + __entry->ping_serial, + __entry->resp_serial, + __entry->max_data, + __entry->jumbo_max) + ); + +TRACE_EVENT(rxrpc_pmtud_lost, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t resp_serial), + + TP_ARGS(conn, resp_serial), + + TP_STRUCT__entry( + __field(unsigned int, peer_debug_id) + __field(unsigned int, call_debug_id) + __field(rxrpc_serial_t, ping_serial) + __field(rxrpc_serial_t, resp_serial) + ), + + TP_fast_assign( + __entry->peer_debug_id = conn->peer->debug_id; + __entry->call_debug_id = conn->pmtud_call; + __entry->ping_serial = conn->pmtud_probe; + __entry->resp_serial = resp_serial; + ), + + TP_printk("P=%08x c=%08x pr=%08x rr=%08x", + __entry->peer_debug_id, + __entry->call_debug_id, + __entry->ping_serial, + __entry->resp_serial) + ); + +TRACE_EVENT(rxrpc_pmtud_reduce, + TP_PROTO(struct rxrpc_peer *peer, rxrpc_serial_t serial, + unsigned int max_data, enum rxrpc_pmtud_reduce_trace reason), + + TP_ARGS(peer, serial, max_data, reason), + + TP_STRUCT__entry( + __field(unsigned int, peer_debug_id) + __field(rxrpc_serial_t, serial) + __field(unsigned int, max_data) + __field(enum rxrpc_pmtud_reduce_trace, reason) + ), + + TP_fast_assign( + __entry->peer_debug_id = peer->debug_id; + __entry->serial = serial; + __entry->max_data = max_data; + __entry->reason = reason; + ), + + TP_printk("P=%08x %s r=%08x m=%u", + __entry->peer_debug_id, + __print_symbolic(__entry->reason, rxrpc_pmtud_reduce_traces), + __entry->serial, __entry->max_data) + ); + #undef EM #undef E_ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ab8e565cb20b..69e6f4b20bad 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -344,13 +344,25 @@ struct rxrpc_peer { time64_t last_tx_at; /* Last time packet sent here */ seqlock_t service_conn_lock; spinlock_t lock; /* access lock */ - unsigned int if_mtu; /* interface MTU for this peer */ - unsigned int mtu; /* network MTU for this peer */ - unsigned int maxdata; /* data size (MTU - hdrsize) */ - unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ int debug_id; /* debug ID for printks */ struct sockaddr_rxrpc srx; /* remote address */ + /* Path MTU discovery [RFC8899] */ + unsigned int pmtud_trial; /* Current MTU probe size */ + unsigned int pmtud_good; /* Largest working MTU probe we've tried */ + unsigned int pmtud_bad; /* Smallest non-working MTU probe we've tried */ + bool pmtud_lost; /* T if MTU probe was lost */ + bool pmtud_probing; /* T if we have an active probe outstanding */ + bool pmtud_pending; /* T if a call to this peer should send a probe */ + u8 pmtud_jumbo; /* Max jumbo packets for the MTU */ + bool ackr_adv_pmtud; /* T if the peer advertises path-MTU */ + unsigned int ackr_max_data; /* Maximum data advertised by peer */ + seqcount_t mtu_lock; /* Lockless MTU access management */ + unsigned int if_mtu; /* Local interface MTU (- hdrsize) for this peer */ + unsigned int max_data; /* Maximum packet data capacity for this peer */ + unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ + unsigned short tx_seg_max; /* Maximum number of transmissable segments */ + /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 spinlock_t rtt_input_lock; /* RTT lock for input routine */ @@ -531,6 +543,8 @@ struct rxrpc_connection { int debug_id; /* debug ID for printks */ rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ + rxrpc_serial_t pmtud_probe; /* Serial of MTU probe (or 0) */ + unsigned int pmtud_call; /* ID of call used for probe */ u32 service_id; /* Service ID, possibly upgraded */ u32 security_level; /* Security level selected */ u8 security_ix; /* security type */ @@ -1155,6 +1169,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) */ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why); +void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call); int rxrpc_send_abort_packet(struct rxrpc_call *); void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); @@ -1166,6 +1181,8 @@ void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb); */ void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *); void rxrpc_peer_keepalive_worker(struct work_struct *); +void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial, + bool sendmsg_fail); /* * peer_object.c diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index c4754cc9b8d4..1d889b6f0366 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -483,6 +483,11 @@ out: rxrpc_disconnect_call(call); if (call->security) call->security->free_call_crypto(call); + } else { + if (skb && + call->peer->ackr_adv_pmtud && + call->peer->pmtud_pending) + rxrpc_send_probe_for_pmtud(call); } if (call->acks_hard_ack != call->tx_bottom) rxrpc_shrink_call_tx_buffer(call); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 2a1396cd892f..f6c02cc44d98 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -92,7 +92,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct rxrpc_acktrailer trailer; size_t len; int ret, ioc; - u32 serial, mtu, call_id, padding; + u32 serial, max_mtu, if_mtu, call_id, padding; _enter("%d", conn->debug_id); @@ -150,8 +150,13 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, break; case RXRPC_PACKET_TYPE_ACK: - mtu = conn->peer->if_mtu; - mtu -= conn->peer->hdrsize; + if_mtu = conn->peer->if_mtu - conn->peer->hdrsize; + if (conn->peer->ackr_adv_pmtud) { + max_mtu = umax(conn->peer->max_data, rxrpc_rx_mtu); + } else { + if_mtu = umin(1444, if_mtu); + max_mtu = if_mtu; + } pkt.ack.bufferSpace = 0; pkt.ack.maxSkew = htons(skb ? skb->priority : 0); pkt.ack.firstPacket = htonl(chan->last_seq + 1); @@ -159,10 +164,10 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, pkt.ack.serial = htonl(skb ? sp->hdr.serial : 0); pkt.ack.reason = skb ? RXRPC_ACK_DUPLICATE : RXRPC_ACK_IDLE; pkt.ack.nAcks = 0; - trailer.maxMTU = htonl(rxrpc_rx_mtu); - trailer.ifMTU = htonl(mtu); + trailer.maxMTU = htonl(max_mtu); + trailer.ifMTU = htonl(if_mtu); trailer.rwind = htonl(rxrpc_rx_window_size); - trailer.jumbo_max = htonl(rxrpc_rx_jumbo_max); + trailer.jumbo_max = 0; pkt.whdr.flags |= RXRPC_SLOW_START_OK; padding = 0; iov[0].iov_len += sizeof(pkt.ack); diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 694c4df7a1a3..b0627398311b 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -321,6 +321,12 @@ static void rxrpc_clean_up_connection(struct work_struct *work) list_del_init(&conn->proc_link); write_unlock(&rxnet->conn_lock); + if (conn->pmtud_probe) { + trace_rxrpc_pmtud_lost(conn, 0); + conn->peer->pmtud_probing = false; + conn->peer->pmtud_pending = true; + } + rxrpc_purge_queue(&conn->rx_queue); rxrpc_kill_client_conn(conn); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 49e35be7dc13..fd08d813ef29 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -692,8 +692,8 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb struct rxrpc_acktrailer *trailer) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_peer *peer; - unsigned int mtu; + struct rxrpc_peer *peer = call->peer; + unsigned int max_data; bool wake = false; u32 rwind = ntohl(trailer->rwind); @@ -706,14 +706,22 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb call->tx_winsize = rwind; } - mtu = umin(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU)); + if (trailer->jumbo_max == 0) { + /* The peer says it supports pmtu discovery */ + peer->ackr_adv_pmtud = true; + } else { + peer->ackr_adv_pmtud = false; + } + + max_data = ntohl(trailer->maxMTU); + peer->ackr_max_data = max_data; - peer = call->peer; - if (mtu < peer->maxdata) { - spin_lock(&peer->lock); - peer->maxdata = mtu; - peer->mtu = mtu + peer->hdrsize; - spin_unlock(&peer->lock); + if (max_data < peer->max_data) { + trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_data, + rxrpc_pmtud_reduce_ack); + write_seqcount_begin(&peer->mtu_lock); + peer->max_data = max_data; + write_seqcount_end(&peer->mtu_lock); } if (wake) diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 7af5adf53b25..bd6d4f5e97b4 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -364,6 +364,12 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, if (sp->hdr.callNumber == 0) return rxrpc_input_conn_packet(conn, skb); + /* Deal with path MTU discovery probing. */ + if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK && + conn->pmtud_probe && + after_eq(sp->ack.acked_serial, conn->pmtud_probe)) + rxrpc_input_probe_for_pmtud(conn, sp->ack.acked_serial, false); + /* Call-bound packets are routed by connection channel. */ channel = sp->hdr.cid & RXRPC_CHANNELMASK; chan = &conn->channels[channel]; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 657cf35089a6..8fcc8139d771 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -46,13 +46,13 @@ unsigned int rxrpc_rx_window_size = 255; * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet * made by gluing normal packets together that we're willing to handle. */ -unsigned int rxrpc_rx_mtu = 5692; +unsigned int rxrpc_rx_mtu = RXRPC_JUMBO(46); /* * The maximum number of fragments in a received jumbo packet that we tell the * sender that we're willing to handle. */ -unsigned int rxrpc_rx_jumbo_max = 4; +unsigned int rxrpc_rx_jumbo_max = 46; #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY /* diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f8bb5250e849..a91be871ad96 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -82,10 +82,9 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3; struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); - unsigned int qsize, sack, wrap, to; + unsigned int qsize, sack, wrap, to, max_mtu, if_mtu; rxrpc_seq_t window, wtop; int rsize; - u32 mtu, jmax; u8 *filler = txb->kvec[2].iov_base; u8 *sackp = txb->kvec[1].iov_base; @@ -132,16 +131,22 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, ack->reason = RXRPC_ACK_IDLE; } - mtu = call->peer->if_mtu; - mtu -= call->peer->hdrsize; - jmax = rxrpc_rx_jumbo_max; qsize = (window - 1) - call->rx_consumed; rsize = max_t(int, call->rx_winsize - qsize, 0); txb->ack_rwind = rsize; - trailer->maxMTU = htonl(rxrpc_rx_mtu); - trailer->ifMTU = htonl(mtu); + + if_mtu = call->peer->if_mtu - call->peer->hdrsize; + if (call->peer->ackr_adv_pmtud) { + max_mtu = umax(call->peer->max_data, rxrpc_rx_mtu); + } else { + if_mtu = umin(if_mtu, 1444); + max_mtu = if_mtu; + } + + trailer->maxMTU = htonl(max_mtu); + trailer->ifMTU = htonl(if_mtu); trailer->rwind = htonl(rsize); - trailer->jumbo_max = htonl(jmax); + trailer->jumbo_max = 0; /* Advertise pmtu discovery */ } /* @@ -176,7 +181,7 @@ no_slot: * Transmit an ACK packet. */ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, - int nr_kv) + int nr_kv, enum rxrpc_propose_ack_trace why) { struct kvec *kv = call->local->kvec; struct rxrpc_wire_header *whdr = kv[0].iov_base; @@ -209,13 +214,16 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, txb->len); - rxrpc_local_dont_fragment(conn->local, false); + rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe); ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) { trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, rxrpc_tx_point_call_ack); + if (why == rxrpc_propose_ack_ping_for_mtu_probe && + ret == -EMSGSIZE) + rxrpc_input_probe_for_pmtud(conn, txb->serial, true); } else { trace_rxrpc_tx_packet(call->debug_id, whdr, rxrpc_tx_point_call_ack); @@ -225,6 +233,13 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t if (txb->flags & RXRPC_REQUEST_ACK) call->peer->rtt_last_req = now; rxrpc_set_keepalive(call, now); + if (why == rxrpc_propose_ack_ping_for_mtu_probe) { + call->peer->pmtud_pending = false; + call->peer->pmtud_probing = true; + call->conn->pmtud_probe = txb->serial; + call->conn->pmtud_call = call->debug_id; + trace_rxrpc_pmtud_tx(call); + } } rxrpc_tx_backoff(call, ret); } @@ -254,21 +269,45 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_fill_out_ack(call, txb, ack_reason, serial); + /* Extend a path MTU probe ACK. */ nr_kv = txb->nr_kvec; kv[0] = txb->kvec[0]; kv[1] = txb->kvec[1]; kv[2] = txb->kvec[2]; - // TODO: Extend a path MTU probe ACK + if (why == rxrpc_propose_ack_ping_for_mtu_probe) { + size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header); + + if (txb->len > probe_mtu) + goto skip; + while (txb->len < probe_mtu) { + size_t part = umin(probe_mtu - txb->len, PAGE_SIZE); + + kv[nr_kv].iov_base = page_address(ZERO_PAGE(0)); + kv[nr_kv].iov_len = part; + txb->len += part; + nr_kv++; + } + } call->ackr_nr_unacked = 0; atomic_set(&call->ackr_nr_consumed, 0); clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); trace_rxrpc_send_ack(call, why, ack_reason, serial); - rxrpc_send_ack_packet(call, txb, nr_kv); + rxrpc_send_ack_packet(call, txb, nr_kv, why); +skip: rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); } +/* + * Send an ACK probe for path MTU discovery. + */ +void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call) +{ + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_mtu_probe); +} + /* * Send an ABORT call packet. */ @@ -501,7 +540,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (len >= sizeof(struct rxrpc_wire_header) + call->peer->maxdata) { + if (len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) { rxrpc_local_dont_fragment(conn->local, false); frag = rxrpc_tx_point_call_data_frag; } else { @@ -548,7 +587,7 @@ done: RX_USER_ABORT, ret); } - _leave(" = %d [%u]", ret, call->peer->maxdata); + _leave(" = %d [%u]", ret, call->peer->max_data); return ret; } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 552ba84a255c..8fc9464a960c 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -102,6 +102,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, */ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) { + unsigned int max_data; + /* wind down the local interface MTU */ if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) peer->if_mtu = mtu; @@ -120,11 +122,17 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) } } - if (mtu < peer->mtu) { - spin_lock(&peer->lock); - peer->mtu = mtu; - peer->maxdata = peer->mtu - peer->hdrsize; - spin_unlock(&peer->lock); + max_data = max_t(int, mtu - peer->hdrsize, 500); + if (max_data < peer->max_data) { + if (peer->pmtud_good > max_data) + peer->pmtud_good = max_data; + if (peer->pmtud_bad > max_data + 1) + peer->pmtud_bad = max_data + 1; + + trace_rxrpc_pmtud_reduce(peer, 0, max_data, rxrpc_pmtud_reduce_icmp); + write_seqcount_begin(&peer->mtu_lock); + peer->max_data = max_data; + write_seqcount_end(&peer->mtu_lock); } } @@ -347,3 +355,89 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) _leave(""); } + +/* + * Do path MTU probing. + */ +void rxrpc_input_probe_for_pmtud(struct rxrpc_connection *conn, rxrpc_serial_t acked_serial, + bool sendmsg_fail) +{ + struct rxrpc_peer *peer = conn->peer; + unsigned int max_data = peer->max_data; + int good, trial, bad, jumbo; + + good = peer->pmtud_good; + trial = peer->pmtud_trial; + bad = peer->pmtud_bad; + if (good >= bad - 1) { + conn->pmtud_probe = 0; + peer->pmtud_lost = false; + return; + } + + if (!peer->pmtud_probing) + goto send_probe; + + if (sendmsg_fail || after(acked_serial, conn->pmtud_probe)) { + /* Retry a lost probe. */ + if (!peer->pmtud_lost) { + trace_rxrpc_pmtud_lost(conn, acked_serial); + conn->pmtud_probe = 0; + peer->pmtud_lost = true; + goto send_probe; + } + + /* The probed size didn't seem to get through. */ + bad = trial; + peer->pmtud_bad = bad; + if (bad <= max_data) + max_data = bad - 1; + } else { + /* It did get through. */ + good = trial; + peer->pmtud_good = good; + if (good > max_data) + max_data = good; + } + + max_data = umin(max_data, peer->ackr_max_data); + if (max_data != peer->max_data) { + preempt_disable(); + write_seqcount_begin(&peer->mtu_lock); + peer->max_data = max_data; + write_seqcount_end(&peer->mtu_lock); + preempt_enable(); + } + + jumbo = max_data + sizeof(struct rxrpc_jumbo_header); + jumbo /= RXRPC_JUMBO_SUBPKTLEN; + peer->pmtud_jumbo = jumbo; + + trace_rxrpc_pmtud_rx(conn, acked_serial); + conn->pmtud_probe = 0; + peer->pmtud_lost = false; + + if (good < RXRPC_JUMBO(2) && bad > RXRPC_JUMBO(2)) + trial = RXRPC_JUMBO(2); + else if (good < RXRPC_JUMBO(4) && bad > RXRPC_JUMBO(4)) + trial = RXRPC_JUMBO(4); + else if (good < RXRPC_JUMBO(3) && bad > RXRPC_JUMBO(3)) + trial = RXRPC_JUMBO(3); + else if (good < RXRPC_JUMBO(6) && bad > RXRPC_JUMBO(6)) + trial = RXRPC_JUMBO(6); + else if (good < RXRPC_JUMBO(5) && bad > RXRPC_JUMBO(5)) + trial = RXRPC_JUMBO(5); + else if (good < RXRPC_JUMBO(8) && bad > RXRPC_JUMBO(8)) + trial = RXRPC_JUMBO(8); + else if (good < RXRPC_JUMBO(7) && bad > RXRPC_JUMBO(7)) + trial = RXRPC_JUMBO(7); + else + trial = (good + bad) / 2; + peer->pmtud_trial = trial; + + if (good >= bad) + return; + +send_probe: + peer->pmtud_pending = true; +} diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 49dcda67a0d5..80ef6f06d512 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -162,6 +162,11 @@ static void rxrpc_assess_MTU_size(struct rxrpc_local *local, #endif peer->if_mtu = 1500; + if (peer->max_data < peer->if_mtu - peer->hdrsize) { + trace_rxrpc_pmtud_reduce(peer, 0, peer->if_mtu - peer->hdrsize, + rxrpc_pmtud_reduce_route); + peer->max_data = peer->if_mtu - peer->hdrsize; + } memset(&fl, 0, sizeof(fl)); switch (peer->srx.transport.family) { @@ -199,8 +204,16 @@ static void rxrpc_assess_MTU_size(struct rxrpc_local *local, } peer->if_mtu = dst_mtu(dst); + peer->hdrsize += dst->header_len + dst->trailer_len; + peer->tx_seg_max = dst->dev->gso_max_segs; dst_release(dst); + peer->max_data = umin(RXRPC_JUMBO(1), peer->if_mtu - peer->hdrsize); + peer->pmtud_good = 500; + peer->pmtud_bad = peer->if_mtu - peer->hdrsize + 1; + peer->pmtud_trial = umin(peer->max_data, peer->pmtud_bad - 1); + peer->pmtud_pending = true; + _leave(" [if_mtu %u]", peer->if_mtu); } @@ -223,6 +236,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); spin_lock_init(&peer->rtt_input_lock); + seqcount_init(&peer->mtu_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); rxrpc_peer_init_rtt(peer); @@ -242,9 +256,7 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, unsigned long hash_key) { peer->hash_key = hash_key; - rxrpc_assess_MTU_size(local, peer); - peer->mtu = peer->if_mtu; - peer->rtt_last_req = ktime_get_real(); + switch (peer->srx.transport.family) { case AF_INET: @@ -268,7 +280,11 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, } peer->hdrsize += sizeof(struct rxrpc_wire_header); - peer->maxdata = peer->mtu - peer->hdrsize; + peer->max_data = peer->if_mtu - peer->hdrsize; + + rxrpc_assess_MTU_size(local, peer); + + peer->rtt_last_req = ktime_get_real(); } /* diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index ce4d48bdfbe9..44722c226064 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -283,9 +283,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) { seq_puts(seq, - "Proto Local " - " Remote " - " Use SST MTU LastUse RTT RTO\n" + "Proto Local Remote Use SST Maxd LastUse RTT RTO\n" ); return 0; } @@ -298,13 +296,12 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, - "UDP %-47.47s %-47.47s %3u" - " %3u %5u %6llus %8u %8u\n", + "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8u %8u\n", lbuff, rbuff, refcount_read(&peer->ref), peer->cong_ssthresh, - peer->mtu, + peer->max_data, now - peer->last_tx_at, peer->srtt_us >> 3, peer->rto_us); diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h index 4fe6b4d20ada..42f70e4636f8 100644 --- a/net/rxrpc/protocol.h +++ b/net/rxrpc/protocol.h @@ -92,11 +92,16 @@ struct rxrpc_jumbo_header { /* * The maximum number of subpackets that can possibly fit in a UDP packet is: * - * ((max_IP - IP_hdr - UDP_hdr) / RXRPC_JUMBO_SUBPKTLEN) + 1 - * = ((65535 - 28 - 28) / 1416) + 1 - * = 46 non-terminal packets and 1 terminal packet. + * (max_UDP - wirehdr + jumbohdr) / (jumbohdr + 1412) + * = ((65535 - 28 + 4) / 1416) + * = 45 non-terminal packets and 1 terminal packet. */ -#define RXRPC_MAX_NR_JUMBO 47 +#define RXRPC_MAX_NR_JUMBO 46 + +/* Size of a jumbo packet with N subpackets, excluding UDP+IP */ +#define RXRPC_JUMBO(N) ((int)sizeof(struct rxrpc_wire_header) + \ + RXRPC_JUMBO_DATALEN + \ + ((N) - 1) * RXRPC_JUMBO_SUBPKTLEN) /*****************************************************************************/ /* diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 9bf9a1f6e4cb..46a20cf4c402 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -11,6 +11,8 @@ #include "ar-internal.h" static struct ctl_table_header *rxrpc_sysctl_reg_table; +static const unsigned int rxrpc_rx_mtu_min = 500; +static const unsigned int rxrpc_jumbo_max = RXRPC_MAX_NR_JUMBO; static const unsigned int four = 4; static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1; static const unsigned int n_65535 = 65535; @@ -115,7 +117,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = (void *)SYSCTL_ONE, + .extra1 = (void *)&rxrpc_rx_mtu_min, .extra2 = (void *)&n_65535, }, { @@ -125,7 +127,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)SYSCTL_ONE, - .extra2 = (void *)&four, + .extra2 = (void *)&rxrpc_jumbo_max, }, }; diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index c3913d8a50d3..2a4291617d40 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -179,7 +179,8 @@ static void rxrpc_free_txbuf(struct rxrpc_txbuf *txb) trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 0, rxrpc_txbuf_free); for (i = 0; i < txb->nr_kvec; i++) - if (txb->kvec[i].iov_base) + if (txb->kvec[i].iov_base && + !is_zero_pfn(page_to_pfn(virt_to_page(txb->kvec[i].iov_base)))) page_frag_free(txb->kvec[i].iov_base); kfree(txb); atomic_dec(&rxrpc_nr_txbuf); -- cgit v1.2.3 From 149d002bee706f51772bd320cda90c922844bb0e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:40 +0000 Subject: rxrpc: Add a tracepoint to show variables pertinent to jumbo packet size Add a tracepoint to be called right before packets are transmitted for the first time that shows variable values that are pertinent to how many subpackets will be added to a jumbo DATA packet. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-13-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 41 +++++++++++++++++++++++++++++++++++++++++ net/rxrpc/call_event.c | 2 ++ 2 files changed, 43 insertions(+) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 9dcadad88e76..71f07e726a90 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -903,6 +903,47 @@ TRACE_EVENT(rxrpc_txqueue, __entry->tx_winsize) ); +TRACE_EVENT(rxrpc_transmit, + TP_PROTO(struct rxrpc_call *call, int space), + + TP_ARGS(call, space), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, seq) + __field(u16, space) + __field(u16, tx_winsize) + __field(u16, cong_cwnd) + __field(u16, cong_extra) + __field(u16, in_flight) + __field(u16, prepared) + __field(u16, pmtud_jumbo) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->seq = call->tx_bottom; + __entry->space = space; + __entry->tx_winsize = call->tx_winsize; + __entry->cong_cwnd = call->cong_cwnd; + __entry->cong_extra = call->cong_extra; + __entry->prepared = call->tx_prepared - call->tx_bottom; + __entry->in_flight = call->tx_top - call->acks_hard_ack; + __entry->pmtud_jumbo = call->peer->pmtud_jumbo; + ), + + TP_printk("c=%08x q=%08x sp=%u tw=%u cw=%u+%u pr=%u if=%u pj=%u", + __entry->call, + __entry->seq, + __entry->space, + __entry->tx_winsize, + __entry->cong_cwnd, + __entry->cong_extra, + __entry->prepared, + __entry->in_flight, + __entry->pmtud_jumbo) + ); + TRACE_EVENT(rxrpc_rx_data, TP_PROTO(unsigned int call, rxrpc_seq_t seq, rxrpc_serial_t serial, u8 flags), diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 3379adfaaf65..1f716f09d441 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -259,6 +259,8 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) if (list_empty(&call->tx_sendmsg)) break; + trace_rxrpc_transmit(call, space); + spin_lock(&call->tx_lock); do { txb = list_first_entry(&call->tx_sendmsg, -- cgit v1.2.3 From 9e3cccd176b5ec6ff78693287fb03097e453e69c Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:41 +0000 Subject: rxrpc: Fix CPU time starvation in I/O thread Starvation can happen in the rxrpc I/O thread because it goes back to the top of the I/O loop after it does any one thing without trying to give any other connection or call CPU time. Also, because it processes one call packet at a time, it tries to do the retransmission loop after each ACK without checking to see if there are other ACKs already in the queue that can update the SACK state. Fix this by: (1) Add a received-packet queue on each call. (2) Distribute packets from the master Rx queue to the individual call, conn and error queues and 'poking' calls to add them to the attend queue first thing in the I/O thread. (3) Go through all the attention-seeking connections and calls before going back to the top of the I/O thread. Each queue is extracted as a whole and then gone through so that new additions to insert themselves into the queue. (4) Make the call event handler go through all the packets currently on the call's rx_queue before transmitting and retransmitting DATA packets. (5) Drop the skb argument from the call event handler as this is now replaced with the rx_queue. Instead, keep track of whether we received a packet or an ACK for the tests that used to rely on that. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-14-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 3 ++ net/rxrpc/ar-internal.h | 10 ++++- net/rxrpc/call_accept.c | 2 +- net/rxrpc/call_event.c | 34 +++++++------- net/rxrpc/call_object.c | 2 + net/rxrpc/conn_client.c | 12 ++--- net/rxrpc/input.c | 2 +- net/rxrpc/io_thread.c | 104 ++++++++++++++++++++++--------------------- net/rxrpc/peer_event.c | 2 +- 9 files changed, 96 insertions(+), 75 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 71f07e726a90..28fa7be31ff8 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -120,6 +120,7 @@ EM(rxrpc_call_poke_conn_abort, "Conn-abort") \ EM(rxrpc_call_poke_error, "Error") \ EM(rxrpc_call_poke_idle, "Idle") \ + EM(rxrpc_call_poke_rx_packet, "Rx-packet") \ EM(rxrpc_call_poke_set_timeout, "Set-timo") \ EM(rxrpc_call_poke_start, "Start") \ EM(rxrpc_call_poke_timer, "Timer") \ @@ -128,6 +129,7 @@ #define rxrpc_skb_traces \ EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ + EM(rxrpc_skb_get_call_rx, "GET call-rx ") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ EM(rxrpc_skb_get_last_nack, "GET last-nack") \ @@ -139,6 +141,7 @@ EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ EM(rxrpc_skb_new_unshared, "NEW unshared ") \ + EM(rxrpc_skb_put_call_rx, "PUT call-rx ") \ EM(rxrpc_skb_put_conn_secured, "PUT conn-secd") \ EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 4386b2e6cca5..55cc68dd1b40 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -705,6 +705,7 @@ struct rxrpc_call { /* Received data tracking */ struct sk_buff_head recvmsg_queue; /* Queue of packets ready for recvmsg() */ + struct sk_buff_head rx_queue; /* Queue of packets for this call to receive */ struct sk_buff_head rx_oos_queue; /* Queue of out of sequence packets */ rxrpc_seq_t rx_highest_seq; /* Higest sequence number received */ @@ -906,7 +907,7 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); -bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); +bool rxrpc_input_call_event(struct rxrpc_call *call); /* * call_object.c @@ -1352,6 +1353,13 @@ static inline bool after_eq(u32 seq1, u32 seq2) return (s32)(seq1 - seq2) >= 0; } +static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb) +{ + rxrpc_get_skb(skb, rxrpc_skb_get_call_rx); + __skb_queue_tail(&call->rx_queue, skb); + rxrpc_poke_call(call, rxrpc_call_poke_rx_packet); +} + /* * debug tracing */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 0f5a1d77b890..a6776b1604ba 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -408,7 +408,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local, } _leave(" = %p{%d}", call, call->debug_id); - rxrpc_input_call_event(call, skb); + rxrpc_queue_rx_call_packet(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); return true; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 1f716f09d441..ef47de3f41c6 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -324,10 +324,11 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) /* * Handle retransmission and deferred ACK/abort generation. */ -bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) +bool rxrpc_input_call_event(struct rxrpc_call *call) { + struct sk_buff *skb; ktime_t now, t; - bool resend = false; + bool resend = false, did_receive = false, saw_ack = false; s32 abort_code; rxrpc_see_call(call, rxrpc_call_see_input); @@ -337,9 +338,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)], call->events); - if (__rxrpc_call_is_complete(call)) - goto out; - /* Handle abort request locklessly, vs rxrpc_propose_abort(). */ abort_code = smp_load_acquire(&call->send_abort); if (abort_code) { @@ -348,11 +346,21 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) goto out; } - if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) - goto out; + while ((skb = __skb_dequeue(&call->rx_queue))) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (__rxrpc_call_is_complete(call) || + skb->mark == RXRPC_SKB_MARK_ERROR) { + rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); + goto out; + } + + saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; - if (skb) rxrpc_input_call_packet(call, skb); + rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); + did_receive = true; + } /* If we see our async-event poke, check for timeout trippage. */ now = ktime_get_real(); @@ -418,12 +426,8 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_ping_for_keepalive); } - if (skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) - rxrpc_congestion_degrade(call); - } + if (saw_ack) + rxrpc_congestion_degrade(call); if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events)) rxrpc_send_initial_ping(call); @@ -494,7 +498,7 @@ out: if (call->security) call->security->free_call_crypto(call); } else { - if (skb && + if (did_receive && call->peer->ackr_adv_pmtud && call->peer->pmtud_pending) rxrpc_send_probe_for_pmtud(call); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0df647d1d3a2..c026f16f891e 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -148,6 +148,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, INIT_LIST_HEAD(&call->attend_link); INIT_LIST_HEAD(&call->tx_sendmsg); INIT_LIST_HEAD(&call->tx_buffer); + skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->recvmsg_queue); skb_queue_head_init(&call->rx_oos_queue); init_waitqueue_head(&call->waitq); @@ -536,6 +537,7 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why) static void rxrpc_cleanup_ring(struct rxrpc_call *call) { rxrpc_purge_queue(&call->recvmsg_queue); + rxrpc_purge_queue(&call->rx_queue); rxrpc_purge_queue(&call->rx_oos_queue); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 86fb18bcd188..706631e6ac2f 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -508,16 +508,18 @@ static void rxrpc_activate_channels(struct rxrpc_bundle *bundle) void rxrpc_connect_client_calls(struct rxrpc_local *local) { struct rxrpc_call *call; + LIST_HEAD(new_client_calls); - while ((call = list_first_entry_or_null(&local->new_client_calls, - struct rxrpc_call, wait_link)) - ) { + spin_lock(&local->client_call_lock); + list_splice_tail_init(&local->new_client_calls, &new_client_calls); + spin_unlock(&local->client_call_lock); + + while ((call = list_first_entry_or_null(&new_client_calls, + struct rxrpc_call, wait_link))) { struct rxrpc_bundle *bundle = call->bundle; - spin_lock(&local->client_call_lock); list_move_tail(&call->wait_link, &bundle->waiting_calls); rxrpc_see_call(call, rxrpc_call_see_waiting_call); - spin_unlock(&local->client_call_lock); if (rxrpc_bundle_has_space(bundle)) rxrpc_activate_channels(bundle); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8398fa10ee8d..96fe005c5e81 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1124,5 +1124,5 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) break; } - rxrpc_input_call_event(call, skb); + rxrpc_input_call_event(call); } diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index bd6d4f5e97b4..bc678a299bd8 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -338,7 +338,6 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; unsigned int channel; - bool ret; if (sp->hdr.securityIndex != conn->security_ix) return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security, @@ -425,9 +424,9 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, peer_srx, skb); } - ret = rxrpc_input_call_event(call, skb); + rxrpc_queue_rx_call_packet(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); - return ret; + return true; } /* @@ -444,6 +443,8 @@ int rxrpc_io_thread(void *data) ktime_t now; #endif bool should_stop; + LIST_HEAD(conn_attend_q); + LIST_HEAD(call_attend_q); complete(&local->io_thread_ready); @@ -454,43 +455,25 @@ int rxrpc_io_thread(void *data) for (;;) { rxrpc_inc_stat(local->rxnet, stat_io_loop); - /* Deal with connections that want immediate attention. */ - conn = list_first_entry_or_null(&local->conn_attend_q, - struct rxrpc_connection, - attend_link); - if (conn) { - spin_lock_bh(&local->lock); - list_del_init(&conn->attend_link); - spin_unlock_bh(&local->lock); - - rxrpc_input_conn_event(conn, NULL); - rxrpc_put_connection(conn, rxrpc_conn_put_poke); - continue; + /* Inject a delay into packets if requested. */ +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + now = ktime_get_real(); + while ((skb = skb_peek(&local->rx_delay_queue))) { + if (ktime_before(now, skb->tstamp)) + break; + skb = skb_dequeue(&local->rx_delay_queue); + skb_queue_tail(&local->rx_queue, skb); } +#endif - if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER, - &local->client_conn_flags)) - rxrpc_discard_expired_client_conns(local); - - /* Deal with calls that want immediate attention. */ - if ((call = list_first_entry_or_null(&local->call_attend_q, - struct rxrpc_call, - attend_link))) { - spin_lock_bh(&local->lock); - list_del_init(&call->attend_link); - spin_unlock_bh(&local->lock); - - trace_rxrpc_call_poked(call); - rxrpc_input_call_event(call, NULL); - rxrpc_put_call(call, rxrpc_call_put_poke); - continue; + if (!skb_queue_empty(&local->rx_queue)) { + spin_lock_irq(&local->rx_queue.lock); + skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); + spin_unlock_irq(&local->rx_queue.lock); } - if (!list_empty(&local->new_client_calls)) - rxrpc_connect_client_calls(local); - - /* Process received packets and errors. */ - if ((skb = __skb_dequeue(&rx_queue))) { + /* Distribute packets and errors. */ + while ((skb = __skb_dequeue(&rx_queue))) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: @@ -514,27 +497,46 @@ int rxrpc_io_thread(void *data) rxrpc_free_skb(skb, rxrpc_skb_put_unknown); break; } - continue; } - /* Inject a delay into packets if requested. */ -#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY - now = ktime_get_real(); - while ((skb = skb_peek(&local->rx_delay_queue))) { - if (ktime_before(now, skb->tstamp)) - break; - skb = skb_dequeue(&local->rx_delay_queue); - skb_queue_tail(&local->rx_queue, skb); + /* Deal with connections that want immediate attention. */ + spin_lock_bh(&local->lock); + list_splice_tail_init(&local->conn_attend_q, &conn_attend_q); + spin_unlock_bh(&local->lock); + + while ((conn = list_first_entry_or_null(&conn_attend_q, + struct rxrpc_connection, + attend_link))) { + spin_lock_bh(&local->lock); + list_del_init(&conn->attend_link); + spin_unlock_bh(&local->lock); + rxrpc_input_conn_event(conn, NULL); + rxrpc_put_connection(conn, rxrpc_conn_put_poke); } -#endif - if (!skb_queue_empty(&local->rx_queue)) { - spin_lock_irq(&local->rx_queue.lock); - skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); - spin_unlock_irq(&local->rx_queue.lock); - continue; + if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER, + &local->client_conn_flags)) + rxrpc_discard_expired_client_conns(local); + + /* Deal with calls that want immediate attention. */ + spin_lock_bh(&local->lock); + list_splice_tail_init(&local->call_attend_q, &call_attend_q); + spin_unlock_bh(&local->lock); + + while ((call = list_first_entry_or_null(&call_attend_q, + struct rxrpc_call, + attend_link))) { + spin_lock_bh(&local->lock); + list_del_init(&call->attend_link); + spin_unlock_bh(&local->lock); + trace_rxrpc_call_poked(call); + rxrpc_input_call_event(call); + rxrpc_put_call(call, rxrpc_call_put_poke); } + if (!list_empty(&local->new_client_calls)) + rxrpc_connect_client_calls(local); + set_current_state(TASK_INTERRUPTIBLE); should_stop = kthread_should_stop(); if (!skb_queue_empty(&local->rx_queue) || diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 8fc9464a960c..ff30e0c05507 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -224,7 +224,7 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb, rxrpc_see_call(call, rxrpc_call_see_distribute_error); rxrpc_set_call_completion(call, compl, 0, -err); - rxrpc_input_call_event(call, skb); + rxrpc_input_call_event(call); spin_lock(&peer->lock); } -- cgit v1.2.3 From b341a0263b1b804d329f864c2dc24815364510ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:46 +0000 Subject: rxrpc: Implement progressive transmission queue struct We need to scan the buffers in the transmission queue occasionally when processing ACKs, but the transmission queue is currently a linked list of transmission buffers which, when we eventually expand the Tx window to 8192 packets will be very slow to walk. Instead, pull the fields we need to examine a lot (last sent time, retransmitted flag) into a new struct rxrpc_txqueue and make each one hold an array of 32 or 64 packets. The transmission queue is then a list of these structs, each pointing to a contiguous set of packets. Scanning is then a lot faster as the flags and timestamps are concentrated in the CPU dcache. The transmission timestamps are stored as a number of microseconds from a base ktime to reduce memory requirements. This should be fine provided we manage to transmit an entire buffer within an hour. This will make implementing RACK-TLP [RFC8985] easier as it will be less costly to scan the transmission buffers. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-19-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 98 +++++++++++++++++---- net/rxrpc/ar-internal.h | 47 ++++++++-- net/rxrpc/call_event.c | 204 +++++++++++++++++++++++++++---------------- net/rxrpc/call_object.c | 38 ++++---- net/rxrpc/input.c | 72 +++++++++++---- net/rxrpc/output.c | 163 +++++++++++++++++----------------- net/rxrpc/sendmsg.c | 69 ++++++++++++--- net/rxrpc/txbuf.c | 41 +-------- 8 files changed, 467 insertions(+), 265 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 28fa7be31ff8..e6cf9ec940aa 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -297,7 +297,6 @@ #define rxrpc_txqueue_traces \ EM(rxrpc_txqueue_await_reply, "AWR") \ - EM(rxrpc_txqueue_dequeue, "DEQ") \ EM(rxrpc_txqueue_end, "END") \ EM(rxrpc_txqueue_queue, "QUE") \ EM(rxrpc_txqueue_queue_last, "QLS") \ @@ -482,6 +481,19 @@ EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") +#define rxrpc_tq_traces \ + EM(rxrpc_tq_alloc, "ALLOC") \ + EM(rxrpc_tq_cleaned, "CLEAN") \ + EM(rxrpc_tq_decant, "DCNT ") \ + EM(rxrpc_tq_decant_advance, "DCNT>") \ + EM(rxrpc_tq_queue, "QUEUE") \ + EM(rxrpc_tq_queue_dup, "QUE!!") \ + EM(rxrpc_tq_rotate, "ROT ") \ + EM(rxrpc_tq_rotate_and_free, "ROT-F") \ + EM(rxrpc_tq_rotate_and_keep, "ROT-K") \ + EM(rxrpc_tq_transmit, "XMIT ") \ + E_(rxrpc_tq_transmit_advance, "XMIT>") + #define rxrpc_pmtud_reduce_traces \ EM(rxrpc_pmtud_reduce_ack, "Ack ") \ EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \ @@ -518,6 +530,7 @@ enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte); enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte); enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); +enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte); enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte); enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); @@ -554,6 +567,7 @@ rxrpc_rtt_tx_traces; rxrpc_sack_traces; rxrpc_skb_traces; rxrpc_timer_traces; +rxrpc_tq_traces; rxrpc_tx_points; rxrpc_txbuf_traces; rxrpc_txqueue_traces; @@ -881,7 +895,7 @@ TRACE_EVENT(rxrpc_txqueue, __field(rxrpc_seq_t, acks_hard_ack) __field(rxrpc_seq_t, tx_bottom) __field(rxrpc_seq_t, tx_top) - __field(rxrpc_seq_t, tx_prepared) + __field(rxrpc_seq_t, send_top) __field(int, tx_winsize) ), @@ -891,7 +905,7 @@ TRACE_EVENT(rxrpc_txqueue, __entry->acks_hard_ack = call->acks_hard_ack; __entry->tx_bottom = call->tx_bottom; __entry->tx_top = call->tx_top; - __entry->tx_prepared = call->tx_prepared; + __entry->send_top = call->send_top; __entry->tx_winsize = call->tx_winsize; ), @@ -902,14 +916,14 @@ TRACE_EVENT(rxrpc_txqueue, __entry->acks_hard_ack, __entry->tx_top - __entry->tx_bottom, __entry->tx_top - __entry->acks_hard_ack, - __entry->tx_prepared - __entry->tx_bottom, + __entry->send_top - __entry->tx_top, __entry->tx_winsize) ); TRACE_EVENT(rxrpc_transmit, - TP_PROTO(struct rxrpc_call *call, int space), + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t send_top, int space), - TP_ARGS(call, space), + TP_ARGS(call, send_top, space), TP_STRUCT__entry( __field(unsigned int, call) @@ -925,12 +939,12 @@ TRACE_EVENT(rxrpc_transmit, TP_fast_assign( __entry->call = call->debug_id; - __entry->seq = call->tx_bottom; + __entry->seq = call->tx_top + 1; __entry->space = space; __entry->tx_winsize = call->tx_winsize; __entry->cong_cwnd = call->cong_cwnd; __entry->cong_extra = call->cong_extra; - __entry->prepared = call->tx_prepared - call->tx_bottom; + __entry->prepared = send_top - call->tx_bottom; __entry->in_flight = call->tx_top - call->acks_hard_ack; __entry->pmtud_jumbo = call->peer->pmtud_jumbo; ), @@ -947,6 +961,32 @@ TRACE_EVENT(rxrpc_transmit, __entry->pmtud_jumbo) ); +TRACE_EVENT(rxrpc_tx_rotate, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, rxrpc_seq_t to), + + TP_ARGS(call, seq, to), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, seq) + __field(rxrpc_seq_t, to) + __field(rxrpc_seq_t, top) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->seq = seq; + __entry->to = to; + __entry->top = call->tx_top; + ), + + TP_printk("c=%08x q=%08x-%08x-%08x", + __entry->call, + __entry->seq, + __entry->to, + __entry->top) + ); + TRACE_EVENT(rxrpc_rx_data, TP_PROTO(unsigned int call, rxrpc_seq_t seq, rxrpc_serial_t serial, u8 flags), @@ -1621,10 +1661,11 @@ TRACE_EVENT(rxrpc_drop_ack, ); TRACE_EVENT(rxrpc_retransmit, - TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, ktime_t expiry), + TP_PROTO(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + struct rxrpc_txbuf *txb, ktime_t expiry), - TP_ARGS(call, seq, serial, expiry), + TP_ARGS(call, req, txb, expiry), TP_STRUCT__entry( __field(unsigned int, call) @@ -1635,8 +1676,8 @@ TRACE_EVENT(rxrpc_retransmit, TP_fast_assign( __entry->call = call->debug_id; - __entry->seq = seq; - __entry->serial = serial; + __entry->seq = req->seq; + __entry->serial = txb->serial; __entry->expiry = expiry; ), @@ -1714,9 +1755,9 @@ TRACE_EVENT(rxrpc_reset_cwnd, __entry->cwnd = call->cong_cwnd; __entry->extra = call->cong_extra; __entry->hard_ack = call->acks_hard_ack; - __entry->prepared = call->tx_prepared - call->tx_bottom; + __entry->prepared = call->send_top - call->tx_bottom; __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); - __entry->has_data = !list_empty(&call->tx_sendmsg); + __entry->has_data = call->tx_bottom != call->tx_top; ), TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u", @@ -2024,6 +2065,33 @@ TRACE_EVENT(rxrpc_txbuf, __entry->ref) ); +TRACE_EVENT(rxrpc_tq, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, + rxrpc_seq_t seq, enum rxrpc_tq_trace trace), + + TP_ARGS(call, tq, seq, trace), + + TP_STRUCT__entry( + __field(unsigned int, call_debug_id) + __field(rxrpc_seq_t, qbase) + __field(rxrpc_seq_t, seq) + __field(enum rxrpc_tq_trace, trace) + ), + + TP_fast_assign( + __entry->call_debug_id = call->debug_id; + __entry->qbase = tq ? tq->qbase : call->tx_qbase; + __entry->seq = seq; + __entry->trace = trace; + ), + + TP_printk("c=%08x bq=%08x q=%08x %s", + __entry->call_debug_id, + __entry->qbase, + __entry->seq, + __print_symbolic(__entry->trace, rxrpc_tq_traces)) + ); + TRACE_EVENT(rxrpc_poke_call, TP_PROTO(struct rxrpc_call *call, bool busy, enum rxrpc_call_poke_trace what), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 84efa21f176c..bcce4862b0b7 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -30,6 +30,7 @@ struct rxrpc_crypt { struct key_preparsed_payload; struct rxrpc_connection; struct rxrpc_txbuf; +struct rxrpc_txqueue; /* * Mark applied to socket buffers in skb->mark. skb->priority is used @@ -691,13 +692,17 @@ struct rxrpc_call { unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ unsigned short rx_pkt_len; /* Current recvmsg packet len */ + /* Sendmsg data tracking. */ + rxrpc_seq_t send_top; /* Highest Tx slot filled by sendmsg. */ + struct rxrpc_txqueue *send_queue; /* Queue that sendmsg is writing into */ + /* Transmitted data tracking. */ spinlock_t tx_lock; /* Transmit queue lock */ - struct list_head tx_sendmsg; /* Sendmsg prepared packets */ - struct list_head tx_buffer; /* Buffer of transmissible packets */ + struct rxrpc_txqueue *tx_queue; /* Start of transmission buffers */ + struct rxrpc_txqueue *tx_qtail; /* End of transmission buffers */ + rxrpc_seq_t tx_qbase; /* First slot in tx_queue */ rxrpc_seq_t tx_bottom; /* First packet in buffer */ rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ - rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */ u8 tx_winsize; /* Maximum size of Tx window */ @@ -815,9 +820,6 @@ struct rxrpc_send_params { * Buffer of data to be output as a packet. */ struct rxrpc_txbuf { - struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */ - struct list_head tx_link; /* Link in live Enc queue or Tx queue */ - ktime_t last_sent; /* Time at which last transmitted */ refcount_t ref; rxrpc_seq_t seq; /* Sequence number of this packet */ rxrpc_serial_t serial; /* Last serial number transmitted with */ @@ -849,6 +851,36 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb) return !rxrpc_sending_to_server(txb); } +/* + * Transmit queue element, including RACK [RFC8985] per-segment metadata. The + * transmission timestamp is in usec from the base. + */ +struct rxrpc_txqueue { + /* Start with the members we want to prefetch. */ + struct rxrpc_txqueue *next; + ktime_t xmit_ts_base; + rxrpc_seq_t qbase; + + /* The arrays we want to pack into as few cache lines as possible. */ + struct { +#define RXRPC_NR_TXQUEUE BITS_PER_LONG +#define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1) + struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE]; + unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE]; + } ____cacheline_aligned; +}; + +/* + * Data transmission request. + */ +struct rxrpc_send_data_req { + ktime_t now; /* Current time */ + struct rxrpc_txqueue *tq; /* Tx queue segment holding first DATA */ + rxrpc_seq_t seq; /* Sequence of first data */ + int n; /* Number of DATA packets to glue into jumbo */ + bool did_send; /* T if did actually send */ +}; + #include /* @@ -905,7 +937,6 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why); void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); -void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); bool rxrpc_input_call_event(struct rxrpc_call *call); @@ -1191,10 +1222,10 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why); void rxrpc_send_probe_for_pmtud(struct rxrpc_call *call); int rxrpc_send_abort_packet(struct rxrpc_call *); +void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req); void rxrpc_send_conn_abort(struct rxrpc_connection *conn); void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); -void rxrpc_transmit_data(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n); /* * peer_event.c diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index ef47de3f41c6..90e3d9395675 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -62,57 +62,85 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); } +/* + * Retransmit one or more packets. + */ +static void rxrpc_retransmit_data(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + ktime_t rto) +{ + struct rxrpc_txqueue *tq = req->tq; + unsigned int ix = req->seq & RXRPC_TXQ_MASK; + struct rxrpc_txbuf *txb = tq->bufs[ix]; + ktime_t xmit_ts, resend_at; + + _enter("%x,%x,%x,%x", tq->qbase, req->seq, ix, txb->debug_id); + + xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); + resend_at = ktime_add(xmit_ts, rto); + trace_rxrpc_retransmit(call, req, txb, + ktime_sub(resend_at, req->now)); + + txb->flags |= RXRPC_TXBUF_RESENT; + rxrpc_send_data_packet(call, req); + rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); + + req->tq = NULL; + req->n = 0; + req->did_send = true; + req->now = ktime_get_real(); +} + /* * Perform retransmission of NAK'd and unack'd packets. */ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) { + struct rxrpc_send_data_req req = { + .now = ktime_get_real(), + }; struct rxrpc_ackpacket *ack = NULL; struct rxrpc_skb_priv *sp; + struct rxrpc_txqueue *tq; struct rxrpc_txbuf *txb; - rxrpc_seq_t transmitted = call->tx_transmitted; + rxrpc_seq_t transmitted = call->tx_transmitted, seq; ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); - ktime_t resend_at = KTIME_MAX, now, delay; + ktime_t resend_at = KTIME_MAX, delay; bool unacked = false, did_send = false; - unsigned int i; + unsigned int qix; _enter("{%d,%d}", call->acks_hard_ack, call->tx_top); - now = ktime_get_real(); - - if (list_empty(&call->tx_buffer)) + if (call->tx_bottom == call->tx_top) goto no_resend; trace_rxrpc_resend(call, ack_skb); - txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link); + tq = call->tx_queue; + seq = call->tx_bottom; - /* Scan the soft ACK table without dropping the lock and resend any - * explicitly NAK'd packets. - */ + /* Scan the soft ACK table and resend any explicitly NAK'd packets. */ if (ack_skb) { sp = rxrpc_skb(ack_skb); ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); - for (i = 0; i < sp->ack.nr_acks; i++) { - rxrpc_seq_t seq; + for (int i = 0; i < sp->ack.nr_acks; i++) { + rxrpc_seq_t aseq; if (ack->acks[i] & 1) continue; - seq = sp->ack.first_ack + i; - if (after(txb->seq, transmitted)) - break; - if (after(txb->seq, seq)) - continue; /* A new hard ACK probably came in */ - list_for_each_entry_from(txb, &call->tx_buffer, call_link) { - if (txb->seq == seq) - goto found_txb; - } - goto no_further_resend; + aseq = sp->ack.first_ack + i; + while (after_eq(aseq, tq->qbase + RXRPC_NR_TXQUEUE)) + tq = tq->next; + seq = aseq; + qix = seq - tq->qbase; + txb = tq->bufs[qix]; + if (after(seq, transmitted)) + goto no_further_resend; - found_txb: - resend_at = ktime_add(txb->last_sent, rto); + resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); + resend_at = ktime_add(resend_at, rto); if (after(txb->serial, call->acks_highest_serial)) { - if (ktime_after(resend_at, now) && + if (ktime_after(resend_at, req.now) && ktime_before(resend_at, next_resend)) next_resend = resend_at; continue; /* Ack point not yet reached */ @@ -120,17 +148,13 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); - trace_rxrpc_retransmit(call, txb->seq, txb->serial, - ktime_sub(resend_at, now)); - - txb->flags |= RXRPC_TXBUF_RESENT; - rxrpc_transmit_data(call, txb, 1); - did_send = true; - now = ktime_get_real(); + req.tq = tq; + req.seq = seq; + req.n = 1; + rxrpc_retransmit_data(call, &req, rto); - if (list_is_last(&txb->call_link, &call->tx_buffer)) + if (after_eq(seq, call->tx_top)) goto no_further_resend; - txb = list_next_entry(txb, call_link); } } @@ -139,35 +163,43 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) * ACK'd or NACK'd in due course, so don't worry about it here; here we * need to consider retransmitting anything beyond that point. */ - if (after_eq(call->acks_prev_seq, call->tx_transmitted)) + seq = call->acks_prev_seq; + if (after_eq(seq, call->tx_transmitted)) goto no_further_resend; + seq++; - list_for_each_entry_from(txb, &call->tx_buffer, call_link) { - resend_at = ktime_add(txb->last_sent, rto); + while (after_eq(seq, tq->qbase + RXRPC_NR_TXQUEUE)) + tq = tq->next; - if (before_eq(txb->seq, call->acks_prev_seq)) + while (before_eq(seq, call->tx_transmitted)) { + qix = seq - tq->qbase; + if (qix >= RXRPC_NR_TXQUEUE) { + tq = tq->next; continue; - if (after(txb->seq, call->tx_transmitted)) - break; /* Not transmitted yet */ + } + txb = tq->bufs[qix]; + resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); + resend_at = ktime_add(resend_at, rto); if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && before(txb->serial, ntohl(ack->serial))) goto do_resend; /* Wasn't accounted for by a more recent ping. */ - if (ktime_after(resend_at, now)) { + if (ktime_after(resend_at, req.now)) { if (ktime_before(resend_at, next_resend)) next_resend = resend_at; + seq++; continue; } do_resend: unacked = true; - txb->flags |= RXRPC_TXBUF_RESENT; - rxrpc_transmit_data(call, txb, 1); - did_send = true; - rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); - now = ktime_get_real(); + req.tq = tq; + req.seq = seq; + req.n = 1; + rxrpc_retransmit_data(call, &req, rto); + seq++; } no_further_resend: @@ -175,7 +207,8 @@ no_resend: if (resend_at < KTIME_MAX) { delay = rxrpc_get_rto_backoff(call->peer, did_send); resend_at = ktime_add(resend_at, delay); - trace_rxrpc_timer_set(call, resend_at - now, rxrpc_timer_trace_resend_reset); + trace_rxrpc_timer_set(call, resend_at - req.now, + rxrpc_timer_trace_resend_reset); } call->resend_at = resend_at; @@ -186,11 +219,11 @@ no_resend: * that an ACK got lost somewhere. Send a ping to find out instead of * retransmitting data. */ - if (!did_send) { + if (!req.did_send) { ktime_t next_ping = ktime_add_us(call->acks_latest_ts, call->peer->srtt_us >> 3); - if (ktime_sub(next_ping, now) <= 0) + if (ktime_sub(next_ping, req.now) <= 0) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_0_retrans); } @@ -240,47 +273,68 @@ static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) } /* - * Decant some if the sendmsg prepared queue into the transmission buffer. + * Transmit some as-yet untransmitted data. */ -static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) +static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) { int space = rxrpc_tx_window_space(call); if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { - if (list_empty(&call->tx_sendmsg)) + if (call->send_top == call->tx_top) return; rxrpc_expose_client_call(call); } while (space > 0) { - struct rxrpc_txbuf *head = NULL, *txb; - int count = 0, limit = min(space, 1); - - if (list_empty(&call->tx_sendmsg)) + struct rxrpc_send_data_req req = { + .now = ktime_get_real(), + .seq = call->tx_transmitted + 1, + .n = 0, + }; + struct rxrpc_txqueue *tq; + struct rxrpc_txbuf *txb; + rxrpc_seq_t send_top, seq; + int limit = min(space, 1); + + /* Order send_top before the contents of the new txbufs and + * txqueue pointers + */ + send_top = smp_load_acquire(&call->send_top); + if (call->tx_top == send_top) break; - trace_rxrpc_transmit(call, space); + trace_rxrpc_transmit(call, send_top, space); + + tq = call->tx_qtail; + seq = call->tx_top; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_decant); - spin_lock(&call->tx_lock); do { - txb = list_first_entry(&call->tx_sendmsg, - struct rxrpc_txbuf, call_link); - if (!head) - head = txb; - list_move_tail(&txb->call_link, &call->tx_buffer); - count++; + int ix; + + seq++; + ix = seq & RXRPC_TXQ_MASK; + if (!ix) { + tq = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_decant_advance); + } + if (!req.tq) + req.tq = tq; + txb = tq->bufs[ix]; + req.n++; if (!txb->jumboable) break; - } while (count < limit && !list_empty(&call->tx_sendmsg)); + } while (req.n < limit && before(seq, send_top)); - spin_unlock(&call->tx_lock); - - call->tx_top = txb->seq; - if (txb->flags & RXRPC_LAST_PACKET) + if (txb->flags & RXRPC_LAST_PACKET) { rxrpc_close_tx_phase(call); + tq = NULL; + } + call->tx_qtail = tq; + call->tx_top = seq; - space -= count; - rxrpc_transmit_data(call, head, count); + space -= req.n; + rxrpc_send_data_packet(call, &req); } } @@ -288,7 +342,7 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call) { switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: - if (list_empty(&call->tx_sendmsg)) + if (call->tx_bottom == READ_ONCE(call->send_top)) return; rxrpc_begin_service_reply(call); fallthrough; @@ -297,11 +351,11 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call) case RXRPC_CALL_CLIENT_SEND_REQUEST: if (!rxrpc_tx_window_space(call)) return; - if (list_empty(&call->tx_sendmsg)) { + if (call->tx_bottom == READ_ONCE(call->send_top)) { rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); return; } - rxrpc_decant_prepared_tx(call); + rxrpc_transmit_fresh_data(call); break; default: return; @@ -503,8 +557,6 @@ out: call->peer->pmtud_pending) rxrpc_send_probe_for_pmtud(call); } - if (call->acks_hard_ack != call->tx_bottom) - rxrpc_shrink_call_tx_buffer(call); _leave(""); return true; diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c026f16f891e..a9682b31a4f9 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -146,8 +146,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); INIT_LIST_HEAD(&call->attend_link); - INIT_LIST_HEAD(&call->tx_sendmsg); - INIT_LIST_HEAD(&call->tx_buffer); skb_queue_head_init(&call->rx_queue); skb_queue_head_init(&call->recvmsg_queue); skb_queue_head_init(&call->rx_oos_queue); @@ -532,9 +530,26 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why) } /* - * Clean up the Rx skb ring. + * Clean up the transmission buffers. */ -static void rxrpc_cleanup_ring(struct rxrpc_call *call) +static void rxrpc_cleanup_tx_buffers(struct rxrpc_call *call) +{ + struct rxrpc_txqueue *tq, *next; + + for (tq = call->tx_queue; tq; tq = next) { + next = tq->next; + for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) + if (tq->bufs[i]) + rxrpc_put_txbuf(tq->bufs[i], rxrpc_txbuf_put_cleaned); + trace_rxrpc_tq(call, tq, 0, rxrpc_tq_cleaned); + kfree(tq); + } +} + +/* + * Clean up the receive buffers. + */ +static void rxrpc_cleanup_rx_buffers(struct rxrpc_call *call) { rxrpc_purge_queue(&call->recvmsg_queue); rxrpc_purge_queue(&call->rx_queue); @@ -673,23 +688,12 @@ static void rxrpc_rcu_free_call(struct rcu_head *rcu) static void rxrpc_destroy_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); - struct rxrpc_txbuf *txb; del_timer_sync(&call->timer); rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); - rxrpc_cleanup_ring(call); - while ((txb = list_first_entry_or_null(&call->tx_sendmsg, - struct rxrpc_txbuf, call_link))) { - list_del(&txb->call_link); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); - } - while ((txb = list_first_entry_or_null(&call->tx_buffer, - struct rxrpc_txbuf, call_link))) { - list_del(&txb->call_link); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); - } - + rxrpc_cleanup_tx_buffers(call); + rxrpc_cleanup_rx_buffers(call); rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); rxrpc_put_connection(call->conn, rxrpc_conn_put_call); rxrpc_deactivate_bundle(call->bundle); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 96fe005c5e81..cfdd23042d4c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -214,24 +214,71 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { - struct rxrpc_txbuf *txb; + struct rxrpc_txqueue *tq = call->tx_queue; + rxrpc_seq_t seq = call->tx_bottom + 1; bool rot_last = false; - list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { - if (before_eq(txb->seq, call->acks_hard_ack)) - continue; - if (txb->flags & RXRPC_LAST_PACKET) { + _enter("%x,%x,%x", call->tx_bottom, call->acks_hard_ack, to); + + trace_rxrpc_tx_rotate(call, seq, to); + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate); + + /* We may have a left over fully-consumed buffer at the front that we + * couldn't drop before (rotate_and_keep below). + */ + if (seq == call->tx_qbase + RXRPC_NR_TXQUEUE) { + call->tx_qbase += RXRPC_NR_TXQUEUE; + call->tx_queue = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + tq = call->tx_queue; + } + + do { + unsigned int ix = seq - call->tx_qbase; + + _debug("tq=%x seq=%x i=%d f=%x", tq->qbase, seq, ix, tq->bufs[ix]->flags); + if (tq->bufs[ix]->flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; } - if (txb->seq == to) - break; - } + rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated); + tq->bufs[ix] = NULL; + + WRITE_ONCE(call->tx_bottom, seq); + WRITE_ONCE(call->acks_hard_ack, seq); + trace_rxrpc_txqueue(call, (rot_last ? + rxrpc_txqueue_rotate_last : + rxrpc_txqueue_rotate)); - if (rot_last) + seq++; + if (!(seq & RXRPC_TXQ_MASK)) { + prefetch(tq->next); + if (tq != call->tx_qtail) { + call->tx_qbase += RXRPC_NR_TXQUEUE; + call->tx_queue = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + tq = call->tx_queue; + } else { + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_keep); + tq = NULL; + break; + } + } + + } while (before_eq(seq, to)); + + if (rot_last) { set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); + if (tq) { + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + call->tx_queue = NULL; + } + } - _enter("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); + _debug("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); if (call->acks_lowest_nak == call->acks_hard_ack) { call->acks_lowest_nak = to; @@ -240,11 +287,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, call->acks_lowest_nak = to; } - smp_store_release(&call->acks_hard_ack, to); - - trace_rxrpc_txqueue(call, (rot_last ? - rxrpc_txqueue_rotate_last : - rxrpc_txqueue_rotate)); wake_up(&call->waitq); return rot_last; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 400c3389d492..c2044d593237 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -375,10 +375,10 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) /* * Prepare a (sub)packet for transmission. */ -static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc_txbuf *txb, - rxrpc_serial_t serial, - int subpkt, int nr_subpkts, - ktime_t now) +static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, + struct rxrpc_send_data_req *req, + struct rxrpc_txbuf *txb, + rxrpc_serial_t serial, int subpkt) { struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; struct rxrpc_jumbo_header *jumbo = (void *)(whdr + 1) - sizeof(*jumbo); @@ -386,7 +386,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc struct rxrpc_connection *conn = call->conn; struct kvec *kv = &call->local->kvec[subpkt]; size_t len = txb->pkt_len; - bool last, more; + bool last; u8 flags; _enter("%x,%zd", txb->seq, len); @@ -401,14 +401,11 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; last = txb->flags & RXRPC_LAST_PACKET; - if (subpkt < nr_subpkts - 1) { + if (subpkt < req->n - 1) { len = RXRPC_JUMBO_DATALEN; goto dont_set_request_ack; } - more = (!list_is_last(&txb->call_link, &call->tx_buffer) || - !list_empty(&call->tx_sendmsg)); - /* If our RTT cache needs working on, request an ACK. Also request * ACKs if a DATA packet appears to have been lost. * @@ -430,7 +427,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc why = rxrpc_reqack_more_rtt; else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) why = rxrpc_reqack_old_rtt; - else if (!last && !more) + else if (!last && !after(READ_ONCE(call->send_top), txb->seq)) why = rxrpc_reqack_app_stall; else goto dont_set_request_ack; @@ -439,13 +436,13 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, struct rxrpc trace_rxrpc_req_ack(call->debug_id, txb->seq, why); if (why != rxrpc_reqack_no_srv_last) { flags |= RXRPC_REQUEST_ACK; - rxrpc_begin_rtt_probe(call, serial, now, rxrpc_rtt_tx_data); - call->peer->rtt_last_req = now; + rxrpc_begin_rtt_probe(call, serial, req->now, rxrpc_rtt_tx_data); + call->peer->rtt_last_req = req->now; } dont_set_request_ack: /* The jumbo header overlays the wire header in the txbuf. */ - if (subpkt < nr_subpkts - 1) + if (subpkt < req->n - 1) flags |= RXRPC_JUMBO_PACKET; else flags &= ~RXRPC_JUMBO_PACKET; @@ -469,62 +466,100 @@ dont_set_request_ack: return len; } +/* + * Prepare a transmission queue object for initial transmission. Returns the + * number of microseconds since the transmission queue base timestamp. + */ +static unsigned int rxrpc_prepare_txqueue(struct rxrpc_txqueue *tq, + struct rxrpc_send_data_req *req) +{ + if (!tq) + return 0; + if (tq->xmit_ts_base == KTIME_MIN) { + tq->xmit_ts_base = req->now; + return 0; + } + return ktime_to_us(ktime_sub(req->now, tq->xmit_ts_base)); +} + /* * Prepare a (jumbo) packet for transmission. */ -static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *head, int n) +static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) { - struct rxrpc_txbuf *txb = head; + struct rxrpc_txqueue *tq = req->tq; rxrpc_serial_t serial; - ktime_t now = ktime_get_real(); + unsigned int xmit_ts; + rxrpc_seq_t seq = req->seq; size_t len = 0; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit); + /* Each transmission of a Tx packet needs a new serial number */ - serial = rxrpc_get_next_serials(call->conn, n); + serial = rxrpc_get_next_serials(call->conn, req->n); - for (int i = 0; i < n; i++) { - txb->last_sent = now; - len += rxrpc_prepare_data_subpacket(call, txb, serial, i, n, now); - serial++; - txb = list_next_entry(txb, call_link); - } + call->tx_last_sent = req->now; + xmit_ts = rxrpc_prepare_txqueue(tq, req); + prefetch(tq->next); - /* Set timeouts */ - if (call->peer->rtt_count > 1) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + for (int i = 0;;) { + int ix = seq & RXRPC_TXQ_MASK; + struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; - call->ack_lost_at = ktime_add(now, delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); + _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); + tq->segment_xmit_ts[ix] = xmit_ts; + len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); + serial++; + seq++; + i++; + if (i >= req->n) + break; + if (!(seq & RXRPC_TXQ_MASK)) { + tq = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit_advance); + xmit_ts = rxrpc_prepare_txqueue(tq, req); + } } + /* Set timeouts */ if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); - call->expect_rx_by = ktime_add(now, delay); + call->expect_rx_by = ktime_add(req->now, delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); } + if (call->resend_at == KTIME_MAX) { + ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + + call->resend_at = ktime_add(req->now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend); + } - rxrpc_set_keepalive(call, now); + rxrpc_set_keepalive(call, req->now); return len; } /* - * send a packet through the transport endpoint + * Send one or more packets through the transport endpoint */ -static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) +void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req *req) { struct rxrpc_connection *conn = call->conn; enum rxrpc_tx_point frag; + struct rxrpc_txqueue *tq = req->tq; + struct rxrpc_txbuf *txb; struct msghdr msg; + rxrpc_seq_t seq = req->seq; size_t len; bool new_call = test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags); int ret; - _enter("%x,{%d}", txb->seq, txb->pkt_len); + _enter("%x,%x-%x", tq->qbase, seq, seq + req->n - 1); - len = rxrpc_prepare_data_packet(call, txb, n); + len = rxrpc_prepare_data_packet(call, req); + txb = tq->bufs[seq & RXRPC_TXQ_MASK]; - iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, n, len); + iov_iter_kvec(&msg.msg_iter, WRITE, call->local->kvec, req->n, len); msg.msg_name = &call->peer->srx.transport; msg.msg_namelen = call->peer->srx.transport_len; @@ -535,7 +570,7 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t /* Send the packet with the don't fragment bit set unless we think it's * too big or if this is a retransmission. */ - if (txb->seq == call->tx_transmitted + 1 && + if (seq == call->tx_transmitted + 1 && len >= sizeof(struct rxrpc_wire_header) + call->peer->max_data) { rxrpc_local_dont_fragment(conn->local, false); frag = rxrpc_tx_point_call_data_frag; @@ -548,8 +583,8 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t * retransmission algorithm doesn't try to resend what we haven't sent * yet. */ - if (txb->seq == call->tx_transmitted + 1) - call->tx_transmitted = txb->seq + n - 1; + if (seq == call->tx_transmitted + 1) + call->tx_transmitted = seq + req->n - 1; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; @@ -586,19 +621,21 @@ static int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t rxrpc_tx_backoff(call, ret); if (ret < 0) { - /* Cancel the call if the initial transmission fails, - * particularly if that's due to network routing issues that - * aren't going away anytime soon. The layer above can arrange - * the retransmission. + /* Cancel the call if the initial transmission fails or if we + * hit due to network routing issues that aren't going away + * anytime soon. The layer above can arrange the + * retransmission. */ - if (new_call) + if (new_call || + ret == -ENETUNREACH || + ret == -EHOSTUNREACH || + ret == -ECONNREFUSED) rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_USER_ABORT, ret); } done: _leave(" = %d [%u]", ret, call->peer->max_data); - return ret; } /* @@ -773,41 +810,3 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) peer->last_tx_at = ktime_get_seconds(); _leave(""); } - -/* - * Schedule an instant Tx resend. - */ -static inline void rxrpc_instant_resend(struct rxrpc_call *call, - struct rxrpc_txbuf *txb) -{ - if (!__rxrpc_call_is_complete(call)) - kdebug("resend"); -} - -/* - * Transmit a packet, possibly gluing several subpackets together. - */ -void rxrpc_transmit_data(struct rxrpc_call *call, struct rxrpc_txbuf *txb, int n) -{ - int ret; - - ret = rxrpc_send_data_packet(call, txb, n); - if (ret < 0) { - switch (ret) { - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - 0, ret); - break; - default: - _debug("need instant resend %d", ret); - rxrpc_instant_resend(call, txb); - } - } else { - ktime_t delay = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); - - call->resend_at = ktime_add(ktime_get_real(), delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend_tx); - } -} diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 467c9402882e..85b35b11755d 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -98,7 +98,7 @@ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) if (_tx_win) *_tx_win = tx_bottom; - return call->tx_prepared - tx_bottom < 256; + return call->send_top - tx_bottom < 256; } /* @@ -242,36 +242,74 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, struct rxrpc_txbuf *txb, rxrpc_notify_end_tx_t notify_end_tx) { + struct rxrpc_txqueue *sq = call->send_queue; rxrpc_seq_t seq = txb->seq; bool poke, last = txb->flags & RXRPC_LAST_PACKET; - + int ix = seq & RXRPC_TXQ_MASK; rxrpc_inc_stat(call->rxnet, stat_tx_data); - ASSERTCMP(txb->seq, ==, call->tx_prepared + 1); - - /* We have to set the timestamp before queueing as the retransmit - * algorithm can see the packet as soon as we queue it. - */ - txb->last_sent = ktime_get_real(); + ASSERTCMP(txb->seq, ==, call->send_top + 1); if (last) trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last); else trace_rxrpc_txqueue(call, rxrpc_txqueue_queue); + if (WARN_ON_ONCE(sq->bufs[ix])) + trace_rxrpc_tq(call, sq, seq, rxrpc_tq_queue_dup); + else + trace_rxrpc_tq(call, sq, seq, rxrpc_tq_queue); + /* Add the packet to the call's output buffer */ spin_lock(&call->tx_lock); - poke = list_empty(&call->tx_sendmsg); - list_add_tail(&txb->call_link, &call->tx_sendmsg); - call->tx_prepared = seq; - if (last) + poke = (READ_ONCE(call->tx_bottom) == call->send_top); + sq->bufs[ix] = txb; + /* Order send_top after the queue->next pointer and txb content. */ + smp_store_release(&call->send_top, seq); + if (last) { rxrpc_notify_end_tx(rx, call, notify_end_tx); + call->send_queue = NULL; + } spin_unlock(&call->tx_lock); if (poke) rxrpc_poke_call(call, rxrpc_call_poke_start); } +/* + * Allocate a new txqueue unit and add it to the transmission queue. + */ +static int rxrpc_alloc_txqueue(struct sock *sk, struct rxrpc_call *call) +{ + struct rxrpc_txqueue *tq; + + tq = kzalloc(sizeof(*tq), sk->sk_allocation); + if (!tq) + return -ENOMEM; + + tq->xmit_ts_base = KTIME_MIN; + for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) + tq->segment_xmit_ts[i] = UINT_MAX; + + if (call->send_queue) { + tq->qbase = call->send_top + 1; + call->send_queue->next = tq; + call->send_queue = tq; + } else if (WARN_ON(call->tx_queue)) { + kfree(tq); + return -ENOMEM; + } else { + tq->qbase = 0; + call->tx_qbase = 0; + call->send_queue = tq; + call->tx_qtail = tq; + call->tx_queue = tq; + } + + trace_rxrpc_tq(call, tq, call->send_top, rxrpc_tq_alloc); + return 0; +} + /* * send data through a socket * - must be called in process context @@ -346,6 +384,13 @@ reload: if (!rxrpc_check_tx_space(call, NULL)) goto wait_for_space; + /* See if we need to begin/extend the Tx queue. */ + if (!call->send_queue || !((call->send_top + 1) & RXRPC_TXQ_MASK)) { + ret = rxrpc_alloc_txqueue(sk, call); + if (ret < 0) + goto maybe_error; + } + /* Work out the maximum size of a packet. Assume that * the security header is going to be in the padded * region (enc blocksize), but the trailer is not. diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 0cc8f49d69a9..067223c8c35f 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -43,17 +43,14 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ whdr = buf + hoff; - INIT_LIST_HEAD(&txb->call_link); - INIT_LIST_HEAD(&txb->tx_link); refcount_set(&txb->ref, 1); - txb->last_sent = KTIME_MIN; txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); txb->alloc_size = data_size; txb->space = data_size; txb->offset = sizeof(*whdr); txb->flags = call->conn->out_clientflag; - txb->seq = call->tx_prepared + 1; + txb->seq = call->send_top + 1; txb->nr_kvec = 1; txb->kvec[0].iov_base = whdr; txb->kvec[0].iov_len = sizeof(*whdr); @@ -114,8 +111,6 @@ struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_s filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; - INIT_LIST_HEAD(&txb->call_link); - INIT_LIST_HEAD(&txb->tx_link); refcount_set(&txb->ref, 1); txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); @@ -200,37 +195,3 @@ void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) rxrpc_free_txbuf(txb); } } - -/* - * Shrink the transmit buffer. - */ -void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) -{ - struct rxrpc_txbuf *txb; - rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack); - bool wake = false; - - _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top); - - while ((txb = list_first_entry_or_null(&call->tx_buffer, - struct rxrpc_txbuf, call_link))) { - hard_ack = call->acks_hard_ack; - if (before(hard_ack, txb->seq)) - break; - - if (txb->seq != call->tx_bottom + 1) - rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step); - ASSERTCMP(txb->seq, ==, call->tx_bottom + 1); - WRITE_ONCE(call->tx_bottom, call->tx_bottom + 1); - list_del_rcu(&txb->call_link); - - trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue); - - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated); - if (after(call->acks_hard_ack, call->tx_bottom + 128)) - wake = true; - } - - if (wake) - wake_up(&call->waitq); -} -- cgit v1.2.3 From 692c4caa074c0d6092bd713babc6fc3872b5592a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:47 +0000 Subject: rxrpc: call->acks_hard_ack is now the same call->tx_bottom, so remove it Now that packets are removed from the Tx queue in the rotation function rather than being cleaned up later, call->acks_hard_ack now advances in step with call->tx_bottom, so remove it. Some of the places call->acks_hard_ack is used in the rxrpc tracepoints are replaced by call->acks_first_seq instead as that's the peer's reported idea of the hard-ACK point. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-20-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 20 ++++++++++---------- net/rxrpc/ar-internal.h | 1 - net/rxrpc/call_event.c | 4 ++-- net/rxrpc/input.c | 17 ++++++++--------- net/rxrpc/proc.c | 6 +++--- net/rxrpc/sendmsg.c | 6 +++--- 6 files changed, 26 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e6cf9ec940aa..0f253287de00 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -892,8 +892,8 @@ TRACE_EVENT(rxrpc_txqueue, TP_STRUCT__entry( __field(unsigned int, call) __field(enum rxrpc_txqueue_trace, why) - __field(rxrpc_seq_t, acks_hard_ack) __field(rxrpc_seq_t, tx_bottom) + __field(rxrpc_seq_t, acks_first_seq) __field(rxrpc_seq_t, tx_top) __field(rxrpc_seq_t, send_top) __field(int, tx_winsize) @@ -902,8 +902,8 @@ TRACE_EVENT(rxrpc_txqueue, TP_fast_assign( __entry->call = call->debug_id; __entry->why = why; - __entry->acks_hard_ack = call->acks_hard_ack; __entry->tx_bottom = call->tx_bottom; + __entry->acks_first_seq = call->acks_first_seq; __entry->tx_top = call->tx_top; __entry->send_top = call->send_top; __entry->tx_winsize = call->tx_winsize; @@ -913,9 +913,9 @@ TRACE_EVENT(rxrpc_txqueue, __entry->call, __print_symbolic(__entry->why, rxrpc_txqueue_traces), __entry->tx_bottom, - __entry->acks_hard_ack, - __entry->tx_top - __entry->tx_bottom, - __entry->tx_top - __entry->acks_hard_ack, + __entry->acks_first_seq, + __entry->acks_first_seq - __entry->tx_bottom, + __entry->tx_top - __entry->acks_first_seq, __entry->send_top - __entry->tx_top, __entry->tx_winsize) ); @@ -945,7 +945,7 @@ TRACE_EVENT(rxrpc_transmit, __entry->cong_cwnd = call->cong_cwnd; __entry->cong_extra = call->cong_extra; __entry->prepared = send_top - call->tx_bottom; - __entry->in_flight = call->tx_top - call->acks_hard_ack; + __entry->in_flight = call->tx_top - call->tx_bottom; __entry->pmtud_jumbo = call->peer->pmtud_jumbo; ), @@ -1707,7 +1707,7 @@ TRACE_EVENT(rxrpc_congest, TP_fast_assign( __entry->call = call->debug_id; __entry->change = change; - __entry->hard_ack = call->acks_hard_ack; + __entry->hard_ack = call->acks_first_seq; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; __entry->ack_serial = ack_serial; @@ -1754,7 +1754,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, __entry->mode = call->cong_mode; __entry->cwnd = call->cong_cwnd; __entry->extra = call->cong_extra; - __entry->hard_ack = call->acks_hard_ack; + __entry->hard_ack = call->acks_first_seq; __entry->prepared = call->send_top - call->tx_bottom; __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); __entry->has_data = call->tx_bottom != call->tx_top; @@ -1855,7 +1855,7 @@ TRACE_EVENT(rxrpc_resend, TP_fast_assign( struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL; __entry->call = call->debug_id; - __entry->seq = call->acks_hard_ack; + __entry->seq = call->acks_first_seq; __entry->transmitted = call->tx_transmitted; __entry->ack_serial = sp ? sp->hdr.serial : 0; ), @@ -1944,7 +1944,7 @@ TRACE_EVENT(rxrpc_call_reset, __entry->call_id = call->call_id; __entry->call_serial = call->rx_serial; __entry->conn_serial = call->conn->hi_serial; - __entry->tx_seq = call->acks_hard_ack; + __entry->tx_seq = call->acks_first_seq; __entry->rx_seq = call->rx_highest_seq; ), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index bcce4862b0b7..6683043cee3f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -759,7 +759,6 @@ struct rxrpc_call { ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ rxrpc_seq_t acks_first_seq; /* first sequence number received */ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ - rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 90e3d9395675..2311e5c737e8 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -109,7 +109,7 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) bool unacked = false, did_send = false; unsigned int qix; - _enter("{%d,%d}", call->acks_hard_ack, call->tx_top); + _enter("{%d,%d}", call->tx_bottom, call->tx_top); if (call->tx_bottom == call->tx_top) goto no_resend; @@ -267,7 +267,7 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call) static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) { int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); - int in_flight = call->tx_top - call->acks_hard_ack; + int in_flight = call->tx_top - call->tx_bottom; return max(winsize - in_flight, 0); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index cfdd23042d4c..afb87a3322da 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -40,7 +40,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, bool resend = false; summary->flight_size = - (call->tx_top - call->acks_hard_ack) - summary->nr_acks; + (call->tx_top - call->tx_bottom) - summary->nr_acks; if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; @@ -175,7 +175,7 @@ send_extra_data: * state. */ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) || - summary->nr_acks != call->tx_top - call->acks_hard_ack) { + summary->nr_acks != call->tx_top - call->tx_bottom) { call->cong_extra++; wake_up(&call->waitq); } @@ -218,7 +218,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, rxrpc_seq_t seq = call->tx_bottom + 1; bool rot_last = false; - _enter("%x,%x,%x", call->tx_bottom, call->acks_hard_ack, to); + _enter("%x,%x", call->tx_bottom, to); trace_rxrpc_tx_rotate(call, seq, to); trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate); @@ -246,7 +246,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, tq->bufs[ix] = NULL; WRITE_ONCE(call->tx_bottom, seq); - WRITE_ONCE(call->acks_hard_ack, seq); trace_rxrpc_txqueue(call, (rot_last ? rxrpc_txqueue_rotate_last : rxrpc_txqueue_rotate)); @@ -278,9 +277,9 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, } } - _debug("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); + _debug("%x,%x,%x,%d", to, call->tx_bottom, call->tx_top, rot_last); - if (call->acks_lowest_nak == call->acks_hard_ack) { + if (call->acks_lowest_nak == call->tx_bottom) { call->acks_lowest_nak = to; } else if (after(to, call->acks_lowest_nak)) { summary->new_low_nack = true; @@ -968,7 +967,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) && first_soft_ack == 1 && prev_pkt == 0 && - call->acks_hard_ack == 0 && + call->tx_bottom == 0 && rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, 0, -ENETRESET); @@ -1033,13 +1032,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) goto send_response; } - if (before(hard_ack, call->acks_hard_ack) || + if (before(hard_ack, call->tx_bottom) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window); if (nr_acks > call->tx_top - hard_ack) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); - if (after(hard_ack, call->acks_hard_ack)) { + if (after(hard_ack, call->tx_bottom)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); goto send_response; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 249e1ed9c5c9..a8325b8e33c2 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -52,7 +52,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); enum rxrpc_call_state state; - rxrpc_seq_t acks_hard_ack; + rxrpc_seq_t tx_bottom; char lbuff[50], rbuff[50]; long timeout = 0; @@ -79,7 +79,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) if (state != RXRPC_CALL_SERVER_PREALLOC) timeout = ktime_ms_delta(READ_ONCE(call->expect_rx_by), ktime_get_real()); - acks_hard_ack = READ_ONCE(call->acks_hard_ack); + tx_bottom = READ_ONCE(call->tx_bottom); seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n", @@ -93,7 +93,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) rxrpc_call_states[state], call->abort_code, call->debug_id, - acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack, + tx_bottom, READ_ONCE(call->tx_top) - tx_bottom, call->ackr_window, call->ackr_wtop - call->ackr_window, call->rx_serial, call->cong_cwnd, diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 85b35b11755d..dfbf9f4b24b6 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -140,7 +140,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, rtt = 2; timeout = rtt; - tx_start = READ_ONCE(call->acks_hard_ack); + tx_start = READ_ONCE(call->tx_bottom); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -197,8 +197,8 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, DECLARE_WAITQUEUE(myself, current); int ret; - _enter(",{%u,%u,%u,%u}", - call->tx_bottom, call->acks_hard_ack, call->tx_top, call->tx_winsize); + _enter(",{%u,%u,%u}", + call->tx_bottom, call->tx_top, call->tx_winsize); add_wait_queue(&call->waitq, &myself); -- cgit v1.2.3 From 203457e11b591f80ada571f981dd5f4d683b0009 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:48 +0000 Subject: rxrpc: Replace call->acks_first_seq with tracking of the hard ACK point Replace the call->acks_first_seq variable (which holds ack.firstPacket from the latest ACK packet and indicates the sequence number of the first ack slot in the SACK table) with call->acks_hard_ack which will hold the highest sequence hard ACK'd. This is 1 less than call->acks_first_seq, but it fits in the same schema as the other tracking variables which hold the sequence of a packet, not one past it. This will fix the rxrpc_congest tracepoint's calculation of SACK window size which shows one fewer than it should - and will occasionally go to -1. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-21-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 68 +++++++++++++++++++++----------------------- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/input.c | 56 +++++++++++++++++------------------- 3 files changed, 59 insertions(+), 67 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 0f253287de00..91108e0de3af 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -893,7 +893,7 @@ TRACE_EVENT(rxrpc_txqueue, __field(unsigned int, call) __field(enum rxrpc_txqueue_trace, why) __field(rxrpc_seq_t, tx_bottom) - __field(rxrpc_seq_t, acks_first_seq) + __field(rxrpc_seq_t, acks_hard_ack) __field(rxrpc_seq_t, tx_top) __field(rxrpc_seq_t, send_top) __field(int, tx_winsize) @@ -903,19 +903,19 @@ TRACE_EVENT(rxrpc_txqueue, __entry->call = call->debug_id; __entry->why = why; __entry->tx_bottom = call->tx_bottom; - __entry->acks_first_seq = call->acks_first_seq; + __entry->acks_hard_ack = call->acks_hard_ack; __entry->tx_top = call->tx_top; __entry->send_top = call->send_top; __entry->tx_winsize = call->tx_winsize; ), - TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u/%u", + TP_printk("c=%08x %s b=%08x h=%08x n=%u/%u/%u/%u", __entry->call, __print_symbolic(__entry->why, rxrpc_txqueue_traces), __entry->tx_bottom, - __entry->acks_first_seq, - __entry->acks_first_seq - __entry->tx_bottom, - __entry->tx_top - __entry->acks_first_seq, + __entry->acks_hard_ack, + __entry->acks_hard_ack - __entry->tx_bottom, + __entry->tx_top - __entry->acks_hard_ack, __entry->send_top - __entry->tx_top, __entry->tx_winsize) ); @@ -1015,11 +1015,9 @@ TRACE_EVENT(rxrpc_rx_data, ); TRACE_EVENT(rxrpc_rx_ack, - TP_PROTO(struct rxrpc_call *call, - rxrpc_serial_t serial, rxrpc_serial_t ack_serial, - rxrpc_seq_t first, rxrpc_seq_t prev, u8 reason, u8 n_acks), + TP_PROTO(struct rxrpc_call *call, struct rxrpc_skb_priv *sp), - TP_ARGS(call, serial, ack_serial, first, prev, reason, n_acks), + TP_ARGS(call, sp), TP_STRUCT__entry( __field(unsigned int, call) @@ -1032,13 +1030,13 @@ TRACE_EVENT(rxrpc_rx_ack, ), TP_fast_assign( - __entry->call = call->debug_id; - __entry->serial = serial; - __entry->ack_serial = ack_serial; - __entry->first = first; - __entry->prev = prev; - __entry->reason = reason; - __entry->n_acks = n_acks; + __entry->call = call->debug_id; + __entry->serial = sp->hdr.serial; + __entry->ack_serial = sp->ack.acked_serial; + __entry->first = sp->ack.first_ack; + __entry->prev = sp->ack.prev_ack; + __entry->reason = sp->ack.reason; + __entry->n_acks = sp->ack.nr_acks; ), TP_printk("c=%08x %08x %s r=%08x f=%08x p=%08x n=%u", @@ -1707,7 +1705,7 @@ TRACE_EVENT(rxrpc_congest, TP_fast_assign( __entry->call = call->debug_id; __entry->change = change; - __entry->hard_ack = call->acks_first_seq; + __entry->hard_ack = call->acks_hard_ack; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; __entry->ack_serial = ack_serial; @@ -1754,7 +1752,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, __entry->mode = call->cong_mode; __entry->cwnd = call->cong_cwnd; __entry->extra = call->cong_extra; - __entry->hard_ack = call->acks_first_seq; + __entry->hard_ack = call->acks_hard_ack; __entry->prepared = call->send_top - call->tx_bottom; __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); __entry->has_data = call->tx_bottom != call->tx_top; @@ -1855,7 +1853,7 @@ TRACE_EVENT(rxrpc_resend, TP_fast_assign( struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL; __entry->call = call->debug_id; - __entry->seq = call->acks_first_seq; + __entry->seq = call->acks_hard_ack; __entry->transmitted = call->tx_transmitted; __entry->ack_serial = sp ? sp->hdr.serial : 0; ), @@ -1944,7 +1942,7 @@ TRACE_EVENT(rxrpc_call_reset, __entry->call_id = call->call_id; __entry->call_serial = call->rx_serial; __entry->conn_serial = call->conn->hi_serial; - __entry->tx_seq = call->acks_first_seq; + __entry->tx_seq = call->acks_hard_ack; __entry->rx_seq = call->rx_highest_seq; ), @@ -1976,38 +1974,36 @@ TRACE_EVENT(rxrpc_notify_socket, ); TRACE_EVENT(rxrpc_rx_discard_ack, - TP_PROTO(unsigned int debug_id, rxrpc_serial_t serial, - rxrpc_seq_t first_soft_ack, rxrpc_seq_t call_ackr_first, - rxrpc_seq_t prev_pkt, rxrpc_seq_t call_ackr_prev), + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + rxrpc_seq_t hard_ack, rxrpc_seq_t prev_pkt), - TP_ARGS(debug_id, serial, first_soft_ack, call_ackr_first, - prev_pkt, call_ackr_prev), + TP_ARGS(call, serial, hard_ack, prev_pkt), TP_STRUCT__entry( __field(unsigned int, debug_id) __field(rxrpc_serial_t, serial) - __field(rxrpc_seq_t, first_soft_ack) - __field(rxrpc_seq_t, call_ackr_first) + __field(rxrpc_seq_t, hard_ack) __field(rxrpc_seq_t, prev_pkt) - __field(rxrpc_seq_t, call_ackr_prev) + __field(rxrpc_seq_t, acks_hard_ack) + __field(rxrpc_seq_t, acks_prev_seq) ), TP_fast_assign( - __entry->debug_id = debug_id; + __entry->debug_id = call->debug_id; __entry->serial = serial; - __entry->first_soft_ack = first_soft_ack; - __entry->call_ackr_first = call_ackr_first; + __entry->hard_ack = hard_ack; __entry->prev_pkt = prev_pkt; - __entry->call_ackr_prev = call_ackr_prev; + __entry->acks_hard_ack = call->acks_hard_ack; + __entry->acks_prev_seq = call->acks_prev_seq; ), TP_printk("c=%08x r=%08x %08x<%08x %08x<%08x", __entry->debug_id, __entry->serial, - __entry->first_soft_ack, - __entry->call_ackr_first, + __entry->hard_ack, + __entry->acks_hard_ack, __entry->prev_pkt, - __entry->call_ackr_prev) + __entry->acks_prev_seq) ); TRACE_EVENT(rxrpc_req_ack, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 6683043cee3f..3e57cef7385f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -757,7 +757,7 @@ struct rxrpc_call { /* Transmission-phase ACK management (ACKs we've received). */ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ - rxrpc_seq_t acks_first_seq; /* first sequence number received */ + rxrpc_seq_t acks_hard_ack; /* Highest sequence hard acked */ rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index afb87a3322da..b89fd0dee324 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -782,12 +782,12 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb */ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - rxrpc_seq_t seq) + rxrpc_seq_t hard_ack) { struct sk_buff *skb = call->cong_last_nack; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int i, new_acks = 0, retained_nacks = 0; - rxrpc_seq_t old_seq = sp->ack.first_ack; + rxrpc_seq_t seq = hard_ack + 1, old_seq = sp->ack.first_ack; u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); if (after_eq(seq, old_seq + sp->ack.nr_acks)) { @@ -810,7 +810,7 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, summary->nr_retained_nacks = retained_nacks; } - return old_seq + sp->ack.nr_acks; + return old_seq + sp->ack.nr_acks - 1; } /* @@ -825,22 +825,23 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, static void rxrpc_input_soft_acks(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, struct sk_buff *skb, - rxrpc_seq_t seq, rxrpc_seq_t since) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned int i, old_nacks = 0; - rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks; + rxrpc_seq_t lowest_nak = call->acks_hard_ack + sp->ack.nr_acks + 1; + rxrpc_seq_t seq = call->acks_hard_ack; u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); for (i = 0; i < sp->ack.nr_acks; i++) { + seq++; if (acks[i] == RXRPC_ACK_TYPE_ACK) { summary->nr_acks++; - if (after_eq(seq, since)) + if (after(seq, since)) summary->nr_new_acks++; } else { summary->saw_nacks = true; - if (before(seq, since)) { + if (before_eq(seq, since)) { /* Overlap with previous ACK */ old_nacks++; } else { @@ -851,7 +852,6 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, if (before(seq, lowest_nak)) lowest_nak = seq; } - seq++; } if (lowest_nak != call->acks_lowest_nak) { @@ -874,21 +874,21 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, * with respect to the ack state conveyed by preceding ACKs. */ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, - rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) + rxrpc_seq_t hard_ack, rxrpc_seq_t prev_pkt) { - rxrpc_seq_t base = READ_ONCE(call->acks_first_seq); + rxrpc_seq_t base = READ_ONCE(call->acks_hard_ack); - if (after(first_pkt, base)) + if (after(hard_ack, base)) return true; /* The window advanced */ - if (before(first_pkt, base)) + if (before(hard_ack, base)) return false; /* firstPacket regressed */ if (after_eq(prev_pkt, call->acks_prev_seq)) return true; /* previousPacket hasn't regressed. */ /* Some rx implementations put a serial number in previousPacket. */ - if (after_eq(prev_pkt, base + call->tx_winsize)) + if (after(prev_pkt, base + call->tx_winsize)) return false; return true; } @@ -906,8 +906,8 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_acktrailer trailer; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_serial_t ack_serial, acked_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; int nr_acks, offset, ioffset; @@ -925,9 +925,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? sp->ack.reason : RXRPC_ACK__INVALID); - trace_rxrpc_rx_ack(call, ack_serial, acked_serial, - first_soft_ack, prev_pkt, - summary.ack_reason, nr_acks); + trace_rxrpc_rx_ack(call, sp); rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); if (acked_serial != 0) { @@ -952,7 +950,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) * lost the call because it switched to a different peer. */ if (unlikely(summary.ack_reason == RXRPC_ACK_EXCEEDS_WINDOW) && - first_soft_ack == 1 && + hard_ack == 0 && prev_pkt == 0 && rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, @@ -965,7 +963,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) * if we still have it buffered to the beginning. */ if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) && - first_soft_ack == 1 && + hard_ack == 0 && prev_pkt == 0 && call->tx_bottom == 0 && rxrpc_is_client_call(call)) { @@ -975,10 +973,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } /* Discard any out-of-order or duplicate ACKs (outside lock). */ - if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { - trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->acks_first_seq, - prev_pkt, call->acks_prev_seq); + if (!rxrpc_is_ack_valid(call, hard_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call, ack_serial, hard_ack, prev_pkt); goto send_response; } @@ -992,17 +988,17 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) skb_condense(skb); if (call->cong_last_nack) { - since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack); + since = rxrpc_input_check_prev_ack(call, &summary, hard_ack); rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); call->cong_last_nack = NULL; } else { - summary.nr_new_acks = first_soft_ack - call->acks_first_seq; - call->acks_lowest_nak = first_soft_ack + nr_acks; - since = first_soft_ack; + summary.nr_new_acks = hard_ack - call->acks_hard_ack; + call->acks_lowest_nak = hard_ack + nr_acks; + since = hard_ack; } call->acks_latest_ts = skb->tstamp; - call->acks_first_seq = first_soft_ack; + call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; switch (summary.ack_reason) { @@ -1018,7 +1014,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (trailer.maxMTU) rxrpc_input_ack_trailer(call, skb, &trailer); - if (first_soft_ack == 0) + if (hard_ack + 1 == 0) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); /* Ignore ACKs unless we are or have just been transmitting. */ @@ -1048,7 +1044,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); - rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since); + rxrpc_input_soft_acks(call, &summary, skb, since); rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); call->cong_last_nack = skb; } -- cgit v1.2.3 From f7dd0dc9651326f609579fa81cbdda69b0467c2a Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:50 +0000 Subject: rxrpc: Adjust names and types of congestion-related fields Adjust some of the names of fields and constants to make them look a bit more like the TCP congestion symbol names, such as flight_size -> in_flight and congest_mode to ca_state. Move the persistent congestion-related fields from the rxrpc_ack_summary struct into the rxrpc_call struct rather than copying them out and back in again. The rxrpc_congest tracepoint can fetch them from the call struct. Rename the counters for soft acks and nacks to have an 's' on the front to reflect the softness, e.g. nr_acks -> nr_sacks. Make fields counting numbers of packets or numbers of acks u16 rather than u8 to allow for windows of up to 8192 DATA packets in flight in future. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-23-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 58 ++++++++++------- net/rxrpc/ar-internal.h | 51 +++++++-------- net/rxrpc/conn_client.c | 4 +- net/rxrpc/input.c | 151 ++++++++++++++++++++----------------------- net/rxrpc/output.c | 2 +- 5 files changed, 132 insertions(+), 134 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 91108e0de3af..d47b8235fad3 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -378,11 +378,11 @@ EM(rxrpc_propose_ack_rx_idle, "RxIdle ") \ E_(rxrpc_propose_ack_terminal_ack, "ClTerm ") -#define rxrpc_congest_modes \ - EM(RXRPC_CALL_CONGEST_AVOIDANCE, "CongAvoid") \ - EM(RXRPC_CALL_FAST_RETRANSMIT, "FastReTx ") \ - EM(RXRPC_CALL_PACKET_LOSS, "PktLoss ") \ - E_(RXRPC_CALL_SLOW_START, "SlowStart") +#define rxrpc_ca_states \ + EM(RXRPC_CA_CONGEST_AVOIDANCE, "CongAvoid") \ + EM(RXRPC_CA_FAST_RETRANSMIT, "FastReTx ") \ + EM(RXRPC_CA_PACKET_LOSS, "PktLoss ") \ + E_(RXRPC_CA_SLOW_START, "SlowStart") #define rxrpc_congest_changes \ EM(rxrpc_cong_begin_retransmission, " Retrans") \ @@ -550,11 +550,11 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); rxrpc_abort_reasons; rxrpc_bundle_traces; +rxrpc_ca_states; rxrpc_call_poke_traces; rxrpc_call_traces; rxrpc_client_traces; rxrpc_congest_changes; -rxrpc_congest_modes; rxrpc_conn_traces; rxrpc_local_traces; rxrpc_pmtud_reduce_traces; @@ -1688,27 +1688,39 @@ TRACE_EVENT(rxrpc_retransmit, TRACE_EVENT(rxrpc_congest, TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - rxrpc_serial_t ack_serial, enum rxrpc_congest_change change), + rxrpc_serial_t ack_serial), - TP_ARGS(call, summary, ack_serial, change), + TP_ARGS(call, summary, ack_serial), TP_STRUCT__entry( __field(unsigned int, call) - __field(enum rxrpc_congest_change, change) + __field(enum rxrpc_ca_state, ca_state) __field(rxrpc_seq_t, hard_ack) __field(rxrpc_seq_t, top) __field(rxrpc_seq_t, lowest_nak) __field(rxrpc_serial_t, ack_serial) + __field(u16, nr_sacks) + __field(u16, nr_snacks) + __field(u16, cwnd) + __field(u16, ssthresh) + __field(u16, cumul_acks) + __field(u16, dup_acks) __field_struct(struct rxrpc_ack_summary, sum) ), TP_fast_assign( __entry->call = call->debug_id; - __entry->change = change; + __entry->ca_state = call->cong_ca_state; __entry->hard_ack = call->acks_hard_ack; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; __entry->ack_serial = ack_serial; + __entry->nr_sacks = call->acks_nr_sacks; + __entry->nr_snacks = call->acks_nr_snacks; + __entry->cwnd = call->cong_cwnd; + __entry->ssthresh = call->cong_ssthresh; + __entry->cumul_acks = call->cong_cumul_acks; + __entry->dup_acks = call->cong_dup_acks; memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), @@ -1717,17 +1729,17 @@ TRACE_EVENT(rxrpc_congest, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), __entry->hard_ack, - __print_symbolic(__entry->sum.mode, rxrpc_congest_modes), - __entry->sum.cwnd, - __entry->sum.ssthresh, - __entry->sum.nr_acks, __entry->sum.nr_retained_nacks, - __entry->sum.nr_new_acks, - __entry->sum.nr_new_nacks, + __print_symbolic(__entry->ca_state, rxrpc_ca_states), + __entry->cwnd, + __entry->ssthresh, + __entry->nr_sacks, __entry->sum.nr_retained_snacks, + __entry->sum.nr_new_sacks, + __entry->sum.nr_new_snacks, __entry->top - __entry->hard_ack, - __entry->sum.cumulative_acks, - __entry->sum.dup_acks, - __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "", - __print_symbolic(__entry->change, rxrpc_congest_changes), + __entry->cumul_acks, + __entry->dup_acks, + __entry->lowest_nak, __entry->sum.new_low_snack ? "!" : "", + __print_symbolic(__entry->sum.change, rxrpc_congest_changes), __entry->sum.retrans_timeo ? " rTxTo" : "") ); @@ -1738,7 +1750,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, TP_STRUCT__entry( __field(unsigned int, call) - __field(enum rxrpc_congest_mode, mode) + __field(enum rxrpc_ca_state, ca_state) __field(unsigned short, cwnd) __field(unsigned short, extra) __field(rxrpc_seq_t, hard_ack) @@ -1749,7 +1761,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, TP_fast_assign( __entry->call = call->debug_id; - __entry->mode = call->cong_mode; + __entry->ca_state = call->cong_ca_state; __entry->cwnd = call->cong_cwnd; __entry->extra = call->cong_extra; __entry->hard_ack = call->acks_hard_ack; @@ -1761,7 +1773,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u", __entry->call, __entry->hard_ack, - __print_symbolic(__entry->mode, rxrpc_congest_modes), + __print_symbolic(__entry->ca_state, rxrpc_ca_states), __entry->cwnd, __entry->extra, __entry->prepared, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 840293f913a3..f6e6b2ab6c2a 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -623,13 +623,13 @@ enum rxrpc_call_state { /* * Call Tx congestion management modes. */ -enum rxrpc_congest_mode { - RXRPC_CALL_SLOW_START, - RXRPC_CALL_CONGEST_AVOIDANCE, - RXRPC_CALL_PACKET_LOSS, - RXRPC_CALL_FAST_RETRANSMIT, - NR__RXRPC_CONGEST_MODES -}; +enum rxrpc_ca_state { + RXRPC_CA_SLOW_START, + RXRPC_CA_CONGEST_AVOIDANCE, + RXRPC_CA_PACKET_LOSS, + RXRPC_CA_FAST_RETRANSMIT, + NR__RXRPC_CA_STATES +} __mode(byte); /* * RxRPC call definition @@ -727,12 +727,12 @@ struct rxrpc_call { */ #define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN #define RXRPC_MIN_CWND 4 - u8 cong_cwnd; /* Congestion window size */ + enum rxrpc_ca_state cong_ca_state; /* Congestion control state */ u8 cong_extra; /* Extra to send for congestion management */ - u8 cong_ssthresh; /* Slow-start threshold */ - enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */ - u8 cong_dup_acks; /* Count of ACKs showing missing packets */ - u8 cong_cumul_acks; /* Cumulative ACK count */ + u16 cong_cwnd; /* Congestion window size */ + u16 cong_ssthresh; /* Slow-start threshold */ + u16 cong_dup_acks; /* Count of ACKs showing missing packets */ + u16 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ struct sk_buff *cong_last_nack; /* Last ACK with nacks received */ @@ -763,27 +763,24 @@ struct rxrpc_call { rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ + unsigned short acks_nr_sacks; /* Number of soft acks recorded */ + unsigned short acks_nr_snacks; /* Number of soft nacks recorded */ }; /* * Summary of a new ACK and the changes it made to the Tx buffer packet states. */ struct rxrpc_ack_summary { - u16 nr_acks; /* Number of ACKs in packet */ - u16 nr_new_acks; /* Number of new ACKs in packet */ - u16 nr_new_nacks; /* Number of new nacks in packet */ - u16 nr_retained_nacks; /* Number of nacks retained between ACKs */ - u8 ack_reason; - bool saw_nacks; /* Saw NACKs in packet */ - bool new_low_nack; /* T if new low NACK found */ - bool retrans_timeo; /* T if reTx due to timeout happened */ - u8 flight_size; /* Number of unreceived transmissions */ - /* Place to stash values for tracing */ - enum rxrpc_congest_mode mode:8; - u8 cwnd; - u8 ssthresh; - u8 dup_acks; - u8 cumulative_acks; + u16 in_flight; /* Number of unreceived transmissions */ + u16 nr_new_hacks; /* Number of rotated new ACKs */ + u16 nr_new_sacks; /* Number of new soft ACKs in packet */ + u16 nr_new_snacks; /* Number of new soft nacks in packet */ + u16 nr_retained_snacks; /* Number of nacks retained between ACKs */ + u8 ack_reason; + bool saw_snacks:1; /* T if we saw a soft NACK */ + bool new_low_snack:1; /* T if new low soft NACK found */ + bool retrans_timeo:1; /* T if reTx due to timeout happened */ + u8 /*enum rxrpc_congest_change*/ change; }; /* diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 706631e6ac2f..5f76bd90567c 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -437,9 +437,9 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, call->dest_srx.srx_service = conn->service_id; call->cong_ssthresh = call->peer->cong_ssthresh; if (call->cong_cwnd >= call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; else - call->cong_mode = RXRPC_CALL_SLOW_START; + call->cong_ca_state = RXRPC_CA_SLOW_START; chan->call_id = call_id; chan->call_debug_id = call->debug_id; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 8d7ab4b9d7d0..c25d816aafee 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -34,49 +34,41 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, rxrpc_serial_t acked_serial) { - enum rxrpc_congest_change change = rxrpc_cong_no_change; - unsigned int cumulative_acks = call->cong_cumul_acks; - unsigned int cwnd = call->cong_cwnd; bool resend = false; - summary->flight_size = - (call->tx_top - call->tx_bottom) - summary->nr_acks; + summary->change = rxrpc_cong_no_change; + summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; - call->cong_ssthresh = umax(summary->flight_size / 2, 2); - cwnd = 1; - if (cwnd >= call->cong_ssthresh && - call->cong_mode == RXRPC_CALL_SLOW_START) { - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_ssthresh = umax(summary->in_flight / 2, 2); + call->cong_cwnd = 1; + if (call->cong_cwnd >= call->cong_ssthresh && + call->cong_ca_state == RXRPC_CA_SLOW_START) { + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; - cumulative_acks = 0; + call->cong_cumul_acks = 0; } } - cumulative_acks += summary->nr_new_acks; - if (cumulative_acks > 255) - cumulative_acks = 255; + call->cong_cumul_acks += summary->nr_new_sacks; + if (call->cong_cumul_acks > 255) + call->cong_cumul_acks = 255; - summary->cwnd = call->cong_cwnd; - summary->ssthresh = call->cong_ssthresh; - summary->cumulative_acks = cumulative_acks; - summary->dup_acks = call->cong_dup_acks; - - switch (call->cong_mode) { - case RXRPC_CALL_SLOW_START: - if (summary->saw_nacks) + switch (call->cong_ca_state) { + case RXRPC_CA_SLOW_START: + if (summary->saw_snacks) goto packet_loss_detected; - if (summary->cumulative_acks > 0) - cwnd += 1; - if (cwnd >= call->cong_ssthresh) { - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + if (call->cong_cumul_acks > 0) + call->cong_cwnd += 1; + if (call->cong_cwnd >= call->cong_ssthresh) { + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; call->cong_tstamp = skb->tstamp; } goto out; - case RXRPC_CALL_CONGEST_AVOIDANCE: - if (summary->saw_nacks) + case RXRPC_CA_CONGEST_AVOIDANCE: + if (summary->saw_snacks) goto packet_loss_detected; /* We analyse the number of packets that get ACK'd per RTT @@ -88,18 +80,18 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, ktime_add_us(call->cong_tstamp, call->peer->srtt_us >> 3))) goto out_no_clear_ca; - change = rxrpc_cong_rtt_window_end; + summary->change = rxrpc_cong_rtt_window_end; call->cong_tstamp = skb->tstamp; - if (cumulative_acks >= cwnd) - cwnd++; + if (call->cong_cumul_acks >= call->cong_cwnd) + call->cong_cwnd++; goto out; - case RXRPC_CALL_PACKET_LOSS: - if (!summary->saw_nacks) + case RXRPC_CA_PACKET_LOSS: + if (!summary->saw_snacks) goto resume_normality; - if (summary->new_low_nack) { - change = rxrpc_cong_new_low_nack; + if (summary->new_low_snack) { + summary->change = rxrpc_cong_new_low_nack; call->cong_dup_acks = 1; if (call->cong_extra > 1) call->cong_extra = 1; @@ -110,29 +102,29 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, if (call->cong_dup_acks < 3) goto send_extra_data; - change = rxrpc_cong_begin_retransmission; - call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; - call->cong_ssthresh = umax(summary->flight_size / 2, 2); - cwnd = call->cong_ssthresh + 3; + summary->change = rxrpc_cong_begin_retransmission; + call->cong_ca_state = RXRPC_CA_FAST_RETRANSMIT; + call->cong_ssthresh = umax(summary->in_flight / 2, 2); + call->cong_cwnd = call->cong_ssthresh + 3; call->cong_extra = 0; call->cong_dup_acks = 0; resend = true; goto out; - case RXRPC_CALL_FAST_RETRANSMIT: - if (!summary->new_low_nack) { - if (summary->nr_new_acks == 0) - cwnd += 1; + case RXRPC_CA_FAST_RETRANSMIT: + if (!summary->new_low_snack) { + if (summary->nr_new_sacks == 0) + call->cong_cwnd += 1; call->cong_dup_acks++; if (call->cong_dup_acks == 2) { - change = rxrpc_cong_retransmit_again; + summary->change = rxrpc_cong_retransmit_again; call->cong_dup_acks = 0; resend = true; } } else { - change = rxrpc_cong_progress; - cwnd = call->cong_ssthresh; - if (!summary->saw_nacks) + summary->change = rxrpc_cong_progress; + call->cong_cwnd = call->cong_ssthresh; + if (!summary->saw_snacks) goto resume_normality; } goto out; @@ -143,30 +135,27 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } resume_normality: - change = rxrpc_cong_cleared_nacks; + summary->change = rxrpc_cong_cleared_nacks; call->cong_dup_acks = 0; call->cong_extra = 0; call->cong_tstamp = skb->tstamp; - if (cwnd < call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_SLOW_START; + if (call->cong_cwnd < call->cong_ssthresh) + call->cong_ca_state = RXRPC_CA_SLOW_START; else - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; out: - cumulative_acks = 0; + call->cong_cumul_acks = 0; out_no_clear_ca: - if (cwnd >= RXRPC_TX_MAX_WINDOW) - cwnd = RXRPC_TX_MAX_WINDOW; - call->cong_cwnd = cwnd; - call->cong_cumul_acks = cumulative_acks; - summary->mode = call->cong_mode; - trace_rxrpc_congest(call, summary, acked_serial, change); + if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW) + call->cong_cwnd = RXRPC_TX_MAX_WINDOW; + trace_rxrpc_congest(call, summary, acked_serial); if (resend) rxrpc_resend(call, skb); return; packet_loss_detected: - change = rxrpc_cong_saw_nack; - call->cong_mode = RXRPC_CALL_PACKET_LOSS; + summary->change = rxrpc_cong_saw_nack; + call->cong_ca_state = RXRPC_CA_PACKET_LOSS; call->cong_dup_acks = 0; goto send_extra_data; @@ -175,7 +164,7 @@ send_extra_data: * state. */ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) || - summary->nr_acks != call->tx_top - call->tx_bottom) { + call->acks_nr_sacks != call->tx_top - call->tx_bottom) { call->cong_extra++; wake_up(&call->waitq); } @@ -189,8 +178,8 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) { ktime_t rtt, now; - if (call->cong_mode != RXRPC_CALL_SLOW_START && - call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE) + if (call->cong_ca_state != RXRPC_CA_SLOW_START && + call->cong_ca_state != RXRPC_CA_CONGEST_AVOIDANCE) return; if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY) return; @@ -203,7 +192,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) trace_rxrpc_reset_cwnd(call, now); rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset); call->tx_last_sent = now; - call->cong_mode = RXRPC_CALL_SLOW_START; + call->cong_ca_state = RXRPC_CA_SLOW_START; call->cong_ssthresh = umax(call->cong_ssthresh, call->cong_cwnd * 3 / 4); call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND); } @@ -282,7 +271,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, if (call->acks_lowest_nak == call->tx_bottom) { call->acks_lowest_nak = to; } else if (after(to, call->acks_lowest_nak)) { - summary->new_low_nack = true; + summary->new_low_snack = true; call->acks_lowest_nak = to; } @@ -795,11 +784,11 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); if (after_eq(seq, old_seq + sp->ack.nr_acks)) { - summary->nr_new_acks += sp->ack.nr_nacks; - summary->nr_new_acks += seq - (old_seq + sp->ack.nr_acks); - summary->nr_retained_nacks = 0; + summary->nr_new_sacks += sp->ack.nr_nacks; + summary->nr_new_sacks += seq - (old_seq + sp->ack.nr_acks); + summary->nr_retained_snacks = 0; } else if (seq == old_seq) { - summary->nr_retained_nacks = sp->ack.nr_nacks; + summary->nr_retained_snacks = sp->ack.nr_nacks; } else { for (i = 0; i < sp->ack.nr_acks; i++) { if (acks[i] == RXRPC_ACK_TYPE_NACK) { @@ -810,8 +799,8 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, } } - summary->nr_new_acks += new_acks; - summary->nr_retained_nacks = retained_nacks; + summary->nr_new_sacks += new_acks; + summary->nr_retained_snacks = retained_nacks; } return old_seq + sp->ack.nr_acks - 1; @@ -840,16 +829,16 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, for (i = 0; i < sp->ack.nr_acks; i++) { seq++; if (acks[i] == RXRPC_ACK_TYPE_ACK) { - summary->nr_acks++; + call->acks_nr_sacks++; if (after(seq, since)) - summary->nr_new_acks++; + summary->nr_new_sacks++; } else { - summary->saw_nacks = true; + summary->saw_snacks = true; if (before_eq(seq, since)) { /* Overlap with previous ACK */ old_nacks++; } else { - summary->nr_new_nacks++; + summary->nr_new_snacks++; sp->ack.nr_nacks++; } @@ -860,7 +849,7 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, if (lowest_nak != call->acks_lowest_nak) { call->acks_lowest_nak = lowest_nak; - summary->new_low_nack = true; + summary->new_low_snack = true; } /* We *can* have more nacks than we did - the peer is permitted to drop @@ -868,9 +857,9 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, * possible for the nack distribution to change whilst the number of * nacks stays the same or goes down. */ - if (old_nacks < summary->nr_retained_nacks) - summary->nr_new_acks += summary->nr_retained_nacks - old_nacks; - summary->nr_retained_nacks = old_nacks; + if (old_nacks < summary->nr_retained_snacks) + summary->nr_new_sacks += summary->nr_retained_snacks - old_nacks; + summary->nr_retained_snacks = old_nacks; } /* @@ -996,7 +985,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); call->cong_last_nack = NULL; } else { - summary.nr_new_acks = hard_ack - call->acks_hard_ack; + summary.nr_new_sacks = hard_ack - call->acks_hard_ack; call->acks_lowest_nak = hard_ack + nr_acks; since = hard_ack; } @@ -1054,7 +1043,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && - summary.nr_acks == call->tx_top - hard_ack && + call->acks_nr_sacks == call->tx_top - hard_ack && rxrpc_is_client_call(call)) rxrpc_propose_ping(call, ack_serial, rxrpc_propose_ack_ping_for_lost_reply); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 3886777d1bb6..7ed928b6f0e1 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -419,7 +419,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, why = rxrpc_reqack_ack_lost; else if (txb->flags & RXRPC_TXBUF_RESENT) why = rxrpc_reqack_retrans; - else if (call->cong_mode == RXRPC_CALL_SLOW_START && call->cong_cwnd <= 2) + else if (call->cong_ca_state == RXRPC_CA_SLOW_START && call->cong_cwnd <= 2) why = rxrpc_reqack_slow_start; else if (call->tx_winsize <= 2) why = rxrpc_reqack_small_txwin; -- cgit v1.2.3 From 9b052c6b92f9316d670bf50566f70e183d0d19cb Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:51 +0000 Subject: rxrpc: Use the new rxrpc_tx_queue struct to more efficiently process ACKs With the change in the structure of the transmission buffer to store buffers in bunches of 32 or 64 (BITS_PER_LONG) we can place sets of per-buffer flags into the rxrpc_tx_queue struct rather than storing them in rxrpc_tx_buf, thereby vastly increasing efficiency when assessing the SACK table in an ACK packet. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-24-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 86 +++++++++++++-- net/rxrpc/ar-internal.h | 23 ++-- net/rxrpc/call_event.c | 181 +++++++++++++++---------------- net/rxrpc/call_object.c | 1 - net/rxrpc/input.c | 252 +++++++++++++++++++++++++++---------------- net/rxrpc/output.c | 10 +- net/rxrpc/sendmsg.c | 3 + 7 files changed, 352 insertions(+), 204 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d47b8235fad3..609522a5bd0f 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -132,7 +132,6 @@ EM(rxrpc_skb_get_call_rx, "GET call-rx ") \ EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ EM(rxrpc_skb_get_conn_work, "GET conn-work") \ - EM(rxrpc_skb_get_last_nack, "GET last-nack") \ EM(rxrpc_skb_get_local_work, "GET locl-work") \ EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ @@ -147,7 +146,6 @@ EM(rxrpc_skb_put_error_report, "PUT error-rep") \ EM(rxrpc_skb_put_input, "PUT input ") \ EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ - EM(rxrpc_skb_put_last_nack, "PUT last-nack") \ EM(rxrpc_skb_put_purge, "PUT purge ") \ EM(rxrpc_skb_put_rotate, "PUT rotate ") \ EM(rxrpc_skb_put_unknown, "PUT unknown ") \ @@ -499,6 +497,11 @@ EM(rxrpc_pmtud_reduce_icmp, "Icmp ") \ E_(rxrpc_pmtud_reduce_route, "Route") +#define rxrpc_rotate_traces \ + EM(rxrpc_rotate_trace_hack, "hard-ack") \ + EM(rxrpc_rotate_trace_sack, "soft-ack") \ + E_(rxrpc_rotate_trace_snak, "soft-nack") + /* * Generate enums for tracing information. */ @@ -525,6 +528,7 @@ enum rxrpc_propose_ack_trace { rxrpc_propose_ack_traces } __mode(byte); enum rxrpc_receive_trace { rxrpc_receive_traces } __mode(byte); enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte); enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte); +enum rxrpc_rotate_trace { rxrpc_rotate_traces } __mode(byte); enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte); enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte); @@ -562,6 +566,7 @@ rxrpc_propose_ack_traces; rxrpc_receive_traces; rxrpc_recvmsg_traces; rxrpc_req_ack_traces; +rxrpc_rotate_traces; rxrpc_rtt_rx_traces; rxrpc_rtt_tx_traces; rxrpc_sack_traces; @@ -1667,6 +1672,7 @@ TRACE_EVENT(rxrpc_retransmit, TP_STRUCT__entry( __field(unsigned int, call) + __field(unsigned int, qbase) __field(rxrpc_seq_t, seq) __field(rxrpc_serial_t, serial) __field(ktime_t, expiry) @@ -1674,13 +1680,15 @@ TRACE_EVENT(rxrpc_retransmit, TP_fast_assign( __entry->call = call->debug_id; + __entry->qbase = req->tq->qbase; __entry->seq = req->seq; __entry->serial = txb->serial; __entry->expiry = expiry; ), - TP_printk("c=%08x q=%x r=%x xp=%lld", + TP_printk("c=%08x tq=%x q=%x r=%x xp=%lld", __entry->call, + __entry->qbase, __entry->seq, __entry->serial, ktime_to_us(__entry->expiry)) @@ -1724,7 +1732,7 @@ TRACE_EVENT(rxrpc_congest, memcpy(&__entry->sum, summary, sizeof(__entry->sum)); ), - TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s", + TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, __entry->ack_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), @@ -1732,9 +1740,9 @@ TRACE_EVENT(rxrpc_congest, __print_symbolic(__entry->ca_state, rxrpc_ca_states), __entry->cwnd, __entry->ssthresh, - __entry->nr_sacks, __entry->sum.nr_retained_snacks, - __entry->sum.nr_new_sacks, - __entry->sum.nr_new_snacks, + __entry->nr_sacks, __entry->sum.nr_new_sacks, + __entry->nr_snacks, __entry->sum.nr_new_snacks, + __entry->sum.nr_new_hacks, __entry->top - __entry->hard_ack, __entry->cumul_acks, __entry->dup_acks, @@ -1850,10 +1858,36 @@ TRACE_EVENT(rxrpc_connect_call, &__entry->srx.transport) ); +TRACE_EVENT(rxrpc_apply_acks, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq), + + TP_ARGS(call, tq), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(unsigned int, nr_rep) + __field(rxrpc_seq_t, qbase) + __field(unsigned long, acks) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->acks = tq->segment_acked; + __entry->nr_rep = tq->nr_reported_acks; + ), + + TP_printk("c=%08x tq=%x acks=%016lx rep=%u", + __entry->call, + __entry->qbase, + __entry->acks, + __entry->nr_rep) + ); + TRACE_EVENT(rxrpc_resend, - TP_PROTO(struct rxrpc_call *call, struct sk_buff *ack), + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t ack_serial), - TP_ARGS(call, ack), + TP_ARGS(call, ack_serial), TP_STRUCT__entry( __field(unsigned int, call) @@ -1863,11 +1897,10 @@ TRACE_EVENT(rxrpc_resend, ), TP_fast_assign( - struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL; __entry->call = call->debug_id; __entry->seq = call->acks_hard_ack; __entry->transmitted = call->tx_transmitted; - __entry->ack_serial = sp ? sp->hdr.serial : 0; + __entry->ack_serial = ack_serial; ), TP_printk("c=%08x r=%x q=%x tq=%x", @@ -1877,6 +1910,37 @@ TRACE_EVENT(rxrpc_resend, __entry->transmitted) ); +TRACE_EVENT(rxrpc_rotate, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, + struct rxrpc_ack_summary *summary, rxrpc_seq_t seq, + enum rxrpc_rotate_trace trace), + + TP_ARGS(call, tq, summary, seq, trace), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, qbase) + __field(rxrpc_seq_t, seq) + __field(unsigned int, nr_rep) + __field(enum rxrpc_rotate_trace, trace) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->seq = seq; + __entry->nr_rep = tq->nr_reported_acks; + __entry->trace = trace; + ), + + TP_printk("c=%08x tq=%x q=%x nr=%x %s", + __entry->call, + __entry->qbase, + __entry->seq, + __entry->nr_rep, + __print_symbolic(__entry->trace, rxrpc_rotate_traces)) + ); + TRACE_EVENT(rxrpc_rx_icmp, TP_PROTO(struct rxrpc_peer *peer, struct sock_extended_err *ee, struct sockaddr_rxrpc *srx), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f6e6b2ab6c2a..9a70f0b86570 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -214,9 +214,8 @@ struct rxrpc_skb_priv { rxrpc_seq_t first_ack; /* First packet in acks table */ rxrpc_seq_t prev_ack; /* Highest seq seen */ rxrpc_serial_t acked_serial; /* Packet in response to (or 0) */ + u16 nr_acks; /* Number of acks+nacks */ u8 reason; /* Reason for ack */ - u8 nr_acks; /* Number of acks+nacks */ - u8 nr_nacks; /* Number of nacks */ } ack; }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -734,7 +733,6 @@ struct rxrpc_call { u16 cong_dup_acks; /* Count of ACKs showing missing packets */ u16 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ - struct sk_buff *cong_last_nack; /* Last ACK with nacks received */ /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ @@ -775,11 +773,10 @@ struct rxrpc_ack_summary { u16 nr_new_hacks; /* Number of rotated new ACKs */ u16 nr_new_sacks; /* Number of new soft ACKs in packet */ u16 nr_new_snacks; /* Number of new soft nacks in packet */ - u16 nr_retained_snacks; /* Number of nacks retained between ACKs */ u8 ack_reason; - bool saw_snacks:1; /* T if we saw a soft NACK */ bool new_low_snack:1; /* T if new low soft NACK found */ bool retrans_timeo:1; /* T if reTx due to timeout happened */ + bool need_retransmit:1; /* T if we need transmission */ u8 /*enum rxrpc_congest_change*/ change; }; @@ -858,6 +855,10 @@ struct rxrpc_txqueue { struct rxrpc_txqueue *next; ktime_t xmit_ts_base; rxrpc_seq_t qbase; + u8 nr_reported_acks; /* Number of segments explicitly acked/nacked */ + unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */ + unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */ + unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */ /* The arrays we want to pack into as few cache lines as possible. */ struct { @@ -935,7 +936,7 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why); void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); -void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); +void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response); bool rxrpc_input_call_event(struct rxrpc_call *call); @@ -1383,6 +1384,16 @@ static inline bool after_eq(u32 seq1, u32 seq2) return (s32)(seq1 - seq2) >= 0; } +static inline u32 earliest(u32 seq1, u32 seq2) +{ + return before(seq1, seq2) ? seq1 : seq2; +} + +static inline u32 latest(u32 seq1, u32 seq2) +{ + return after(seq1, seq2) ? seq1 : seq2; +} + static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb) { rxrpc_get_skb(skb, rxrpc_skb_get_call_rx); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 2311e5c737e8..e25921d39d4d 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -65,9 +65,9 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Retransmit one or more packets. */ -static void rxrpc_retransmit_data(struct rxrpc_call *call, +static bool rxrpc_retransmit_data(struct rxrpc_call *call, struct rxrpc_send_data_req *req, - ktime_t rto) + ktime_t rto, bool skip_too_young) { struct rxrpc_txqueue *tq = req->tq; unsigned int ix = req->seq & RXRPC_TXQ_MASK; @@ -78,9 +78,11 @@ static void rxrpc_retransmit_data(struct rxrpc_call *call, xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); resend_at = ktime_add(xmit_ts, rto); - trace_rxrpc_retransmit(call, req, txb, - ktime_sub(resend_at, req->now)); + trace_rxrpc_retransmit(call, req, txb, ktime_sub(resend_at, req->now)); + if (skip_too_young && ktime_after(resend_at, req->now)) + return false; + __set_bit(ix, &tq->segment_retransmitted); txb->flags |= RXRPC_TXBUF_RESENT; rxrpc_send_data_packet(call, req); rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); @@ -89,128 +91,119 @@ static void rxrpc_retransmit_data(struct rxrpc_call *call, req->n = 0; req->did_send = true; req->now = ktime_get_real(); + return true; } /* * Perform retransmission of NAK'd and unack'd packets. */ -void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) +void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response) { struct rxrpc_send_data_req req = { .now = ktime_get_real(), }; - struct rxrpc_ackpacket *ack = NULL; - struct rxrpc_skb_priv *sp; - struct rxrpc_txqueue *tq; - struct rxrpc_txbuf *txb; - rxrpc_seq_t transmitted = call->tx_transmitted, seq; - ktime_t next_resend = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); - ktime_t resend_at = KTIME_MAX, delay; - bool unacked = false, did_send = false; - unsigned int qix; + struct rxrpc_txqueue *tq = call->tx_queue; + ktime_t lowest_xmit_ts = KTIME_MAX, rto = ns_to_ktime(call->peer->rto_us * NSEC_PER_USEC); + bool unacked = false; _enter("{%d,%d}", call->tx_bottom, call->tx_top); - if (call->tx_bottom == call->tx_top) - goto no_resend; + if (call->tx_bottom == call->tx_top) { + call->resend_at = KTIME_MAX; + trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); + return; + } - trace_rxrpc_resend(call, ack_skb); - tq = call->tx_queue; - seq = call->tx_bottom; + trace_rxrpc_resend(call, ack_serial); - /* Scan the soft ACK table and resend any explicitly NAK'd packets. */ - if (ack_skb) { - sp = rxrpc_skb(ack_skb); - ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); + /* Scan the transmission queue, looking for explicitly NAK'd packets. */ + do { + unsigned long naks = ~tq->segment_acked; + rxrpc_seq_t tq_top = tq->qbase + RXRPC_NR_TXQUEUE - 1; - for (int i = 0; i < sp->ack.nr_acks; i++) { - rxrpc_seq_t aseq; + if (after(tq->qbase, call->tx_transmitted)) + break; - if (ack->acks[i] & 1) - continue; - aseq = sp->ack.first_ack + i; - while (after_eq(aseq, tq->qbase + RXRPC_NR_TXQUEUE)) - tq = tq->next; - seq = aseq; - qix = seq - tq->qbase; - txb = tq->bufs[qix]; - if (after(seq, transmitted)) - goto no_further_resend; - - resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); - resend_at = ktime_add(resend_at, rto); - if (after(txb->serial, call->acks_highest_serial)) { - if (ktime_after(resend_at, req.now) && - ktime_before(resend_at, next_resend)) - next_resend = resend_at; + if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE) + naks &= (1UL << tq->nr_reported_acks) - 1; + + _debug("retr %16lx %u c=%08x [%x]", + tq->segment_acked, tq->nr_reported_acks, call->debug_id, tq->qbase); + _debug("nack %16lx", naks); + + while (naks) { + unsigned int ix = __ffs(naks); + struct rxrpc_txbuf *txb = tq->bufs[ix]; + + __clear_bit(ix, &naks); + if (after(txb->serial, call->acks_highest_serial)) continue; /* Ack point not yet reached */ - } rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); req.tq = tq; - req.seq = seq; + req.seq = tq->qbase + ix; req.n = 1; - rxrpc_retransmit_data(call, &req, rto); - - if (after_eq(seq, call->tx_top)) - goto no_further_resend; + rxrpc_retransmit_data(call, &req, rto, false); } - } - /* Fast-forward through the Tx queue to the point the peer says it has - * seen. Anything between the soft-ACK table and that point will get - * ACK'd or NACK'd in due course, so don't worry about it here; here we - * need to consider retransmitting anything beyond that point. - */ - seq = call->acks_prev_seq; - if (after_eq(seq, call->tx_transmitted)) - goto no_further_resend; - seq++; - - while (after_eq(seq, tq->qbase + RXRPC_NR_TXQUEUE)) - tq = tq->next; - - while (before_eq(seq, call->tx_transmitted)) { - qix = seq - tq->qbase; - if (qix >= RXRPC_NR_TXQUEUE) { - tq = tq->next; - continue; + /* Anything after the soft-ACK table up to and including + * ack.previousPacket will get ACK'd or NACK'd in due course, + * so don't worry about those here. We do, however, need to + * consider retransmitting anything beyond that point. + */ + if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE && + after(tq_top, call->acks_prev_seq)) { + rxrpc_seq_t start = latest(call->acks_prev_seq, + tq->qbase + tq->nr_reported_acks); + rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted); + + _debug("unrep %x-%x", start, stop); + for (rxrpc_seq_t seq = start; before(seq, stop); seq++) { + struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; + + if (ping_response && + before(txb->serial, call->acks_highest_serial)) + break; /* Wasn't accounted for by a more recent ping. */ + req.tq = tq; + req.seq = seq; + req.n = 1; + if (rxrpc_retransmit_data(call, &req, rto, true)) + unacked = true; + } } - txb = tq->bufs[qix]; - resend_at = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[qix]); - resend_at = ktime_add(resend_at, rto); - if (ack && ack->reason == RXRPC_ACK_PING_RESPONSE && - before(txb->serial, ntohl(ack->serial))) - goto do_resend; /* Wasn't accounted for by a more recent ping. */ + /* Work out the next retransmission timeout. */ + if (ktime_before(tq->xmit_ts_base, lowest_xmit_ts)) { + unsigned int lowest_us = UINT_MAX; - if (ktime_after(resend_at, req.now)) { - if (ktime_before(resend_at, next_resend)) - next_resend = resend_at; - seq++; - continue; - } + for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) + if (!test_bit(i, &tq->segment_acked) && + tq->segment_xmit_ts[i] < lowest_us) + lowest_us = tq->segment_xmit_ts[i]; + _debug("lowest[%x] %llx %u", tq->qbase, tq->xmit_ts_base, lowest_us); - do_resend: - unacked = true; + if (lowest_us != UINT_MAX) { + ktime_t lowest_ns = ktime_add_us(tq->xmit_ts_base, lowest_us); - req.tq = tq; - req.seq = seq; - req.n = 1; - rxrpc_retransmit_data(call, &req, rto); - seq++; - } + if (ktime_before(lowest_ns, lowest_xmit_ts)) + lowest_xmit_ts = lowest_ns; + } + } + } while ((tq = tq->next)); + + if (lowest_xmit_ts < KTIME_MAX) { + ktime_t delay = rxrpc_get_rto_backoff(call->peer, req.did_send); + ktime_t resend_at = ktime_add(lowest_xmit_ts, delay); -no_further_resend: -no_resend: - if (resend_at < KTIME_MAX) { - delay = rxrpc_get_rto_backoff(call->peer, did_send); - resend_at = ktime_add(resend_at, delay); + _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); + call->resend_at = resend_at; trace_rxrpc_timer_set(call, resend_at - req.now, rxrpc_timer_trace_resend_reset); + } else { + call->resend_at = KTIME_MAX; + trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); } - call->resend_at = resend_at; if (unacked) rxrpc_congestion_timeout(call); @@ -494,7 +487,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY && !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags)) - rxrpc_resend(call, NULL); + rxrpc_resend(call, 0, false); if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index a9682b31a4f9..bba058055c97 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -691,7 +691,6 @@ static void rxrpc_destroy_call(struct work_struct *work) del_timer_sync(&call->timer); - rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); rxrpc_cleanup_tx_buffers(call); rxrpc_cleanup_rx_buffers(call); rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c25d816aafee..6e7ff133b5aa 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -34,8 +34,6 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, rxrpc_serial_t acked_serial) { - bool resend = false; - summary->change = rxrpc_cong_no_change; summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; @@ -52,12 +50,13 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } call->cong_cumul_acks += summary->nr_new_sacks; + call->cong_cumul_acks += summary->nr_new_hacks; if (call->cong_cumul_acks > 255) call->cong_cumul_acks = 255; switch (call->cong_ca_state) { case RXRPC_CA_SLOW_START: - if (summary->saw_snacks) + if (call->acks_nr_snacks > 0) goto packet_loss_detected; if (call->cong_cumul_acks > 0) call->cong_cwnd += 1; @@ -68,7 +67,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto out; case RXRPC_CA_CONGEST_AVOIDANCE: - if (summary->saw_snacks) + if (call->acks_nr_snacks > 0) goto packet_loss_detected; /* We analyse the number of packets that get ACK'd per RTT @@ -87,7 +86,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, goto out; case RXRPC_CA_PACKET_LOSS: - if (!summary->saw_snacks) + if (call->acks_nr_snacks == 0) goto resume_normality; if (summary->new_low_snack) { @@ -108,7 +107,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_cwnd = call->cong_ssthresh + 3; call->cong_extra = 0; call->cong_dup_acks = 0; - resend = true; + summary->need_retransmit = true; goto out; case RXRPC_CA_FAST_RETRANSMIT: @@ -119,12 +118,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, if (call->cong_dup_acks == 2) { summary->change = rxrpc_cong_retransmit_again; call->cong_dup_acks = 0; - resend = true; + summary->need_retransmit = true; } } else { summary->change = rxrpc_cong_progress; call->cong_cwnd = call->cong_ssthresh; - if (!summary->saw_snacks) + if (call->acks_nr_snacks == 0) goto resume_normality; } goto out; @@ -149,8 +148,6 @@ out_no_clear_ca: if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW) call->cong_cwnd = RXRPC_TX_MAX_WINDOW; trace_rxrpc_congest(call, summary, acked_serial); - if (resend) - rxrpc_resend(call, skb); return; packet_loss_detected: @@ -212,6 +209,13 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, trace_rxrpc_tx_rotate(call, seq, to); trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate); + if (call->acks_lowest_nak == call->tx_bottom) { + call->acks_lowest_nak = to; + } else if (after(to, call->acks_lowest_nak)) { + summary->new_low_snack = true; + call->acks_lowest_nak = to; + } + /* We may have a left over fully-consumed buffer at the front that we * couldn't drop before (rotate_and_keep below). */ @@ -231,6 +235,25 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; } + + if (ix == tq->nr_reported_acks) { + /* Packet directly hard ACK'd. */ + tq->nr_reported_acks++; + summary->nr_new_hacks++; + __set_bit(ix, &tq->segment_acked); + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_hack); + } else if (test_bit(ix, &tq->segment_acked)) { + /* Soft ACK -> hard ACK. */ + call->acks_nr_sacks--; + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_sack); + } else { + /* Soft NAK -> hard ACK. */ + call->acks_nr_snacks--; + summary->nr_new_hacks++; + __set_bit(ix, &tq->segment_acked); + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_snak); + } + rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated); tq->bufs[ix] = NULL; @@ -268,13 +291,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, _debug("%x,%x,%x,%d", to, call->tx_bottom, call->tx_top, rot_last); - if (call->acks_lowest_nak == call->tx_bottom) { - call->acks_lowest_nak = to; - } else if (after(to, call->acks_lowest_nak)) { - summary->new_low_snack = true; - call->acks_lowest_nak = to; - } - wake_up(&call->waitq); return rot_last; } @@ -293,11 +309,6 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, call->resend_at = KTIME_MAX; trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); - if (unlikely(call->cong_last_nack)) { - rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); - call->cong_last_nack = NULL; - } - switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: @@ -770,40 +781,92 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb wake_up(&call->waitq); } +#if defined(CONFIG_X86) && __GNUC__ && !defined(__clang__) +/* Clang doesn't support the %z constraint modifier */ +#define shiftr_adv_rotr(shift_from, rotate_into) ({ \ + asm(" shr%z1 %1\n" \ + " inc %0\n" \ + " rcr%z2 %2\n" \ + : "+d"(shift_from), "+m"(*(shift_from)), "+rm"(rotate_into) \ + ); \ + }) +#else +#define shiftr_adv_rotr(shift_from, rotate_into) ({ \ + typeof(rotate_into) __bit0 = *(shift_from) & 1; \ + *(shift_from) >>= 1; \ + shift_from++; \ + rotate_into >>= 1; \ + rotate_into |= __bit0 << (sizeof(rotate_into) * 8 - 1); \ + }) +#endif + /* - * Determine how many nacks from the previous ACK have now been satisfied. + * Process a batch of soft ACKs specific to a transmission queue segment. */ -static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, - struct rxrpc_ack_summary *summary, - rxrpc_seq_t hard_ack) +static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned long extracted_acks, + int nr_reported, + rxrpc_seq_t seq, + rxrpc_seq_t *lowest_nak) { - struct sk_buff *skb = call->cong_last_nack; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int i, new_acks = 0, retained_nacks = 0; - rxrpc_seq_t seq = hard_ack + 1, old_seq = sp->ack.first_ack; - u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); + unsigned long old_reported, flipped, new_acks, a_to_n, n_to_a; + int new, a, n; + + old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks); + _enter("{%x,%lx,%d},%lx,%d,%x", + tq->qbase, tq->segment_acked, tq->nr_reported_acks, + extracted_acks, nr_reported, seq); + + _debug("[%x]", tq->qbase); + _debug("tq %16lx %u", tq->segment_acked, tq->nr_reported_acks); + _debug("sack %16lx %u", extracted_acks, nr_reported); + + /* See how many previously logged ACKs/NAKs have flipped. */ + flipped = (tq->segment_acked ^ extracted_acks) & old_reported; + if (flipped) { + n_to_a = ~tq->segment_acked & flipped; /* Old NAK -> ACK */ + a_to_n = tq->segment_acked & flipped; /* Old ACK -> NAK */ + a = hweight_long(n_to_a); + n = hweight_long(a_to_n); + _debug("flip %16lx", flipped); + _debug("ntoa %16lx %d", n_to_a, a); + _debug("aton %16lx %d", a_to_n, n); + call->acks_nr_sacks += a - n; + call->acks_nr_snacks += n - a; + summary->nr_new_sacks += a; + summary->nr_new_snacks += n; + } - if (after_eq(seq, old_seq + sp->ack.nr_acks)) { - summary->nr_new_sacks += sp->ack.nr_nacks; - summary->nr_new_sacks += seq - (old_seq + sp->ack.nr_acks); - summary->nr_retained_snacks = 0; - } else if (seq == old_seq) { - summary->nr_retained_snacks = sp->ack.nr_nacks; - } else { - for (i = 0; i < sp->ack.nr_acks; i++) { - if (acks[i] == RXRPC_ACK_TYPE_NACK) { - if (before(old_seq + i, seq)) - new_acks++; - else - retained_nacks++; - } + /* See how many new ACKs/NAKs have been acquired. */ + new = nr_reported - tq->nr_reported_acks; + if (new > 0) { + new_acks = extracted_acks & ~old_reported; + if (new_acks) { + a = hweight_long(new_acks); + n = new - a; + _debug("new_a %16lx new=%d a=%d n=%d", new_acks, new, a, n); + call->acks_nr_sacks += a; + call->acks_nr_snacks += n; + summary->nr_new_sacks += a; + summary->nr_new_snacks += n; + } else { + call->acks_nr_snacks += new; + summary->nr_new_snacks += new; } - - summary->nr_new_sacks += new_acks; - summary->nr_retained_snacks = retained_nacks; } - return old_seq + sp->ack.nr_acks - 1; + tq->nr_reported_acks = nr_reported; + tq->segment_acked = extracted_acks; + trace_rxrpc_apply_acks(call, tq); + + if (extracted_acks != ~0UL) { + rxrpc_seq_t lowest = seq + ffz(extracted_acks); + + if (before(lowest, *lowest_nak)) + *lowest_nak = lowest; + } } /* @@ -817,39 +880,50 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, */ static void rxrpc_input_soft_acks(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - struct sk_buff *skb, - rxrpc_seq_t since) + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int i, old_nacks = 0; - rxrpc_seq_t lowest_nak = call->acks_hard_ack + sp->ack.nr_acks + 1; - rxrpc_seq_t seq = call->acks_hard_ack; + struct rxrpc_txqueue *tq = call->tx_queue; + unsigned long extracted = ~0UL; + unsigned int nr = 0; + rxrpc_seq_t seq = call->acks_hard_ack + 1; + rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks; u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); - for (i = 0; i < sp->ack.nr_acks; i++) { - seq++; - if (acks[i] == RXRPC_ACK_TYPE_ACK) { - call->acks_nr_sacks++; - if (after(seq, since)) - summary->nr_new_sacks++; - } else { - summary->saw_snacks = true; - if (before_eq(seq, since)) { - /* Overlap with previous ACK */ - old_nacks++; - } else { - summary->nr_new_snacks++; - sp->ack.nr_nacks++; - } + _enter("%x,%x,%u", tq->qbase, seq, sp->ack.nr_acks); + + while (after(seq, tq->qbase + RXRPC_NR_TXQUEUE - 1)) + tq = tq->next; - if (before(seq, lowest_nak)) - lowest_nak = seq; + for (unsigned int i = 0; i < sp->ack.nr_acks; i++) { + /* Decant ACKs until we hit a txqueue boundary. */ + shiftr_adv_rotr(acks, extracted); + if (i == 256) { + acks -= i; + i = 0; } + seq++; + nr++; + if ((seq & RXRPC_TXQ_MASK) != 0) + continue; + + _debug("bound %16lx %u", extracted, nr); + + rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, + seq - RXRPC_NR_TXQUEUE, &lowest_nak); + extracted = ~0UL; + nr = 0; + tq = tq->next; + prefetch(tq); } - if (lowest_nak != call->acks_lowest_nak) { - call->acks_lowest_nak = lowest_nak; - summary->new_low_snack = true; + if (nr) { + unsigned int nr_reported = seq & RXRPC_TXQ_MASK; + + extracted >>= RXRPC_NR_TXQUEUE - nr_reported; + _debug("tail %16lx %u", extracted, nr_reported); + rxrpc_input_soft_ack_tq(call, summary, tq, extracted, nr_reported, + seq & ~RXRPC_TXQ_MASK, &lowest_nak); } /* We *can* have more nacks than we did - the peer is permitted to drop @@ -857,9 +931,14 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, * possible for the nack distribution to change whilst the number of * nacks stays the same or goes down. */ - if (old_nacks < summary->nr_retained_snacks) - summary->nr_new_sacks += summary->nr_retained_snacks - old_nacks; - summary->nr_retained_snacks = old_nacks; + if (lowest_nak != call->acks_lowest_nak) { + call->acks_lowest_nak = lowest_nak; + summary->new_low_snack = true; + } + + _debug("summary A=%d+%d N=%d+%d", + call->acks_nr_sacks, summary->nr_new_sacks, + call->acks_nr_snacks, summary->nr_new_snacks); } /* @@ -902,7 +981,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_acktrailer trailer; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_serial_t ack_serial, acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; _enter(""); @@ -920,6 +999,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) trace_rxrpc_rx_ack(call, sp); rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); + prefetch(call->tx_queue); if (acked_serial != 0) { switch (summary.ack_reason) { @@ -980,16 +1060,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) skb_condense(skb); - if (call->cong_last_nack) { - since = rxrpc_input_check_prev_ack(call, &summary, hard_ack); - rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); - call->cong_last_nack = NULL; - } else { - summary.nr_new_sacks = hard_ack - call->acks_hard_ack; - call->acks_lowest_nak = hard_ack + nr_acks; - since = hard_ack; - } - call->acks_latest_ts = skb->tstamp; call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; @@ -1037,9 +1107,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); - rxrpc_input_soft_acks(call, &summary, skb, since); - rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); - call->cong_last_nack = skb; + rxrpc_input_soft_acks(call, &summary, skb); } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && @@ -1049,6 +1117,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_ping_for_lost_reply); rxrpc_congestion_management(call, skb, &summary, acked_serial); + if (summary.need_retransmit) + rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE); send_response: if (summary.ack_reason == RXRPC_ACK_PING) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 7ed928b6f0e1..978c2dc6a7d4 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -461,7 +461,7 @@ dont_set_request_ack: len += sizeof(*jumbo); } - trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, false); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags | flags, false); kv->iov_len = len; return len; } @@ -522,6 +522,13 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } /* Set timeouts */ + if (call->peer->rtt_count > 1) { + ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + + call->ack_lost_at = ktime_add(req->now, delay); + trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); + } + if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { ktime_t delay = ms_to_ktime(READ_ONCE(call->next_rx_timo)); @@ -596,6 +603,7 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req ret = 0; trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, true); + conn->peer->last_tx_at = ktime_get_seconds(); goto done; } } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index dfbf9f4b24b6..381b25597f4e 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -299,6 +299,9 @@ static int rxrpc_alloc_txqueue(struct sock *sk, struct rxrpc_call *call) kfree(tq); return -ENOMEM; } else { + /* We start at seq 1, so pretend seq 0 is hard-acked. */ + tq->nr_reported_acks = 1; + tq->segment_acked = 1UL; tq->qbase = 0; call->tx_qbase = 0; call->send_queue = tq; -- cgit v1.2.3 From dcdff0d8e3b61033b28c72926997d458949fcc05 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:52 +0000 Subject: rxrpc: Store the DATA serial in the txqueue and use this in RTT calc Store the serial number set on a DATA packet at the point of transmission in the rxrpc_txqueue struct and when an ACK is received, match the reference number in the ACK by trawling the txqueue rather than sharing an RTT table with ACK RTT. This can be done as part of Tx queue rotation. This means we have a lot more RTT samples available and is faster to search with all the serial numbers packed together into a few cachelines rather than being hung off different txbufs. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://patch.msgid.link/20241204074710.990092-25-dhowells@redhat.com Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 14 +++---- net/rxrpc/ar-internal.h | 4 ++ net/rxrpc/call_event.c | 8 ++-- net/rxrpc/input.c | 94 ++++++++++++++++++++++++++++---------------- net/rxrpc/output.c | 6 ++- 5 files changed, 79 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 609522a5bd0f..798bea0853c4 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -337,11 +337,10 @@ E_(rxrpc_rtt_tx_ping, "PING") #define rxrpc_rtt_rx_traces \ - EM(rxrpc_rtt_rx_other_ack, "OACK") \ + EM(rxrpc_rtt_rx_data_ack, "DACK") \ EM(rxrpc_rtt_rx_obsolete, "OBSL") \ EM(rxrpc_rtt_rx_lost, "LOST") \ - EM(rxrpc_rtt_rx_ping_response, "PONG") \ - E_(rxrpc_rtt_rx_requested_ack, "RACK") + E_(rxrpc_rtt_rx_ping_response, "PONG") #define rxrpc_timer_traces \ EM(rxrpc_timer_trace_delayed_ack, "DelayAck ") \ @@ -1695,10 +1694,9 @@ TRACE_EVENT(rxrpc_retransmit, ); TRACE_EVENT(rxrpc_congest, - TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - rxrpc_serial_t ack_serial), + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary), - TP_ARGS(call, summary, ack_serial), + TP_ARGS(call, summary), TP_STRUCT__entry( __field(unsigned int, call) @@ -1706,7 +1704,6 @@ TRACE_EVENT(rxrpc_congest, __field(rxrpc_seq_t, hard_ack) __field(rxrpc_seq_t, top) __field(rxrpc_seq_t, lowest_nak) - __field(rxrpc_serial_t, ack_serial) __field(u16, nr_sacks) __field(u16, nr_snacks) __field(u16, cwnd) @@ -1722,7 +1719,6 @@ TRACE_EVENT(rxrpc_congest, __entry->hard_ack = call->acks_hard_ack; __entry->top = call->tx_top; __entry->lowest_nak = call->acks_lowest_nak; - __entry->ack_serial = ack_serial; __entry->nr_sacks = call->acks_nr_sacks; __entry->nr_snacks = call->acks_nr_snacks; __entry->cwnd = call->cong_cwnd; @@ -1734,7 +1730,7 @@ TRACE_EVENT(rxrpc_congest, TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u A=%u+%u/%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", __entry->call, - __entry->ack_serial, + __entry->sum.acked_serial, __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), __entry->hard_ack, __print_symbolic(__entry->ca_state, rxrpc_ca_states), diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 9a70f0b86570..297be421639c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -769,6 +769,7 @@ struct rxrpc_call { * Summary of a new ACK and the changes it made to the Tx buffer packet states. */ struct rxrpc_ack_summary { + rxrpc_serial_t acked_serial; /* Serial number ACK'd */ u16 in_flight; /* Number of unreceived transmissions */ u16 nr_new_hacks; /* Number of rotated new ACKs */ u16 nr_new_sacks; /* Number of new soft ACKs in packet */ @@ -777,6 +778,7 @@ struct rxrpc_ack_summary { bool new_low_snack:1; /* T if new low soft NACK found */ bool retrans_timeo:1; /* T if reTx due to timeout happened */ bool need_retransmit:1; /* T if we need transmission */ + bool rtt_sample_avail:1; /* T if RTT sample available */ u8 /*enum rxrpc_congest_change*/ change; }; @@ -859,12 +861,14 @@ struct rxrpc_txqueue { unsigned long segment_acked; /* Bit-per-buf: Set if ACK'd */ unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */ unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */ + unsigned long rtt_samples; /* Bit-per-buf: Set if available for RTT */ /* The arrays we want to pack into as few cache lines as possible. */ struct { #define RXRPC_NR_TXQUEUE BITS_PER_LONG #define RXRPC_TXQ_MASK (RXRPC_NR_TXQUEUE - 1) struct rxrpc_txbuf *bufs[RXRPC_NR_TXQUEUE]; + unsigned int segment_serial[RXRPC_NR_TXQUEUE]; unsigned int segment_xmit_ts[RXRPC_NR_TXQUEUE]; } ____cacheline_aligned; }; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e25921d39d4d..f71773b18e22 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -159,11 +159,11 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted); _debug("unrep %x-%x", start, stop); - for (rxrpc_seq_t seq = start; before(seq, stop); seq++) { - struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; + for (rxrpc_seq_t seq = start; before_eq(seq, stop); seq++) { + rxrpc_serial_t serial = tq->segment_serial[seq & RXRPC_TXQ_MASK]; if (ping_response && - before(txb->serial, call->acks_highest_serial)) + before(serial, call->acks_highest_serial)) break; /* Wasn't accounted for by a more recent ping. */ req.tq = tq; req.seq = seq; @@ -198,7 +198,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); call->resend_at = resend_at; - trace_rxrpc_timer_set(call, resend_at - req.now, + trace_rxrpc_timer_set(call, ktime_sub(resend_at, req.now), rxrpc_timer_trace_resend_reset); } else { call->resend_at = KTIME_MAX; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 6e7ff133b5aa..41b4fb56f96c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -30,9 +30,7 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, * Do TCP-style congestion management [RFC 5681]. */ static void rxrpc_congestion_management(struct rxrpc_call *call, - struct sk_buff *skb, - struct rxrpc_ack_summary *summary, - rxrpc_serial_t acked_serial) + struct rxrpc_ack_summary *summary) { summary->change = rxrpc_cong_no_change; summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; @@ -44,7 +42,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, if (call->cong_cwnd >= call->cong_ssthresh && call->cong_ca_state == RXRPC_CA_SLOW_START) { call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; - call->cong_tstamp = skb->tstamp; + call->cong_tstamp = call->acks_latest_ts; call->cong_cumul_acks = 0; } } @@ -62,7 +60,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_cwnd += 1; if (call->cong_cwnd >= call->cong_ssthresh) { call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; - call->cong_tstamp = skb->tstamp; + call->cong_tstamp = call->acks_latest_ts; } goto out; @@ -75,12 +73,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, */ if (call->peer->rtt_count == 0) goto out; - if (ktime_before(skb->tstamp, + if (ktime_before(call->acks_latest_ts, ktime_add_us(call->cong_tstamp, call->peer->srtt_us >> 3))) goto out_no_clear_ca; summary->change = rxrpc_cong_rtt_window_end; - call->cong_tstamp = skb->tstamp; + call->cong_tstamp = call->acks_latest_ts; if (call->cong_cumul_acks >= call->cong_cwnd) call->cong_cwnd++; goto out; @@ -137,7 +135,7 @@ resume_normality: summary->change = rxrpc_cong_cleared_nacks; call->cong_dup_acks = 0; call->cong_extra = 0; - call->cong_tstamp = skb->tstamp; + call->cong_tstamp = call->acks_latest_ts; if (call->cong_cwnd < call->cong_ssthresh) call->cong_ca_state = RXRPC_CA_SLOW_START; else @@ -147,7 +145,7 @@ out: out_no_clear_ca: if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW) call->cong_cwnd = RXRPC_TX_MAX_WINDOW; - trace_rxrpc_congest(call, summary, acked_serial); + trace_rxrpc_congest(call, summary); return; packet_loss_detected: @@ -194,11 +192,29 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND); } +/* + * Add an RTT sample derived from an ACK'd DATA packet. + */ +static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + int ix, + rxrpc_serial_t ack_serial) +{ + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, + summary->acked_serial, ack_serial, + ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]), + call->acks_latest_ts); + summary->rtt_sample_avail = false; + __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */ +} + /* * Apply a hard ACK by advancing the Tx window. */ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, - struct rxrpc_ack_summary *summary) + struct rxrpc_ack_summary *summary, + rxrpc_serial_t ack_serial) { struct rxrpc_txqueue *tq = call->tx_queue; rxrpc_seq_t seq = call->tx_bottom + 1; @@ -236,6 +252,11 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, rot_last = true; } + if (summary->rtt_sample_avail && + summary->acked_serial == tq->segment_serial[ix] && + test_bit(ix, &tq->rtt_samples)) + rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial); + if (ix == tq->nr_reported_acks) { /* Packet directly hard ACK'd. */ tq->nr_reported_acks++; @@ -348,7 +369,7 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) } if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { - if (!rxrpc_rotate_tx_window(call, top, &summary)) { + if (!rxrpc_rotate_tx_window(call, top, &summary, 0)) { rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply); return false; } @@ -800,6 +821,19 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb }) #endif +/* + * Deal with RTT samples from soft ACKs. + */ +static void rxrpc_input_soft_rtt(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + rxrpc_serial_t ack_serial) +{ + for (int ix = 0; ix < RXRPC_NR_TXQUEUE; ix++) + if (summary->acked_serial == tq->segment_serial[ix]) + return rxrpc_add_data_rtt_sample(call, summary, tq, ix, ack_serial); +} + /* * Process a batch of soft ACKs specific to a transmission queue segment. */ @@ -909,6 +943,8 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, _debug("bound %16lx %u", extracted, nr); + if (summary->rtt_sample_avail) + rxrpc_input_soft_rtt(call, summary, tq, sp->hdr.serial); rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, seq - RXRPC_NR_TXQUEUE, &lowest_nak); extracted = ~0UL; @@ -980,7 +1016,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ack_summary summary = { 0 }; struct rxrpc_acktrailer trailer; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_serial_t ack_serial, acked_serial; + rxrpc_serial_t ack_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; @@ -989,11 +1025,11 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); ack_serial = sp->hdr.serial; - acked_serial = sp->ack.acked_serial; first_soft_ack = sp->ack.first_ack; prev_pkt = sp->ack.prev_ack; nr_acks = sp->ack.nr_acks; hard_ack = first_soft_ack - 1; + summary.acked_serial = sp->ack.acked_serial; summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? sp->ack.reason : RXRPC_ACK__INVALID); @@ -1001,21 +1037,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); prefetch(call->tx_queue); - if (acked_serial != 0) { - switch (summary.ack_reason) { - case RXRPC_ACK_PING_RESPONSE: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_ping_response); - break; - case RXRPC_ACK_REQUESTED: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_requested_ack); - break; - default: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_other_ack); - break; - } + if (summary.acked_serial != 0) { + if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_complete_rtt_probe(call, skb->tstamp, summary.acked_serial, + ack_serial, rxrpc_rtt_rx_ping_response); + else + summary.rtt_sample_avail = true; } /* If we get an EXCEEDS_WINDOW ACK from the server, it probably @@ -1068,8 +1095,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_ACK_PING: break; default: - if (acked_serial && after(acked_serial, call->acks_highest_serial)) - call->acks_highest_serial = acked_serial; + if (summary.acked_serial && + after(summary.acked_serial, call->acks_highest_serial)) + call->acks_highest_serial = summary.acked_serial; break; } @@ -1098,7 +1126,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); if (after(hard_ack, call->tx_bottom)) { - if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { + if (rxrpc_rotate_tx_window(call, hard_ack, &summary, ack_serial)) { rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); goto send_response; } @@ -1116,7 +1144,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ping(call, ack_serial, rxrpc_propose_ack_ping_for_lost_reply); - rxrpc_congestion_management(call, skb, &summary, acked_serial); + rxrpc_congestion_management(call, &summary); if (summary.need_retransmit) rxrpc_resend(call, ack_serial, summary.ack_reason == RXRPC_ACK_PING_RESPONSE); @@ -1136,7 +1164,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) + if (rxrpc_rotate_tx_window(call, call->tx_top, &summary, 0)) rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall); } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 978c2dc6a7d4..20bf45317264 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -436,7 +436,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, trace_rxrpc_req_ack(call->debug_id, txb->seq, why); if (why != rxrpc_reqack_no_srv_last) { flags |= RXRPC_REQUEST_ACK; - rxrpc_begin_rtt_probe(call, serial, req->now, rxrpc_rtt_tx_data); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial); call->peer->rtt_last_req = req->now; } dont_set_request_ack: @@ -508,6 +508,10 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); tq->segment_xmit_ts[ix] = xmit_ts; + tq->segment_serial[ix] = serial; + if (i + 1 == req->n) + /* Only sample the last subpacket in a jumbo. */ + __set_bit(ix, &tq->rtt_samples); len += rxrpc_prepare_data_subpacket(call, req, txb, serial, i); serial++; seq++; -- cgit v1.2.3 From 93dfca65a1df42a3c8b1094299dc42ab8f18e5c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:55 +0000 Subject: rxrpc: Adjust the rxrpc_rtt_rx tracepoint Adjust the rxrpc_rtt_rx tracepoint in the following ways: (1) Display the collected RTT sample in the rxrpc_rtt_rx trace. (2) Move the division of srtt by 8 to the TP_printk() rather doing it before invoking the trace point. (3) Display the min_rtt value. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 14 ++++++++++---- net/rxrpc/input.c | 4 ++-- net/rxrpc/rtt.c | 2 +- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 798bea0853c4..6e929f4448ac 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1415,9 +1415,9 @@ TRACE_EVENT(rxrpc_rtt_rx, TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, int slot, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, - u32 rtt, u32 rto), + u32 rtt, u32 srtt, u32 rto), - TP_ARGS(call, why, slot, send_serial, resp_serial, rtt, rto), + TP_ARGS(call, why, slot, send_serial, resp_serial, rtt, srtt, rto), TP_STRUCT__entry( __field(unsigned int, call) @@ -1426,7 +1426,9 @@ TRACE_EVENT(rxrpc_rtt_rx, __field(rxrpc_serial_t, send_serial) __field(rxrpc_serial_t, resp_serial) __field(u32, rtt) + __field(u32, srtt) __field(u32, rto) + __field(u32, min_rtt) ), TP_fast_assign( @@ -1436,17 +1438,21 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->send_serial = send_serial; __entry->resp_serial = resp_serial; __entry->rtt = rtt; + __entry->srtt = srtt; __entry->rto = rto; + __entry->min_rtt = minmax_get(&call->peer->min_rtt) ), - TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u rto=%u", + TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u srtt=%u rto=%u min=%u", __entry->call, __entry->slot, __print_symbolic(__entry->why, rxrpc_rtt_rx_traces), __entry->send_serial, __entry->resp_serial, __entry->rtt, - __entry->rto) + __entry->srtt / 8, + __entry->rto, + __entry->min_rtt) ); TRACE_EVENT(rxrpc_timer_set, diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index c682e95e15dc..1eb9c22aba51 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -740,7 +740,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, */ if (after(acked_serial, orig_serial)) { trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_obsolete, i, - orig_serial, acked_serial, 0, 0); + orig_serial, acked_serial, 0, 0, 0); clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); smp_wmb(); set_bit(i, &call->rtt_avail); @@ -748,7 +748,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, } if (!matched) - trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0); + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0, 0); } /* diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index 8048467f4bee..e0b7d99854b4 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -175,7 +175,7 @@ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, spin_unlock(&peer->rtt_input_lock); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, - peer->srtt_us >> 3, peer->rto_us); + rtt_us, peer->srtt_us, peer->rto_us); } /* -- cgit v1.2.3 From a3d7f46d983fb2ed528b9cceb457c067fe4277a2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:46:56 +0000 Subject: rxrpc: Display userStatus in rxrpc_rx_ack trace Display the userStatus field from the Rx packet header in the rxrpc_rx_ack trace line. This is used for flow control purposes by FS.StoreData-type kafs RPC calls. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 6e929f4448ac..7681c67f7d65 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1031,11 +1031,13 @@ TRACE_EVENT(rxrpc_rx_ack, __field(rxrpc_seq_t, prev) __field(u8, reason) __field(u8, n_acks) + __field(u8, user_status) ), TP_fast_assign( __entry->call = call->debug_id; __entry->serial = sp->hdr.serial; + __entry->user_status = sp->hdr.userStatus; __entry->ack_serial = sp->ack.acked_serial; __entry->first = sp->ack.first_ack; __entry->prev = sp->ack.prev_ack; @@ -1043,11 +1045,12 @@ TRACE_EVENT(rxrpc_rx_ack, __entry->n_acks = sp->ack.nr_acks; ), - TP_printk("c=%08x %08x %s r=%08x f=%08x p=%08x n=%u", + TP_printk("c=%08x %08x %s r=%08x us=%02x f=%08x p=%08x n=%u", __entry->call, __entry->serial, __print_symbolic(__entry->reason, rxrpc_ack_names), __entry->ack_serial, + __entry->user_status, __entry->first, __entry->prev, __entry->n_acks) -- cgit v1.2.3 From 08d55d7cf3f33c730ce2694393efe16b7983a9c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:00 +0000 Subject: rxrpc: Don't allocate a txbuf for an ACK transmission Don't allocate an rxrpc_txbuf struct for an ACK transmission. There's now no need as the memory to hold the ACK content is allocated with a page frag allocator. The allocation and freeing of a txbuf is just unnecessary overhead. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 2 - net/rxrpc/ar-internal.h | 5 +- net/rxrpc/output.c | 210 ++++++++++++++++++++++++++----------------- net/rxrpc/txbuf.c | 76 ---------------- 4 files changed, 131 insertions(+), 162 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 7681c67f7d65..326a4c257aea 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -462,13 +462,11 @@ /* ---- Must update size of stat_why_req_ack[] if more are added! */ #define rxrpc_txbuf_traces \ - EM(rxrpc_txbuf_alloc_ack, "ALLOC ACK ") \ EM(rxrpc_txbuf_alloc_data, "ALLOC DATA ") \ EM(rxrpc_txbuf_free, "FREE ") \ EM(rxrpc_txbuf_get_buffer, "GET BUFFER ") \ EM(rxrpc_txbuf_get_trans, "GET TRANS ") \ EM(rxrpc_txbuf_get_retrans, "GET RETRANS") \ - EM(rxrpc_txbuf_put_ack_tx, "PUT ACK TX ") \ EM(rxrpc_txbuf_put_cleaned, "PUT CLEANED") \ EM(rxrpc_txbuf_put_nomem, "PUT NOMEM ") \ EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1307749a1a74..db93d7f78902 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -834,11 +834,9 @@ struct rxrpc_txbuf { #define RXRPC_TXBUF_WIRE_FLAGS 0xff /* The wire protocol flags */ #define RXRPC_TXBUF_RESENT 0x100 /* Set if has been resent */ __be16 cksum; /* Checksum to go in header */ - unsigned short ack_rwind; /* ACK receive window */ - u8 /*enum rxrpc_propose_ack_trace*/ ack_why; /* If ack, why */ bool jumboable; /* Can be non-terminal jumbo subpacket */ u8 nr_kvec; /* Amount of kvec[] used */ - struct kvec kvec[3]; + struct kvec kvec[1]; }; static inline bool rxrpc_sending_to_server(const struct rxrpc_txbuf *txb) @@ -1364,7 +1362,6 @@ static inline void rxrpc_sysctl_exit(void) {} extern atomic_t rxrpc_nr_txbuf; struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_size, size_t data_align, gfp_t gfp); -struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size); void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_see_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); void rxrpc_put_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 20bf45317264..a7de8a02f419 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -71,22 +71,97 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call, ktime_t now) trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_keepalive); } +/* + * Allocate transmission buffers for an ACK and attach them to local->kv[]. + */ +static int rxrpc_alloc_ack(struct rxrpc_call *call, size_t sack_size) +{ + struct rxrpc_wire_header *whdr; + struct rxrpc_acktrailer *trailer; + struct rxrpc_ackpacket *ack; + struct kvec *kv = call->local->kvec; + gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS; + void *buf, *buf2 = NULL; + u8 *filler; + + buf = page_frag_alloc(&call->local->tx_alloc, + sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp); + if (!buf) + return -ENOMEM; + + if (sack_size) { + buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp); + if (!buf2) { + page_frag_free(buf); + return -ENOMEM; + } + } + + whdr = buf; + ack = buf + sizeof(*whdr); + filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; + trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; + + kv[0].iov_base = whdr; + kv[0].iov_len = sizeof(*whdr) + sizeof(*ack); + kv[1].iov_base = buf2; + kv[1].iov_len = sack_size; + kv[2].iov_base = filler; + kv[2].iov_len = 3 + sizeof(*trailer); + return 3; /* Number of kvec[] used. */ +} + +static void rxrpc_free_ack(struct rxrpc_call *call) +{ + page_frag_free(call->local->kvec[0].iov_base); + if (call->local->kvec[1].iov_base) + page_frag_free(call->local->kvec[1].iov_base); +} + +/* + * Record the beginning of an RTT probe. + */ +static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial, + ktime_t now, enum rxrpc_rtt_tx_trace why) +{ + unsigned long avail = call->rtt_avail; + int rtt_slot = 9; + + if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK)) + goto no_slot; + + rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK); + if (!test_and_clear_bit(rtt_slot, &call->rtt_avail)) + goto no_slot; + + call->rtt_serial[rtt_slot] = serial; + call->rtt_sent_at[rtt_slot] = now; + smp_wmb(); /* Write data before avail bit */ + set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); + + trace_rxrpc_rtt_tx(call, why, rtt_slot, serial); + return; + +no_slot: + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial); +} + /* * Fill out an ACK packet. */ -static void rxrpc_fill_out_ack(struct rxrpc_call *call, - struct rxrpc_txbuf *txb, - u8 ack_reason, - rxrpc_serial_t serial) +static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason, + rxrpc_serial_t serial_to_ack, rxrpc_serial_t *_ack_serial) { - struct rxrpc_wire_header *whdr = txb->kvec[0].iov_base; - struct rxrpc_acktrailer *trailer = txb->kvec[2].iov_base + 3; + struct kvec *kv = call->local->kvec; + struct rxrpc_wire_header *whdr = kv[0].iov_base; + struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3; struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); unsigned int qsize, sack, wrap, to, max_mtu, if_mtu; rxrpc_seq_t window, wtop; + ktime_t now = ktime_get_real(); int rsize; - u8 *filler = txb->kvec[2].iov_base; - u8 *sackp = txb->kvec[1].iov_base; + u8 *filler = kv[2].iov_base; + u8 *sackp = kv[1].iov_base; rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); @@ -94,14 +169,25 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, wtop = call->ackr_wtop; sack = call->ackr_sack_base % RXRPC_SACK_SIZE; + *_ack_serial = rxrpc_get_next_serial(call->conn); + + whdr->epoch = htonl(call->conn->proto.epoch); + whdr->cid = htonl(call->cid); + whdr->callNumber = htonl(call->call_id); + whdr->serial = htonl(*_ack_serial); whdr->seq = 0; whdr->type = RXRPC_PACKET_TYPE_ACK; - txb->flags |= RXRPC_SLOW_START_OK; + whdr->flags = call->conn->out_clientflag | RXRPC_SLOW_START_OK; + whdr->userStatus = 0; + whdr->securityIndex = call->security_ix; + whdr->_rsvd = 0; + whdr->serviceId = htons(call->dest_srx.srx_service); + ack->bufferSpace = 0; ack->maxSkew = 0; ack->firstPacket = htonl(window); ack->previousPacket = htonl(call->rx_highest_seq); - ack->serial = htonl(serial); + ack->serial = htonl(serial_to_ack); ack->reason = ack_reason; ack->nAcks = wtop - window; filler[0] = 0; @@ -109,12 +195,10 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, filler[2] = 0; if (ack_reason == RXRPC_ACK_PING) - txb->flags |= RXRPC_REQUEST_ACK; + whdr->flags |= RXRPC_REQUEST_ACK; if (after(wtop, window)) { - txb->len += ack->nAcks; - txb->kvec[1].iov_base = sackp; - txb->kvec[1].iov_len = ack->nAcks; + kv[1].iov_len = ack->nAcks; wrap = RXRPC_SACK_SIZE - sack; to = umin(ack->nAcks, RXRPC_SACK_SIZE); @@ -133,7 +217,6 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, qsize = (window - 1) - call->rx_consumed; rsize = max_t(int, call->rx_winsize - qsize, 0); - txb->ack_rwind = rsize; if_mtu = call->peer->if_mtu - call->peer->hdrsize; if (call->peer->ackr_adv_pmtud) { @@ -147,48 +230,27 @@ static void rxrpc_fill_out_ack(struct rxrpc_call *call, trailer->ifMTU = htonl(if_mtu); trailer->rwind = htonl(rsize); trailer->jumbo_max = 0; /* Advertise pmtu discovery */ -} - -/* - * Record the beginning of an RTT probe. - */ -static void rxrpc_begin_rtt_probe(struct rxrpc_call *call, rxrpc_serial_t serial, - ktime_t now, enum rxrpc_rtt_tx_trace why) -{ - unsigned long avail = call->rtt_avail; - int rtt_slot = 9; - if (!(avail & RXRPC_CALL_RTT_AVAIL_MASK)) - goto no_slot; - - rtt_slot = __ffs(avail & RXRPC_CALL_RTT_AVAIL_MASK); - if (!test_and_clear_bit(rtt_slot, &call->rtt_avail)) - goto no_slot; - - call->rtt_serial[rtt_slot] = serial; - call->rtt_sent_at[rtt_slot] = now; - smp_wmb(); /* Write data before avail bit */ - set_bit(rtt_slot + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); - - trace_rxrpc_rtt_tx(call, why, rtt_slot, serial); - return; - -no_slot: - trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_no_slot, rtt_slot, serial); + if (ack_reason == RXRPC_ACK_PING) + rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping); + if (whdr->flags & RXRPC_REQUEST_ACK) + call->peer->rtt_last_req = now; + rxrpc_set_keepalive(call, now); + return nr_kv; } /* * Transmit an ACK packet. */ -static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb, - int nr_kv, enum rxrpc_propose_ack_trace why) +static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len, + rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) { struct kvec *kv = call->local->kvec; struct rxrpc_wire_header *whdr = kv[0].iov_base; + struct rxrpc_acktrailer *trailer = kv[2].iov_base + 3; struct rxrpc_connection *conn; struct rxrpc_ackpacket *ack = (struct rxrpc_ackpacket *)(whdr + 1); struct msghdr msg; - ktime_t now; int ret; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) @@ -202,41 +264,31 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t msg.msg_controllen = 0; msg.msg_flags = MSG_SPLICE_PAGES; - whdr->flags = txb->flags & RXRPC_TXBUF_WIRE_FLAGS; - - txb->serial = rxrpc_get_next_serial(conn); - whdr->serial = htonl(txb->serial); - trace_rxrpc_tx_ack(call->debug_id, txb->serial, + trace_rxrpc_tx_ack(call->debug_id, serial, ntohl(ack->firstPacket), ntohl(ack->serial), ack->reason, ack->nAcks, - txb->ack_rwind); + ntohl(trailer->rwind)); rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); - iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, txb->len); + iov_iter_kvec(&msg.msg_iter, WRITE, kv, nr_kv, len); rxrpc_local_dont_fragment(conn->local, why == rxrpc_propose_ack_ping_for_mtu_probe); - ret = do_udp_sendmsg(conn->local->socket, &msg, txb->len); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) { - trace_rxrpc_tx_fail(call->debug_id, txb->serial, ret, + trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_ack); if (why == rxrpc_propose_ack_ping_for_mtu_probe && ret == -EMSGSIZE) - rxrpc_input_probe_for_pmtud(conn, txb->serial, true); + rxrpc_input_probe_for_pmtud(conn, serial, true); } else { trace_rxrpc_tx_packet(call->debug_id, whdr, rxrpc_tx_point_call_ack); - now = ktime_get_real(); - if (ack->reason == RXRPC_ACK_PING) - rxrpc_begin_rtt_probe(call, txb->serial, now, rxrpc_rtt_tx_ping); - if (txb->flags & RXRPC_REQUEST_ACK) - call->peer->rtt_last_req = now; - rxrpc_set_keepalive(call, now); if (why == rxrpc_propose_ack_ping_for_mtu_probe) { call->peer->pmtud_pending = false; call->peer->pmtud_probing = true; - call->conn->pmtud_probe = txb->serial; + call->conn->pmtud_probe = serial; call->conn->pmtud_call = call->debug_id; trace_rxrpc_pmtud_tx(call); } @@ -248,10 +300,11 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *t * Queue an ACK for immediate transmission. */ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, - rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) + rxrpc_serial_t serial_to_ack, enum rxrpc_propose_ack_trace why) { - struct rxrpc_txbuf *txb; struct kvec *kv = call->local->kvec; + rxrpc_serial_t ack_serial; + size_t len; int nr_kv; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) @@ -259,32 +312,29 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); - txb = rxrpc_alloc_ack_txbuf(call, call->ackr_wtop - call->ackr_window); - if (!txb) { + nr_kv = rxrpc_alloc_ack(call, call->ackr_wtop - call->ackr_window); + if (nr_kv < 0) { kleave(" = -ENOMEM"); return; } - txb->ack_why = why; - - rxrpc_fill_out_ack(call, txb, ack_reason, serial); + nr_kv = rxrpc_fill_out_ack(call, nr_kv, ack_reason, serial_to_ack, &ack_serial); + len = kv[0].iov_len; + len += kv[1].iov_len; + len += kv[2].iov_len; /* Extend a path MTU probe ACK. */ - nr_kv = txb->nr_kvec; - kv[0] = txb->kvec[0]; - kv[1] = txb->kvec[1]; - kv[2] = txb->kvec[2]; if (why == rxrpc_propose_ack_ping_for_mtu_probe) { size_t probe_mtu = call->peer->pmtud_trial + sizeof(struct rxrpc_wire_header); - if (txb->len > probe_mtu) + if (len > probe_mtu) goto skip; - while (txb->len < probe_mtu) { - size_t part = umin(probe_mtu - txb->len, PAGE_SIZE); + while (len < probe_mtu) { + size_t part = umin(probe_mtu - len, PAGE_SIZE); kv[nr_kv].iov_base = page_address(ZERO_PAGE(0)); kv[nr_kv].iov_len = part; - txb->len += part; + len += part; nr_kv++; } } @@ -293,10 +343,10 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, atomic_set(&call->ackr_nr_consumed, 0); clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); - trace_rxrpc_send_ack(call, why, ack_reason, serial); - rxrpc_send_ack_packet(call, txb, nr_kv, why); + trace_rxrpc_send_ack(call, why, ack_reason, ack_serial); + rxrpc_send_ack_packet(call, nr_kv, len, ack_serial, why); skip: - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); + rxrpc_free_ack(call); } /* diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 067223c8c35f..131d9e55c8e9 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -73,82 +73,6 @@ struct rxrpc_txbuf *rxrpc_alloc_data_txbuf(struct rxrpc_call *call, size_t data_ return txb; } -/* - * Allocate and partially initialise an ACK packet. - */ -struct rxrpc_txbuf *rxrpc_alloc_ack_txbuf(struct rxrpc_call *call, size_t sack_size) -{ - struct rxrpc_wire_header *whdr; - struct rxrpc_acktrailer *trailer; - struct rxrpc_ackpacket *ack; - struct rxrpc_txbuf *txb; - gfp_t gfp = rcu_read_lock_held() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS; - void *buf, *buf2 = NULL; - u8 *filler; - - txb = kmalloc(sizeof(*txb), gfp); - if (!txb) - return NULL; - - buf = page_frag_alloc(&call->local->tx_alloc, - sizeof(*whdr) + sizeof(*ack) + 1 + 3 + sizeof(*trailer), gfp); - if (!buf) { - kfree(txb); - return NULL; - } - - if (sack_size) { - buf2 = page_frag_alloc(&call->local->tx_alloc, sack_size, gfp); - if (!buf2) { - page_frag_free(buf); - kfree(txb); - return NULL; - } - } - - whdr = buf; - ack = buf + sizeof(*whdr); - filler = buf + sizeof(*whdr) + sizeof(*ack) + 1; - trailer = buf + sizeof(*whdr) + sizeof(*ack) + 1 + 3; - - refcount_set(&txb->ref, 1); - txb->call_debug_id = call->debug_id; - txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); - txb->space = 0; - txb->len = sizeof(*whdr) + sizeof(*ack) + 3 + sizeof(*trailer); - txb->offset = 0; - txb->flags = call->conn->out_clientflag; - txb->ack_rwind = 0; - txb->seq = 0; - txb->serial = 0; - txb->cksum = 0; - txb->nr_kvec = 3; - txb->kvec[0].iov_base = whdr; - txb->kvec[0].iov_len = sizeof(*whdr) + sizeof(*ack); - txb->kvec[1].iov_base = buf2; - txb->kvec[1].iov_len = sack_size; - txb->kvec[2].iov_base = filler; - txb->kvec[2].iov_len = 3 + sizeof(*trailer); - - whdr->epoch = htonl(call->conn->proto.epoch); - whdr->cid = htonl(call->cid); - whdr->callNumber = htonl(call->call_id); - whdr->seq = 0; - whdr->type = RXRPC_PACKET_TYPE_ACK; - whdr->flags = 0; - whdr->userStatus = 0; - whdr->securityIndex = call->security_ix; - whdr->_rsvd = 0; - whdr->serviceId = htons(call->dest_srx.srx_service); - - get_page(virt_to_head_page(trailer)); - - trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, - rxrpc_txbuf_alloc_ack); - atomic_inc(&rxrpc_nr_txbuf); - return txb; -} - void rxrpc_get_txbuf(struct rxrpc_txbuf *txb, enum rxrpc_txbuf_trace what) { int r; -- cgit v1.2.3 From 372d12d191cb80720319e224d401fd82c602e9e4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:03 +0000 Subject: rxrpc: Add a reason indicator to the tx_data tracepoint Add an indicator to the rxrpc_tx_data tracepoint to indicate what triggered the transmission of a particular packet. At this point, it's only normal transmission and retransmission, plus the tracepoint is also used to record loss injection, but in a future patch, TLP-induced (re-)transmission will also be a thing. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 21 ++++++++++++++------- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_event.c | 12 ++++++++---- net/rxrpc/output.c | 6 +++--- 4 files changed, 26 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 326a4c257aea..d79623fff746 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -302,6 +302,11 @@ EM(rxrpc_txqueue_rotate_last, "RLS") \ E_(rxrpc_txqueue_wait, "WAI") +#define rxrpc_txdata_traces \ + EM(rxrpc_txdata_inject_loss, " *INJ-LOSS*") \ + EM(rxrpc_txdata_new_data, " ") \ + E_(rxrpc_txdata_retransmit, " *RETRANS*") + #define rxrpc_receive_traces \ EM(rxrpc_receive_end, "END") \ EM(rxrpc_receive_front, "FRN") \ @@ -534,6 +539,7 @@ enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte); enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte); +enum rxrpc_txdata_trace { rxrpc_txdata_traces } __mode(byte); enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); #endif /* end __RXRPC_DECLARE_TRACE_ENUMS_ONCE_ONLY */ @@ -572,6 +578,7 @@ rxrpc_timer_traces; rxrpc_tq_traces; rxrpc_tx_points; rxrpc_txbuf_traces; +rxrpc_txdata_traces; rxrpc_txqueue_traces; /* @@ -1222,9 +1229,10 @@ TRACE_EVENT(rxrpc_tx_packet, TRACE_EVENT(rxrpc_tx_data, TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, - rxrpc_serial_t serial, unsigned int flags, bool lose), + rxrpc_serial_t serial, unsigned int flags, + enum rxrpc_txdata_trace trace), - TP_ARGS(call, seq, serial, flags, lose), + TP_ARGS(call, seq, serial, flags, trace), TP_STRUCT__entry( __field(unsigned int, call) @@ -1233,7 +1241,7 @@ TRACE_EVENT(rxrpc_tx_data, __field(u32, cid) __field(u32, call_id) __field(u16, flags) - __field(bool, lose) + __field(enum rxrpc_txdata_trace, trace) ), TP_fast_assign( @@ -1243,18 +1251,17 @@ TRACE_EVENT(rxrpc_tx_data, __entry->seq = seq; __entry->serial = serial; __entry->flags = flags; - __entry->lose = lose; + __entry->trace = trace; ), - TP_printk("c=%08x DATA %08x:%08x %08x q=%08x fl=%02x%s%s", + TP_printk("c=%08x DATA %08x:%08x %08x q=%08x fl=%02x%s", __entry->call, __entry->cid, __entry->call_id, __entry->serial, __entry->seq, __entry->flags & RXRPC_TXBUF_WIRE_FLAGS, - __entry->flags & RXRPC_TXBUF_RESENT ? " *RETRANS*" : "", - __entry->lose ? " *LOSE*" : "") + __print_symbolic(__entry->trace, rxrpc_txdata_traces)) ); TRACE_EVENT(rxrpc_tx_ack, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index aa240b4b4bec..139575032ae2 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -883,6 +883,7 @@ struct rxrpc_send_data_req { rxrpc_seq_t seq; /* Sequence of first data */ int n; /* Number of DATA packets to glue into jumbo */ bool did_send; /* T if did actually send */ + int /* enum rxrpc_txdata_trace */ trace; }; #include diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 39772459426b..99d9502564cc 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -101,6 +101,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ { struct rxrpc_send_data_req req = { .now = ktime_get_real(), + .trace = rxrpc_txdata_retransmit, }; struct rxrpc_txqueue *tq = call->tx_queue; ktime_t lowest_xmit_ts = KTIME_MAX; @@ -269,7 +270,8 @@ static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) /* * Transmit some as-yet untransmitted data. */ -static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) +static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, + enum rxrpc_txdata_trace trace) { int space = rxrpc_tx_window_space(call); @@ -284,6 +286,7 @@ static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) .now = ktime_get_real(), .seq = call->tx_transmitted + 1, .n = 0, + .trace = trace, }; struct rxrpc_txqueue *tq; struct rxrpc_txbuf *txb; @@ -332,7 +335,8 @@ static void rxrpc_transmit_fresh_data(struct rxrpc_call *call) } } -static void rxrpc_transmit_some_data(struct rxrpc_call *call) +static void rxrpc_transmit_some_data(struct rxrpc_call *call, + enum rxrpc_txdata_trace trace) { switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: @@ -349,7 +353,7 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call) rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); return; } - rxrpc_transmit_fresh_data(call); + rxrpc_transmit_fresh_data(call, trace); break; default: return; @@ -463,7 +467,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) resend = true; } - rxrpc_transmit_some_data(call); + rxrpc_transmit_some_data(call, rxrpc_txdata_new_data); now = ktime_get_real(); t = ktime_sub(call->keepalive_at, now); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a7de8a02f419..2633f955d1d0 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -511,7 +511,7 @@ dont_set_request_ack: len += sizeof(*jumbo); } - trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags | flags, false); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, flags, req->trace); kv->iov_len = len; return len; } @@ -655,8 +655,8 @@ void rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_send_data_req if ((lose++ & 7) == 7) { ret = 0; - trace_rxrpc_tx_data(call, txb->seq, txb->serial, - txb->flags, true); + trace_rxrpc_tx_data(call, txb->seq, txb->serial, txb->flags, + rxrpc_txdata_inject_loss); conn->peer->last_tx_at = ktime_get_seconds(); goto done; } -- cgit v1.2.3 From b509934094fd52ac3a49ee2a2c144e885517069f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:04 +0000 Subject: rxrpc: Add a reason indicator to the tx_ack tracepoint Record the reason for the transmission of an ACK in the rxrpc_tx_ack tracepoint, and not just in the rxrpc_propose_ack tracepoint. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 13 +++++++++---- net/rxrpc/conn_event.c | 3 ++- net/rxrpc/output.c | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d79623fff746..0cfc8e1baf1f 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -375,6 +375,7 @@ EM(rxrpc_propose_ack_processing_op, "ProcOp ") \ EM(rxrpc_propose_ack_respond_to_ack, "Rsp2Ack") \ EM(rxrpc_propose_ack_respond_to_ping, "Rsp2Png") \ + EM(rxrpc_propose_ack_retransmit, "Retrans") \ EM(rxrpc_propose_ack_retry_tx, "RetryTx") \ EM(rxrpc_propose_ack_rotate_rx, "RxAck ") \ EM(rxrpc_propose_ack_rx_idle, "RxIdle ") \ @@ -1267,9 +1268,10 @@ TRACE_EVENT(rxrpc_tx_data, TRACE_EVENT(rxrpc_tx_ack, TP_PROTO(unsigned int call, rxrpc_serial_t serial, rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial, - u8 reason, u8 n_acks, u16 rwind), + u8 reason, u8 n_acks, u16 rwind, + enum rxrpc_propose_ack_trace trace), - TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks, rwind), + TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks, rwind, trace), TP_STRUCT__entry( __field(unsigned int, call) @@ -1279,6 +1281,7 @@ TRACE_EVENT(rxrpc_tx_ack, __field(u8, reason) __field(u8, n_acks) __field(u16, rwind) + __field(enum rxrpc_propose_ack_trace, trace) ), TP_fast_assign( @@ -1289,16 +1292,18 @@ TRACE_EVENT(rxrpc_tx_ack, __entry->reason = reason; __entry->n_acks = n_acks; __entry->rwind = rwind; + __entry->trace = trace; ), - TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u rw=%u", + TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u rw=%u %s", __entry->call, __entry->serial, __print_symbolic(__entry->reason, rxrpc_ack_names), __entry->ack_first, __entry->ack_serial, __entry->n_acks, - __entry->rwind) + __entry->rwind, + __print_symbolic(__entry->trace, rxrpc_propose_ack_traces)) ); TRACE_EVENT(rxrpc_receive, diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 6b29a294ee07..713e04394ceb 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -177,7 +177,8 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, trace_rxrpc_tx_ack(chan->call_debug_id, serial, ntohl(pkt.ack.firstPacket), ntohl(pkt.ack.serial), - pkt.ack.reason, 0, rxrpc_rx_window_size); + pkt.ack.reason, 0, rxrpc_rx_window_size, + rxrpc_propose_ack_retransmit); break; default: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 2633f955d1d0..74c3ff55b482 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -267,7 +267,7 @@ static void rxrpc_send_ack_packet(struct rxrpc_call *call, int nr_kv, size_t len trace_rxrpc_tx_ack(call->debug_id, serial, ntohl(ack->firstPacket), ntohl(ack->serial), ack->reason, ack->nAcks, - ntohl(trailer->rwind)); + ntohl(trailer->rwind), why); rxrpc_inc_stat(call->rxnet, stat_tx_ack_send); -- cgit v1.2.3 From b40ef2b85a7d117dd323b5910e504899e0a3e7dc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:05 +0000 Subject: rxrpc: Manage RTT per-call rather than per-peer Manage the determination of RTT on a per-call (ie. per-RPC op) basis rather than on a per-peer basis, averaging across all calls going to that peer. The problem is that the RTT measurements from the initial packets on a call may be off because the server may do some setting up (such as getting a lock on a file) before accepting the rest of the data in the RPC and, further, the RTT may be affected by server-side file operations, for instance if a large amount of data is being written or read. Note: When handling the FS.StoreData-type RPCs, for example, the server uses the userStatus field in the header of ACK packets as supplementary flow control to aid in managing this. AF_RXRPC does not yet support this, but it should be added. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 2 +- net/rxrpc/ar-internal.h | 39 ++++++++++--------- net/rxrpc/call_event.c | 18 ++++----- net/rxrpc/call_object.c | 2 + net/rxrpc/input.c | 10 ++--- net/rxrpc/output.c | 14 +++---- net/rxrpc/peer_object.c | 9 +---- net/rxrpc/proc.c | 6 +-- net/rxrpc/rtt.c | 91 ++++++++++++++++++++++---------------------- net/rxrpc/sendmsg.c | 2 +- 10 files changed, 97 insertions(+), 96 deletions(-) (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 0cfc8e1baf1f..71df5c48a413 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1453,7 +1453,7 @@ TRACE_EVENT(rxrpc_rtt_rx, __entry->rtt = rtt; __entry->srtt = srtt; __entry->rto = rto; - __entry->min_rtt = minmax_get(&call->peer->min_rtt) + __entry->min_rtt = minmax_get(&call->min_rtt) ), TP_printk("c=%08x [%d] %s sr=%08x rr=%08x rtt=%u srtt=%u rto=%u min=%u", diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 139575032ae2..a9d732ba6df0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -366,20 +366,9 @@ struct rxrpc_peer { unsigned short hdrsize; /* header size (IP + UDP + RxRPC) */ unsigned short tx_seg_max; /* Maximum number of transmissable segments */ - /* calculated RTT cache */ -#define RXRPC_RTT_CACHE_SIZE 32 - spinlock_t rtt_input_lock; /* RTT lock for input routine */ - ktime_t rtt_last_req; /* Time of last RTT request */ - unsigned int rtt_count; /* Number of samples we've got */ - unsigned int rtt_taken; /* Number of samples taken (wrapping) */ - struct minmax min_rtt; /* Estimated minimum RTT */ - - u32 srtt_us; /* smoothed round trip time << 3 in usecs */ - u32 mdev_us; /* medium deviation */ - u32 mdev_max_us; /* maximal mdev for the last rtt period */ - u32 rttvar_us; /* smoothed mdev_max */ - u32 rto_us; /* Retransmission timeout in usec */ - u8 backoff; /* Backoff timeout (as shift) */ + /* Calculated RTT cache */ + unsigned int recent_srtt_us; + unsigned int recent_rto_us; u8 cong_ssthresh; /* Congestion slow-start threshold */ }; @@ -765,6 +754,18 @@ struct rxrpc_call { rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ unsigned short acks_nr_sacks; /* Number of soft acks recorded */ unsigned short acks_nr_snacks; /* Number of soft nacks recorded */ + + /* Calculated RTT cache */ + ktime_t rtt_last_req; /* Time of last RTT request */ + unsigned int rtt_count; /* Number of samples we've got */ + unsigned int rtt_taken; /* Number of samples taken (wrapping) */ + struct minmax min_rtt; /* Estimated minimum RTT */ + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 rto_us; /* Retransmission timeout in usec */ + u8 backoff; /* Backoff timeout (as shift) */ }; /* @@ -1287,10 +1288,12 @@ static inline int rxrpc_abort_eproto(struct rxrpc_call *call, /* * rtt.c */ -void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, int, - rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); -ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans); -void rxrpc_peer_init_rtt(struct rxrpc_peer *); +void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + int rtt_slot, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time); +ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans); +void rxrpc_call_init_rtt(struct rxrpc_call *call); /* * rxkad.c diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 99d9502564cc..7af275544251 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -44,8 +44,8 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, trace_rxrpc_propose_ack(call, why, RXRPC_ACK_DELAY, serial); - if (call->peer->srtt_us) - delay = (call->peer->srtt_us >> 3) * NSEC_PER_USEC; + if (call->srtt_us) + delay = (call->srtt_us >> 3) * NSEC_PER_USEC; else delay = ms_to_ktime(READ_ONCE(rxrpc_soft_ack_delay)); ktime_add_ms(delay, call->tx_backoff); @@ -105,7 +105,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ }; struct rxrpc_txqueue *tq = call->tx_queue; ktime_t lowest_xmit_ts = KTIME_MAX; - ktime_t rto = rxrpc_get_rto_backoff(call->peer, false); + ktime_t rto = rxrpc_get_rto_backoff(call, false); bool unacked = false; _enter("{%d,%d}", call->tx_bottom, call->tx_top); @@ -195,7 +195,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ } while ((tq = tq->next)); if (lowest_xmit_ts < KTIME_MAX) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, req.did_send); + ktime_t delay = rxrpc_get_rto_backoff(call, req.did_send); ktime_t resend_at = ktime_add(lowest_xmit_ts, delay); _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); @@ -216,7 +216,7 @@ void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_ */ if (!req.did_send) { ktime_t next_ping = ktime_add_us(call->acks_latest_ts, - call->peer->srtt_us >> 3); + call->srtt_us >> 3); if (ktime_sub(next_ping, req.now) <= 0) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, @@ -366,8 +366,8 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call, */ static void rxrpc_send_initial_ping(struct rxrpc_call *call) { - if (call->peer->rtt_count < 3 || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + if (call->rtt_count < 3 || + ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real())) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_params); @@ -499,10 +499,10 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) rxrpc_propose_ack_rx_idle); if (call->ackr_nr_unacked > 2) { - if (call->peer->rtt_count < 3) + if (call->rtt_count < 3) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_rtt); - else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real())) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_old_rtt); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 75cd0b06e14c..fb4ee0d2e9e1 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -176,6 +176,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, call->cong_cwnd = RXRPC_MIN_CWND; call->cong_ssthresh = RXRPC_TX_MAX_WINDOW; + rxrpc_call_init_rtt(call); + call->rxnet = rxnet; call->rtt_avail = RXRPC_CALL_RTT_AVAIL_MASK; atomic_inc(&rxnet->nr_calls); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 036cf440b63b..9f308bd512e9 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -71,11 +71,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, /* We analyse the number of packets that get ACK'd per RTT * period and increase the window if we managed to fill it. */ - if (call->peer->rtt_count == 0) + if (call->rtt_count == 0) goto out; if (ktime_before(call->acks_latest_ts, ktime_add_us(call->cong_tstamp, - call->peer->srtt_us >> 3))) + call->srtt_us >> 3))) goto out_no_clear_ca; summary->change = rxrpc_cong_rtt_window_end; call->cong_tstamp = call->acks_latest_ts; @@ -179,7 +179,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY) return; - rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8)); + rtt = ns_to_ktime(call->srtt_us * (NSEC_PER_USEC / 8)); now = ktime_get_real(); if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now)) return; @@ -200,7 +200,7 @@ static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, struct rxrpc_txqueue *tq, int ix) { - rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, + rxrpc_call_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, summary->acked_serial, summary->ack_serial, ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]), call->acks_latest_ts); @@ -725,7 +725,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); smp_mb(); /* Read data before setting avail bit */ set_bit(i, &call->rtt_avail); - rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, + rxrpc_call_add_rtt(call, type, i, acked_serial, ack_serial, sent_at, resp_time); matched = true; } diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 74c3ff55b482..ecaf3becee40 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -234,7 +234,7 @@ static int rxrpc_fill_out_ack(struct rxrpc_call *call, int nr_kv, u8 ack_reason, if (ack_reason == RXRPC_ACK_PING) rxrpc_begin_rtt_probe(call, *_ack_serial, now, rxrpc_rtt_tx_ping); if (whdr->flags & RXRPC_REQUEST_ACK) - call->peer->rtt_last_req = now; + call->rtt_last_req = now; rxrpc_set_keepalive(call, now); return nr_kv; } @@ -473,9 +473,9 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, why = rxrpc_reqack_slow_start; else if (call->tx_winsize <= 2) why = rxrpc_reqack_small_txwin; - else if (call->peer->rtt_count < 3 && txb->seq & 1) + else if (call->rtt_count < 3) why = rxrpc_reqack_more_rtt; - else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real())) + else if (ktime_before(ktime_add_ms(call->rtt_last_req, 1000), ktime_get_real())) why = rxrpc_reqack_old_rtt; else if (!last && !after(READ_ONCE(call->send_top), txb->seq)) why = rxrpc_reqack_app_stall; @@ -487,7 +487,7 @@ static size_t rxrpc_prepare_data_subpacket(struct rxrpc_call *call, if (why != rxrpc_reqack_no_srv_last) { flags |= RXRPC_REQUEST_ACK; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, -1, serial); - call->peer->rtt_last_req = req->now; + call->rtt_last_req = req->now; } dont_set_request_ack: @@ -576,8 +576,8 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } /* Set timeouts */ - if (call->peer->rtt_count > 1) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + if (call->rtt_count > 1) { + ktime_t delay = rxrpc_get_rto_backoff(call, false); call->ack_lost_at = ktime_add(req->now, delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); @@ -590,7 +590,7 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); } if (call->resend_at == KTIME_MAX) { - ktime_t delay = rxrpc_get_rto_backoff(call->peer, false); + ktime_t delay = rxrpc_get_rto_backoff(call, false); call->resend_at = ktime_add(req->now, delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 27b34ed4d76a..e1c63129586b 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -235,12 +235,9 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); spin_lock_init(&peer->lock); - spin_lock_init(&peer->rtt_input_lock); seqcount_init(&peer->mtu_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); - - rxrpc_peer_init_rtt(peer); - + peer->recent_srtt_us = UINT_MAX; peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW; trace_rxrpc_peer(peer->debug_id, 1, why); } @@ -283,8 +280,6 @@ static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, peer->max_data = peer->if_mtu - peer->hdrsize; rxrpc_assess_MTU_size(local, peer); - - peer->rtt_last_req = ktime_get_real(); } /* @@ -496,7 +491,7 @@ EXPORT_SYMBOL(rxrpc_kernel_get_call_peer); */ unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer) { - return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX; + return READ_ONCE(peer->recent_srtt_us); } EXPORT_SYMBOL(rxrpc_kernel_get_srtt); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 5f974ec13d69..d803562ca0ac 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -296,15 +296,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, - "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8u %8u\n", + "UDP %-47.47s %-47.47s %3u %4u %5u %6llus %8d %8d\n", lbuff, rbuff, refcount_read(&peer->ref), peer->cong_ssthresh, peer->max_data, now - peer->last_tx_at, - peer->srtt_us >> 3, - peer->rto_us); + READ_ONCE(peer->recent_srtt_us), + READ_ONCE(peer->recent_rto_us)); return 0; } diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index aff75e168de8..7474f88d7b18 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -15,14 +15,14 @@ #define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * USEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */ #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ -static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) +static u32 rxrpc_rto_min_us(struct rxrpc_call *call) { return 200; } -static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) +static u32 __rxrpc_set_rto(const struct rxrpc_call *call) { - return (peer->srtt_us >> 3) + peer->rttvar_us; + return (call->srtt_us >> 3) + call->rttvar_us; } static u32 rxrpc_bound_rto(u32 rto) @@ -40,10 +40,10 @@ static u32 rxrpc_bound_rto(u32 rto) * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) +static void rxrpc_rtt_estimator(struct rxrpc_call *call, long sample_rtt_us) { long m = sample_rtt_us; /* RTT */ - u32 srtt = peer->srtt_us; + u32 srtt = call->srtt_us; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev @@ -66,7 +66,7 @@ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ - m -= (peer->mdev_us >> 2); /* similar update on mdev */ + m -= (call->mdev_us >> 2); /* similar update on mdev */ /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain @@ -78,31 +78,31 @@ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) if (m > 0) m >>= 3; } else { - m -= (peer->mdev_us >> 2); /* similar update on mdev */ + m -= (call->mdev_us >> 2); /* similar update on mdev */ } - peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ - if (peer->mdev_us > peer->mdev_max_us) { - peer->mdev_max_us = peer->mdev_us; - if (peer->mdev_max_us > peer->rttvar_us) - peer->rttvar_us = peer->mdev_max_us; + call->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (call->mdev_us > call->mdev_max_us) { + call->mdev_max_us = call->mdev_us; + if (call->mdev_max_us > call->rttvar_us) + call->rttvar_us = call->mdev_max_us; } } else { /* no previous measure. */ srtt = m << 3; /* take the measured time to be rtt */ - peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ - peer->rttvar_us = umax(peer->mdev_us, rxrpc_rto_min_us(peer)); - peer->mdev_max_us = peer->rttvar_us; + call->mdev_us = m << 1; /* make sure rto = 3*rtt */ + call->rttvar_us = umax(call->mdev_us, rxrpc_rto_min_us(call)); + call->mdev_max_us = call->rttvar_us; } - peer->srtt_us = umax(srtt, 1); + call->srtt_us = umax(srtt, 1); } /* * Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -static void rxrpc_set_rto(struct rxrpc_peer *peer) +static void rxrpc_set_rto(struct rxrpc_call *call) { u32 rto; @@ -113,7 +113,7 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer) * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some circumstances. */ - rto = __rxrpc_set_rto(peer); + rto = __rxrpc_set_rto(call); /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, @@ -124,73 +124,73 @@ static void rxrpc_set_rto(struct rxrpc_peer *peer) /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo * guarantees that rto is higher. */ - peer->rto_us = rxrpc_bound_rto(rto); + call->rto_us = rxrpc_bound_rto(rto); } -static void rxrpc_update_rtt_min(struct rxrpc_peer *peer, ktime_t resp_time, long rtt_us) +static void rxrpc_update_rtt_min(struct rxrpc_call *call, ktime_t resp_time, long rtt_us) { /* Window size 5mins in approx usec (ipv4.sysctl_tcp_min_rtt_wlen) */ u32 wlen_us = 5ULL * NSEC_PER_SEC / 1024; - minmax_running_min(&peer->min_rtt, wlen_us, resp_time / 1024, + minmax_running_min(&call->min_rtt, wlen_us, resp_time / 1024, (u32)rtt_us ? : jiffies_to_usecs(1)); } -static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, ktime_t resp_time, long rtt_us) +static void rxrpc_ack_update_rtt(struct rxrpc_call *call, ktime_t resp_time, long rtt_us) { if (rtt_us < 0) return; /* Update RACK min RTT [RFC8985 6.1 Step 1]. */ - rxrpc_update_rtt_min(peer, resp_time, rtt_us); + rxrpc_update_rtt_min(call, resp_time, rtt_us); - rxrpc_rtt_estimator(peer, rtt_us); - rxrpc_set_rto(peer); + rxrpc_rtt_estimator(call, rtt_us); + rxrpc_set_rto(call); /* Only reset backoff on valid RTT measurement [RFC6298]. */ - peer->backoff = 0; + call->backoff = 0; } /* * Add RTT information to cache. This is called in softirq mode and has - * exclusive access to the peer RTT data. + * exclusive access to the call RTT data. */ -void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, +void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, int rtt_slot, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, ktime_t send_time, ktime_t resp_time) { - struct rxrpc_peer *peer = call->peer; s64 rtt_us; rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); if (rtt_us < 0) return; - spin_lock(&peer->rtt_input_lock); - rxrpc_ack_update_rtt(peer, resp_time, rtt_us); - if (peer->rtt_count < 3) - peer->rtt_count++; - peer->rtt_taken++; - spin_unlock(&peer->rtt_input_lock); + rxrpc_ack_update_rtt(call, resp_time, rtt_us); + if (call->rtt_count < 3) + call->rtt_count++; + call->rtt_taken++; + + WRITE_ONCE(call->peer->recent_srtt_us, call->srtt_us / 8); + WRITE_ONCE(call->peer->recent_rto_us, call->rto_us); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, - rtt_us, peer->srtt_us, peer->rto_us); + rtt_us, call->srtt_us, call->rto_us); } /* * Get the retransmission timeout to set in nanoseconds, backing it off each * time we retransmit. */ -ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) +ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans) { u64 timo_us; - u32 backoff = READ_ONCE(peer->backoff); + u32 backoff = READ_ONCE(call->backoff); - timo_us = peer->rto_us; + timo_us = call->rto_us; timo_us <<= backoff; if (retrans && timo_us * 2 <= RXRPC_RTO_MAX) - WRITE_ONCE(peer->backoff, backoff + 1); + WRITE_ONCE(call->backoff, backoff + 1); if (timo_us < 1) timo_us = 1; @@ -198,10 +198,11 @@ ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) return ns_to_ktime(timo_us * NSEC_PER_USEC); } -void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) +void rxrpc_call_init_rtt(struct rxrpc_call *call) { - peer->rto_us = RXRPC_TIMEOUT_INIT; - peer->mdev_us = RXRPC_TIMEOUT_INIT; - peer->backoff = 0; - //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); + call->rtt_last_req = KTIME_MIN; + call->rto_us = RXRPC_TIMEOUT_INIT; + call->mdev_us = RXRPC_TIMEOUT_INIT; + call->backoff = 0; + //minmax_reset(&call->rtt_min, rxrpc_jiffies32, ~0U); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index df501a7c92fa..c4c8b718cafa 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -134,7 +134,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, rxrpc_seq_t tx_start, tx_win; signed long rtt, timeout; - rtt = READ_ONCE(call->peer->srtt_us) >> 3; + rtt = READ_ONCE(call->srtt_us) >> 3; rtt = usecs_to_jiffies(rtt) * 2; if (rtt < 2) rtt = 2; -- cgit v1.2.3 From 7c482665931b6ce7bc72fa5feae6c35567070296 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 4 Dec 2024 07:47:07 +0000 Subject: rxrpc: Implement RACK/TLP to deal with transmission stalls [RFC8985] When an rxrpc call is in its transmission phase and is sending a lot of packets, stalls occasionally occur that cause severe performance degradation (eg. increasing the transmission time for a 256MiB payload from 0.7s to 2.5s over a 10G link). rxrpc already implements TCP-style congestion control [RFC5681] and this helps mitigate the effects, but occasionally we're missing a time event that deals with a missing ACK, leading to a stall until the RTO expires. Fix this by implementing RACK/TLP in rxrpc. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Signed-off-by: Jakub Kicinski --- include/trace/events/rxrpc.h | 342 +++++++++++++++++++++++++++++++++-- net/rxrpc/Makefile | 1 + net/rxrpc/ar-internal.h | 107 ++++++++++- net/rxrpc/call_event.c | 247 +++++++++---------------- net/rxrpc/call_object.c | 3 +- net/rxrpc/input.c | 117 ++++++++---- net/rxrpc/input_rack.c | 418 +++++++++++++++++++++++++++++++++++++++++++ net/rxrpc/io_thread.c | 1 + net/rxrpc/output.c | 41 +++-- 9 files changed, 1041 insertions(+), 236 deletions(-) create mode 100644 net/rxrpc/input_rack.c (limited to 'include') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 71df5c48a413..2f119d18a061 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -305,7 +305,9 @@ #define rxrpc_txdata_traces \ EM(rxrpc_txdata_inject_loss, " *INJ-LOSS*") \ EM(rxrpc_txdata_new_data, " ") \ - E_(rxrpc_txdata_retransmit, " *RETRANS*") + EM(rxrpc_txdata_retransmit, " *RETRANS*") \ + EM(rxrpc_txdata_tlp_new_data, " *TLP-NEW*") \ + E_(rxrpc_txdata_tlp_retransmit, " *TLP-RETRANS*") #define rxrpc_receive_traces \ EM(rxrpc_receive_end, "END") \ @@ -353,11 +355,12 @@ EM(rxrpc_timer_trace_hard, "HardLimit") \ EM(rxrpc_timer_trace_idle, "IdleLimit") \ EM(rxrpc_timer_trace_keepalive, "KeepAlive") \ - EM(rxrpc_timer_trace_lost_ack, "LostAck ") \ EM(rxrpc_timer_trace_ping, "DelayPing") \ - EM(rxrpc_timer_trace_resend, "Resend ") \ - EM(rxrpc_timer_trace_resend_reset, "ResendRst") \ - E_(rxrpc_timer_trace_resend_tx, "ResendTx ") + EM(rxrpc_timer_trace_rack_off, "RACK-OFF ") \ + EM(rxrpc_timer_trace_rack_zwp, "RACK-ZWP ") \ + EM(rxrpc_timer_trace_rack_reo, "RACK-Reo ") \ + EM(rxrpc_timer_trace_rack_tlp_pto, "TLP-PTO ") \ + E_(rxrpc_timer_trace_rack_rto, "RTO ") #define rxrpc_propose_ack_traces \ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ @@ -478,9 +481,9 @@ EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \ EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \ EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \ + EM(rxrpc_txbuf_see_lost, "SEE LOST ") \ EM(rxrpc_txbuf_see_out_of_step, "OUT-OF-STEP") \ - EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ - E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") + E_(rxrpc_txbuf_see_send_more, "SEE SEND+ ") #define rxrpc_tq_traces \ EM(rxrpc_tq_alloc, "ALLOC") \ @@ -505,6 +508,24 @@ EM(rxrpc_rotate_trace_sack, "soft-ack") \ E_(rxrpc_rotate_trace_snak, "soft-nack") +#define rxrpc_rack_timer_modes \ + EM(RXRPC_CALL_RACKTIMER_OFF, "---") \ + EM(RXRPC_CALL_RACKTIMER_RACK_REORDER, "REO") \ + EM(RXRPC_CALL_RACKTIMER_TLP_PTO, "TLP") \ + E_(RXRPC_CALL_RACKTIMER_RTO, "RTO") + +#define rxrpc_tlp_probe_traces \ + EM(rxrpc_tlp_probe_trace_busy, "busy") \ + EM(rxrpc_tlp_probe_trace_transmit_new, "transmit-new") \ + E_(rxrpc_tlp_probe_trace_retransmit, "retransmit") + +#define rxrpc_tlp_ack_traces \ + EM(rxrpc_tlp_ack_trace_acked, "acked") \ + EM(rxrpc_tlp_ack_trace_dup_acked, "dup-acked") \ + EM(rxrpc_tlp_ack_trace_hard_beyond, "hard-beyond") \ + EM(rxrpc_tlp_ack_trace_incomplete, "incomplete") \ + E_(rxrpc_tlp_ack_trace_new_data, "new-data") + /* * Generate enums for tracing information. */ @@ -537,6 +558,8 @@ enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte); enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte); enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); +enum rxrpc_tlp_ack_trace { rxrpc_tlp_ack_traces } __mode(byte); +enum rxrpc_tlp_probe_trace { rxrpc_tlp_probe_traces } __mode(byte); enum rxrpc_tq_trace { rxrpc_tq_traces } __mode(byte); enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); enum rxrpc_txbuf_trace { rxrpc_txbuf_traces } __mode(byte); @@ -567,6 +590,7 @@ rxrpc_conn_traces; rxrpc_local_traces; rxrpc_pmtud_reduce_traces; rxrpc_propose_ack_traces; +rxrpc_rack_timer_modes; rxrpc_receive_traces; rxrpc_recvmsg_traces; rxrpc_req_ack_traces; @@ -576,6 +600,8 @@ rxrpc_rtt_tx_traces; rxrpc_sack_traces; rxrpc_skb_traces; rxrpc_timer_traces; +rxrpc_tlp_ack_traces; +rxrpc_tlp_probe_traces; rxrpc_tq_traces; rxrpc_tx_points; rxrpc_txbuf_traces; @@ -618,6 +644,20 @@ TRACE_EVENT(rxrpc_local, __entry->usage) ); +TRACE_EVENT(rxrpc_iothread_rx, + TP_PROTO(struct rxrpc_local *local, unsigned int nr_rx), + TP_ARGS(local, nr_rx), + TP_STRUCT__entry( + __field(unsigned int, local) + __field(unsigned int, nr_rx) + ), + TP_fast_assign( + __entry->local = local->debug_id; + __entry->nr_rx = nr_rx; + ), + TP_printk("L=%08x nrx=%u", __entry->local, __entry->nr_rx) + ); + TRACE_EVENT(rxrpc_peer, TP_PROTO(unsigned int peer_debug_id, int ref, enum rxrpc_peer_trace why), @@ -1684,16 +1724,15 @@ TRACE_EVENT(rxrpc_drop_ack, TRACE_EVENT(rxrpc_retransmit, TP_PROTO(struct rxrpc_call *call, struct rxrpc_send_data_req *req, - struct rxrpc_txbuf *txb, ktime_t expiry), + struct rxrpc_txbuf *txb), - TP_ARGS(call, req, txb, expiry), + TP_ARGS(call, req, txb), TP_STRUCT__entry( __field(unsigned int, call) __field(unsigned int, qbase) __field(rxrpc_seq_t, seq) __field(rxrpc_serial_t, serial) - __field(ktime_t, expiry) ), TP_fast_assign( @@ -1701,15 +1740,13 @@ TRACE_EVENT(rxrpc_retransmit, __entry->qbase = req->tq->qbase; __entry->seq = req->seq; __entry->serial = txb->serial; - __entry->expiry = expiry; ), - TP_printk("c=%08x tq=%x q=%x r=%x xp=%lld", + TP_printk("c=%08x tq=%x q=%x r=%x", __entry->call, __entry->qbase, __entry->seq, - __entry->serial, - ktime_to_us(__entry->expiry)) + __entry->serial) ); TRACE_EVENT(rxrpc_congest, @@ -1767,9 +1804,9 @@ TRACE_EVENT(rxrpc_congest, ); TRACE_EVENT(rxrpc_reset_cwnd, - TP_PROTO(struct rxrpc_call *call, ktime_t now), + TP_PROTO(struct rxrpc_call *call, ktime_t since_last_tx, ktime_t rtt), - TP_ARGS(call, now), + TP_ARGS(call, since_last_tx, rtt), TP_STRUCT__entry( __field(unsigned int, call) @@ -1779,6 +1816,7 @@ TRACE_EVENT(rxrpc_reset_cwnd, __field(rxrpc_seq_t, hard_ack) __field(rxrpc_seq_t, prepared) __field(ktime_t, since_last_tx) + __field(ktime_t, rtt) __field(bool, has_data) ), @@ -1789,18 +1827,20 @@ TRACE_EVENT(rxrpc_reset_cwnd, __entry->extra = call->cong_extra; __entry->hard_ack = call->acks_hard_ack; __entry->prepared = call->send_top - call->tx_bottom; - __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); + __entry->since_last_tx = since_last_tx; + __entry->rtt = rtt; __entry->has_data = call->tx_bottom != call->tx_top; ), - TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u", + TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu/%llu d=%u", __entry->call, __entry->hard_ack, __print_symbolic(__entry->ca_state, rxrpc_ca_states), __entry->cwnd, __entry->extra, __entry->prepared, - ktime_to_ns(__entry->since_last_tx), + ktime_to_us(__entry->since_last_tx), + ktime_to_us(__entry->rtt), __entry->has_data) ); @@ -1925,6 +1965,32 @@ TRACE_EVENT(rxrpc_resend, __entry->transmitted) ); +TRACE_EVENT(rxrpc_resend_lost, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, unsigned long lost), + + TP_ARGS(call, tq, lost), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, qbase) + __field(u8, nr_rep) + __field(unsigned long, lost) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->nr_rep = tq->nr_reported_acks; + __entry->lost = lost; + ), + + TP_printk("c=%08x tq=%x lost=%016lx nr=%u", + __entry->call, + __entry->qbase, + __entry->lost, + __entry->nr_rep) + ); + TRACE_EVENT(rxrpc_rotate, TP_PROTO(struct rxrpc_call *call, struct rxrpc_txqueue *tq, struct rxrpc_ack_summary *summary, rxrpc_seq_t seq, @@ -2363,6 +2429,244 @@ TRACE_EVENT(rxrpc_pmtud_reduce, __entry->serial, __entry->max_data) ); +TRACE_EVENT(rxrpc_rack, + TP_PROTO(struct rxrpc_call *call, ktime_t timo), + + TP_ARGS(call, timo), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, ack_serial) + __field(rxrpc_seq_t, seq) + __field(enum rxrpc_rack_timer_mode, mode) + __field(unsigned short, nr_sent) + __field(unsigned short, nr_lost) + __field(unsigned short, nr_resent) + __field(unsigned short, nr_sacked) + __field(ktime_t, timo) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->ack_serial = call->rx_serial; + __entry->seq = call->rack_end_seq; + __entry->mode = call->rack_timer_mode; + __entry->nr_sent = call->tx_nr_sent; + __entry->nr_lost = call->tx_nr_lost; + __entry->nr_resent = call->tx_nr_resent; + __entry->nr_sacked = call->acks_nr_sacks; + __entry->timo = timo; + ), + + TP_printk("c=%08x r=%08x q=%08x %s slrs=%u,%u,%u,%u t=%lld", + __entry->call, __entry->ack_serial, __entry->seq, + __print_symbolic(__entry->mode, rxrpc_rack_timer_modes), + __entry->nr_sent, __entry->nr_lost, + __entry->nr_resent, __entry->nr_sacked, + ktime_to_us(__entry->timo)) + ); + +TRACE_EVENT(rxrpc_rack_update, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary), + + TP_ARGS(call, summary), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, ack_serial) + __field(rxrpc_seq_t, seq) + __field(int, xmit_ts) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->ack_serial = call->rx_serial; + __entry->seq = call->rack_end_seq; + __entry->xmit_ts = ktime_sub(call->acks_latest_ts, call->rack_xmit_ts); + ), + + TP_printk("c=%08x r=%08x q=%08x xt=%lld", + __entry->call, __entry->ack_serial, __entry->seq, + ktime_to_us(__entry->xmit_ts)) + ); + +TRACE_EVENT(rxrpc_rack_scan_loss, + TP_PROTO(struct rxrpc_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(ktime_t, rack_rtt) + __field(ktime_t, rack_reo_wnd) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->rack_rtt = call->rack_rtt; + __entry->rack_reo_wnd = call->rack_reo_wnd; + ), + + TP_printk("c=%08x rtt=%lld reow=%lld", + __entry->call, ktime_to_us(__entry->rack_rtt), + ktime_to_us(__entry->rack_reo_wnd)) + ); + +TRACE_EVENT(rxrpc_rack_scan_loss_tq, + TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq, + unsigned long nacks), + + TP_ARGS(call, tq, nacks), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, qbase) + __field(unsigned long, nacks) + __field(unsigned long, lost) + __field(unsigned long, retrans) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->nacks = nacks; + __entry->lost = tq->segment_lost; + __entry->retrans = tq->segment_retransmitted; + ), + + TP_printk("c=%08x q=%08x n=%lx l=%lx r=%lx", + __entry->call, __entry->qbase, + __entry->nacks, __entry->lost, __entry->retrans) + ); + +TRACE_EVENT(rxrpc_rack_detect_loss, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, + rxrpc_seq_t seq), + + TP_ARGS(call, summary, seq), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, ack_serial) + __field(rxrpc_seq_t, seq) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->ack_serial = call->rx_serial; + __entry->seq = seq; + ), + + TP_printk("c=%08x r=%08x q=%08x", + __entry->call, __entry->ack_serial, __entry->seq) + ); + +TRACE_EVENT(rxrpc_rack_mark_loss_tq, + TP_PROTO(struct rxrpc_call *call, const struct rxrpc_txqueue *tq), + + TP_ARGS(call, tq), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_seq_t, qbase) + __field(rxrpc_seq_t, trans) + __field(unsigned long, acked) + __field(unsigned long, lost) + __field(unsigned long, retrans) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->qbase = tq->qbase; + __entry->trans = call->tx_transmitted; + __entry->acked = tq->segment_acked; + __entry->lost = tq->segment_lost; + __entry->retrans = tq->segment_retransmitted; + ), + + TP_printk("c=%08x tq=%08x txq=%08x a=%lx l=%lx r=%lx", + __entry->call, __entry->qbase, __entry->trans, + __entry->acked, __entry->lost, __entry->retrans) + ); + +TRACE_EVENT(rxrpc_tlp_probe, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_tlp_probe_trace trace), + + TP_ARGS(call, trace), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, serial) + __field(rxrpc_seq_t, seq) + __field(enum rxrpc_tlp_probe_trace, trace) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->serial = call->tlp_serial; + __entry->seq = call->tlp_seq; + __entry->trace = trace; + ), + + TP_printk("c=%08x r=%08x pq=%08x %s", + __entry->call, __entry->serial, __entry->seq, + __print_symbolic(__entry->trace, rxrpc_tlp_probe_traces)) + ); + +TRACE_EVENT(rxrpc_tlp_ack, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, + enum rxrpc_tlp_ack_trace trace), + + TP_ARGS(call, summary, trace), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(rxrpc_serial_t, serial) + __field(rxrpc_seq_t, tlp_seq) + __field(rxrpc_seq_t, hard_ack) + __field(enum rxrpc_tlp_ack_trace, trace) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->serial = call->tlp_serial; + __entry->tlp_seq = call->tlp_seq; + __entry->hard_ack = call->acks_hard_ack; + __entry->trace = trace; + ), + + TP_printk("c=%08x r=%08x pq=%08x hq=%08x %s", + __entry->call, __entry->serial, + __entry->tlp_seq, __entry->hard_ack, + __print_symbolic(__entry->trace, rxrpc_tlp_ack_traces)) + ); + +TRACE_EVENT(rxrpc_rack_timer, + TP_PROTO(struct rxrpc_call *call, ktime_t delay, bool exp), + + TP_ARGS(call, delay, exp), + + TP_STRUCT__entry( + __field(unsigned int, call) + __field(bool, exp) + __field(enum rxrpc_rack_timer_mode, mode) + __field(ktime_t, delay) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->exp = exp; + __entry->mode = call->rack_timer_mode; + __entry->delay = delay; + ), + + TP_printk("c=%08x %s %s to=%lld", + __entry->call, + __entry->exp ? "Exp" : "Set", + __print_symbolic(__entry->mode, rxrpc_rack_timer_modes), + ktime_to_us(__entry->delay)) + ); + #undef EM #undef E_ diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index ac5caf5a48e1..210b75e3179e 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -16,6 +16,7 @@ rxrpc-y := \ conn_object.o \ conn_service.o \ input.o \ + input_rack.o \ insecure.o \ io_thread.o \ key.o \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index a9d732ba6df0..0c0a3c89dba3 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -621,6 +621,18 @@ enum rxrpc_ca_state { NR__RXRPC_CA_STATES } __mode(byte); +/* + * Current purpose of call RACK timer. According to the RACK-TLP protocol + * [RFC8985], the transmission timer (call->rack_timo_at) may only be used for + * one of these at once. + */ +enum rxrpc_rack_timer_mode { + RXRPC_CALL_RACKTIMER_OFF, /* Timer not running */ + RXRPC_CALL_RACKTIMER_RACK_REORDER, /* RACK reordering timer */ + RXRPC_CALL_RACKTIMER_TLP_PTO, /* TLP timeout */ + RXRPC_CALL_RACKTIMER_RTO, /* Retransmission timeout */ +} __mode(byte); + /* * RxRPC call definition * - matched by { connection, call_id } @@ -638,8 +650,7 @@ struct rxrpc_call { struct mutex user_mutex; /* User access mutex */ struct sockaddr_rxrpc dest_srx; /* Destination address */ ktime_t delay_ack_at; /* When DELAY ACK needs to happen */ - ktime_t ack_lost_at; /* When ACK is figured as lost */ - ktime_t resend_at; /* When next resend needs to happen */ + ktime_t rack_timo_at; /* When ACK is figured as lost */ ktime_t ping_at; /* When next to send a ping */ ktime_t keepalive_at; /* When next to send a keepalive ping */ ktime_t expect_rx_by; /* When we expect to get a packet by */ @@ -695,8 +706,12 @@ struct rxrpc_call { rxrpc_seq_t tx_bottom; /* First packet in buffer */ rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + rxrpc_serial_t tx_last_serial; /* Serial of last DATA transmitted */ u16 tx_backoff; /* Delay to insert due to Tx failure (ms) */ - u8 tx_winsize; /* Maximum size of Tx window */ + u16 tx_nr_sent; /* Number of packets sent, but unacked */ + u16 tx_nr_lost; /* Number of packets marked lost */ + u16 tx_nr_resent; /* Number of packets resent, but unacked */ + u16 tx_winsize; /* Maximum size of Tx window */ #define RXRPC_TX_MAX_WINDOW 128 u8 tx_jumbo_max; /* Maximum subpkts peer will accept */ ktime_t tx_last_sent; /* Last time a transmission occurred */ @@ -725,6 +740,25 @@ struct rxrpc_call { u16 cong_cumul_acks; /* Cumulative ACK count */ ktime_t cong_tstamp; /* Last time cwnd was changed */ + /* RACK-TLP [RFC8985] state. */ + ktime_t rack_xmit_ts; /* Latest transmission timestamp */ + ktime_t rack_rtt; /* RTT of most recently ACK'd segment */ + ktime_t rack_rtt_ts; /* Timestamp of rack_rtt */ + ktime_t rack_reo_wnd; /* Reordering window */ + unsigned int rack_reo_wnd_mult; /* Multiplier applied to rack_reo_wnd */ + int rack_reo_wnd_persist; /* Num loss recoveries before reset reo_wnd */ + rxrpc_seq_t rack_fack; /* Highest sequence so far ACK'd */ + rxrpc_seq_t rack_end_seq; /* Highest sequence seen */ + rxrpc_seq_t rack_dsack_round; /* DSACK opt recv'd in latest roundtrip */ + bool rack_dsack_round_none; /* T if dsack_round is "None" */ + bool rack_reordering_seen; /* T if detected reordering event */ + enum rxrpc_rack_timer_mode rack_timer_mode; /* Current mode of RACK timer */ + bool tlp_is_retrans; /* T if unacked TLP retransmission */ + rxrpc_serial_t tlp_serial; /* Serial of TLP probe (or 0 if none in progress) */ + rxrpc_seq_t tlp_seq; /* Sequence of TLP probe */ + unsigned int tlp_rtt_taken; /* Last time RTT taken */ + ktime_t tlp_max_ack_delay; /* Sender budget for max delayed ACK interval */ + /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ u16 ackr_sack_base; /* Starting slot in SACK table ring */ @@ -783,6 +817,9 @@ struct rxrpc_ack_summary { bool retrans_timeo:1; /* T if reTx due to timeout happened */ bool need_retransmit:1; /* T if we need transmission */ bool rtt_sample_avail:1; /* T if RTT sample available */ + bool in_fast_or_rto_recovery:1; + bool exiting_fast_or_rto_recovery:1; + bool tlp_probe_acked:1; /* T if the TLP probe seq was acked */ u8 /*enum rxrpc_congest_change*/ change; }; @@ -864,6 +901,7 @@ struct rxrpc_txqueue { unsigned long segment_lost; /* Bit-per-buf: Set if declared lost */ unsigned long segment_retransmitted; /* Bit-per-buf: Set if retransmitted */ unsigned long rtt_samples; /* Bit-per-buf: Set if available for RTT */ + unsigned long ever_retransmitted; /* Bit-per-buf: Set if ever retransmitted */ /* The arrays we want to pack into as few cache lines as possible. */ struct { @@ -883,7 +921,9 @@ struct rxrpc_send_data_req { struct rxrpc_txqueue *tq; /* Tx queue segment holding first DATA */ rxrpc_seq_t seq; /* Sequence of first data */ int n; /* Number of DATA packets to glue into jumbo */ + bool retrans; /* T if this is a retransmission */ bool did_send; /* T if did actually send */ + bool tlp_probe; /* T if this is a TLP probe */ int /* enum rxrpc_txdata_trace */ trace; }; @@ -943,8 +983,9 @@ void rxrpc_propose_ping(struct rxrpc_call *call, u32 serial, enum rxrpc_propose_ack_trace why); void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); -void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response); - +void rxrpc_resend_tlp(struct rxrpc_call *call); +void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit, + enum rxrpc_txdata_trace trace); bool rxrpc_input_call_event(struct rxrpc_call *call); /* @@ -1123,6 +1164,32 @@ void rxrpc_congestion_degrade(struct rxrpc_call *); void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *); void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *); +/* + * input_rack.c + */ +void rxrpc_input_rack_one(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned int ix); +void rxrpc_input_rack(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned long new_acks); +void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary); +ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now); +void rxrpc_tlp_send_probe(struct rxrpc_call *call); +void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary); +void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by); + +/* Initialise TLP state [RFC8958 7.1]. */ +static inline void rxrpc_tlp_init(struct rxrpc_call *call) +{ + call->tlp_serial = 0; + call->tlp_seq = call->acks_hard_ack; + call->tlp_is_retrans = false; +} + /* * io_thread.c */ @@ -1402,6 +1469,11 @@ static inline u32 latest(u32 seq1, u32 seq2) return after(seq1, seq2) ? seq1 : seq2; } +static inline bool rxrpc_seq_in_txq(const struct rxrpc_txqueue *tq, rxrpc_seq_t seq) +{ + return (seq & (RXRPC_NR_TXQUEUE - 1)) == tq->qbase; +} + static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk_buff *skb) { rxrpc_get_skb(skb, rxrpc_skb_get_call_rx); @@ -1409,6 +1481,31 @@ static inline void rxrpc_queue_rx_call_packet(struct rxrpc_call *call, struct sk rxrpc_poke_call(call, rxrpc_call_poke_rx_packet); } +/* + * Calculate how much space there is for transmitting more DATA packets. + */ +static inline unsigned int rxrpc_tx_window_space(const struct rxrpc_call *call) +{ + int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); + int transmitted = call->tx_top - call->tx_bottom; + + return max(winsize - transmitted, 0); +} + +static inline unsigned int rxrpc_left_out(const struct rxrpc_call *call) +{ + return call->acks_nr_sacks + call->tx_nr_lost; +} + +/* + * Calculate the number of transmitted DATA packets assumed to be in flight + * [approx RFC6675]. + */ +static inline unsigned int rxrpc_tx_in_flight(const struct rxrpc_call *call) +{ + return call->tx_nr_sent - rxrpc_left_out(call) + call->tx_nr_resent; +} + /* * debug tracing */ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7af275544251..8e477f7f8850 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -54,35 +54,21 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_delayed_ack); } -/* - * Handle congestion being detected by the retransmit timeout. - */ -static void rxrpc_congestion_timeout(struct rxrpc_call *call) -{ - set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); -} - /* * Retransmit one or more packets. */ static bool rxrpc_retransmit_data(struct rxrpc_call *call, - struct rxrpc_send_data_req *req, - ktime_t rto, bool skip_too_young) + struct rxrpc_send_data_req *req) { struct rxrpc_txqueue *tq = req->tq; unsigned int ix = req->seq & RXRPC_TXQ_MASK; struct rxrpc_txbuf *txb = tq->bufs[ix]; - ktime_t xmit_ts, resend_at; _enter("%x,%x,%x,%x", tq->qbase, req->seq, ix, txb->debug_id); - xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); - resend_at = ktime_add(xmit_ts, rto); - trace_rxrpc_retransmit(call, req, txb, ktime_sub(resend_at, req->now)); - if (skip_too_young && ktime_after(resend_at, req->now)) - return false; + req->retrans = true; + trace_rxrpc_retransmit(call, req, txb); - __set_bit(ix, &tq->segment_retransmitted); txb->flags |= RXRPC_TXBUF_RESENT; rxrpc_send_data_packet(call, req); rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); @@ -97,133 +83,76 @@ static bool rxrpc_retransmit_data(struct rxrpc_call *call, /* * Perform retransmission of NAK'd and unack'd packets. */ -void rxrpc_resend(struct rxrpc_call *call, rxrpc_serial_t ack_serial, bool ping_response) +static void rxrpc_resend(struct rxrpc_call *call) { struct rxrpc_send_data_req req = { .now = ktime_get_real(), .trace = rxrpc_txdata_retransmit, }; - struct rxrpc_txqueue *tq = call->tx_queue; - ktime_t lowest_xmit_ts = KTIME_MAX; - ktime_t rto = rxrpc_get_rto_backoff(call, false); - bool unacked = false; + struct rxrpc_txqueue *tq; _enter("{%d,%d}", call->tx_bottom, call->tx_top); - if (call->tx_bottom == call->tx_top) { - call->resend_at = KTIME_MAX; - trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); - return; - } + trace_rxrpc_resend(call, call->acks_highest_serial); - trace_rxrpc_resend(call, ack_serial); - - /* Scan the transmission queue, looking for explicitly NAK'd packets. */ - do { - unsigned long naks = ~tq->segment_acked; - rxrpc_seq_t tq_top = tq->qbase + RXRPC_NR_TXQUEUE - 1; + /* Scan the transmission queue, looking for lost packets. */ + for (tq = call->tx_queue; tq; tq = tq->next) { + unsigned long lost = tq->segment_lost; if (after(tq->qbase, call->tx_transmitted)) break; - if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE) - naks &= (1UL << tq->nr_reported_acks) - 1; - _debug("retr %16lx %u c=%08x [%x]", tq->segment_acked, tq->nr_reported_acks, call->debug_id, tq->qbase); - _debug("nack %16lx", naks); + _debug("lost %16lx", lost); - while (naks) { - unsigned int ix = __ffs(naks); + trace_rxrpc_resend_lost(call, tq, lost); + while (lost) { + unsigned int ix = __ffs(lost); struct rxrpc_txbuf *txb = tq->bufs[ix]; - __clear_bit(ix, &naks); - if (after(txb->serial, call->acks_highest_serial)) - continue; /* Ack point not yet reached */ - - rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); + __clear_bit(ix, &lost); + rxrpc_see_txbuf(txb, rxrpc_txbuf_see_lost); req.tq = tq; req.seq = tq->qbase + ix; req.n = 1; - rxrpc_retransmit_data(call, &req, rto, false); - } - - /* Anything after the soft-ACK table up to and including - * ack.previousPacket will get ACK'd or NACK'd in due course, - * so don't worry about those here. We do, however, need to - * consider retransmitting anything beyond that point. - */ - if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE && - after(tq_top, call->acks_prev_seq)) { - rxrpc_seq_t start = latest(call->acks_prev_seq, - tq->qbase + tq->nr_reported_acks); - rxrpc_seq_t stop = earliest(tq_top, call->tx_transmitted); - - _debug("unrep %x-%x", start, stop); - for (rxrpc_seq_t seq = start; before_eq(seq, stop); seq++) { - rxrpc_serial_t serial = tq->segment_serial[seq & RXRPC_TXQ_MASK]; - - if (ping_response && - before(serial, call->acks_highest_serial)) - break; /* Wasn't accounted for by a more recent ping. */ - req.tq = tq; - req.seq = seq; - req.n = 1; - if (rxrpc_retransmit_data(call, &req, rto, true)) - unacked = true; - } + rxrpc_retransmit_data(call, &req); } - - /* Work out the next retransmission timeout. */ - if (ktime_before(tq->xmit_ts_base, lowest_xmit_ts)) { - unsigned int lowest_us = UINT_MAX; - - for (int i = 0; i < RXRPC_NR_TXQUEUE; i++) - if (!test_bit(i, &tq->segment_acked) && - tq->segment_xmit_ts[i] < lowest_us) - lowest_us = tq->segment_xmit_ts[i]; - _debug("lowest[%x] %llx %u", tq->qbase, tq->xmit_ts_base, lowest_us); - - if (lowest_us != UINT_MAX) { - ktime_t lowest_ns = ktime_add_us(tq->xmit_ts_base, lowest_us); - - if (ktime_before(lowest_ns, lowest_xmit_ts)) - lowest_xmit_ts = lowest_ns; - } - } - } while ((tq = tq->next)); - - if (lowest_xmit_ts < KTIME_MAX) { - ktime_t delay = rxrpc_get_rto_backoff(call, req.did_send); - ktime_t resend_at = ktime_add(lowest_xmit_ts, delay); - - _debug("delay %llu %lld", delay, ktime_sub(resend_at, req.now)); - call->resend_at = resend_at; - trace_rxrpc_timer_set(call, ktime_sub(resend_at, req.now), - rxrpc_timer_trace_resend_reset); - } else { - call->resend_at = KTIME_MAX; - trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); } - if (unacked) - rxrpc_congestion_timeout(call); + rxrpc_get_rto_backoff(call, req.did_send); + _leave(""); +} - /* If there was nothing that needed retransmission then it's likely - * that an ACK got lost somewhere. Send a ping to find out instead of - * retransmitting data. - */ - if (!req.did_send) { - ktime_t next_ping = ktime_add_us(call->acks_latest_ts, - call->srtt_us >> 3); +/* + * Resend the highest-seq DATA packet so far transmitted for RACK-TLP [RFC8985 7.3]. + */ +void rxrpc_resend_tlp(struct rxrpc_call *call) +{ + struct rxrpc_send_data_req req = { + .now = ktime_get_real(), + .seq = call->tx_transmitted, + .n = 1, + .tlp_probe = true, + .trace = rxrpc_txdata_tlp_retransmit, + }; - if (ktime_sub(next_ping, req.now) <= 0) - rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, - rxrpc_propose_ack_ping_for_0_retrans); + /* There's a chance it'll be on the tail segment of the queue. */ + req.tq = READ_ONCE(call->tx_qtail); + if (req.tq && + before(call->tx_transmitted, req.tq->qbase + RXRPC_NR_TXQUEUE)) { + rxrpc_retransmit_data(call, &req); + return; } - _leave(""); + for (req.tq = call->tx_queue; req.tq; req.tq = req.tq->next) { + if (after_eq(call->tx_transmitted, req.tq->qbase) && + before(call->tx_transmitted, req.tq->qbase + RXRPC_NR_TXQUEUE)) { + rxrpc_retransmit_data(call, &req); + return; + } + } } /* @@ -259,18 +188,10 @@ static void rxrpc_close_tx_phase(struct rxrpc_call *call) } } -static unsigned int rxrpc_tx_window_space(struct rxrpc_call *call) -{ - int winsize = umin(call->tx_winsize, call->cong_cwnd + call->cong_extra); - int in_flight = call->tx_top - call->tx_bottom; - - return max(winsize - in_flight, 0); -} - /* - * Transmit some as-yet untransmitted data. + * Transmit some as-yet untransmitted data, to a maximum of the supplied limit. */ -static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, +static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, unsigned int limit, enum rxrpc_txdata_trace trace) { int space = rxrpc_tx_window_space(call); @@ -335,8 +256,8 @@ static void rxrpc_transmit_fresh_data(struct rxrpc_call *call, } } -static void rxrpc_transmit_some_data(struct rxrpc_call *call, - enum rxrpc_txdata_trace trace) +void rxrpc_transmit_some_data(struct rxrpc_call *call, unsigned int limit, + enum rxrpc_txdata_trace trace) { switch (__rxrpc_call_state(call)) { case RXRPC_CALL_SERVER_ACK_REQUEST: @@ -353,7 +274,7 @@ static void rxrpc_transmit_some_data(struct rxrpc_call *call, rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); return; } - rxrpc_transmit_fresh_data(call, trace); + rxrpc_transmit_fresh_data(call, limit, trace); break; default: return; @@ -380,7 +301,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) { struct sk_buff *skb; ktime_t now, t; - bool resend = false, did_receive = false, saw_ack = false; + bool did_receive = false, saw_ack = false; s32 abort_code; rxrpc_see_call(call, rxrpc_call_see_input); @@ -398,21 +319,33 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) goto out; } - while ((skb = __skb_dequeue(&call->rx_queue))) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + do { + skb = __skb_dequeue(&call->rx_queue); + if (skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (__rxrpc_call_is_complete(call) || + skb->mark == RXRPC_SKB_MARK_ERROR) { + rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); + goto out; + } + + saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; - if (__rxrpc_call_is_complete(call) || - skb->mark == RXRPC_SKB_MARK_ERROR) { + rxrpc_input_call_packet(call, skb); rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); - goto out; + did_receive = true; } - saw_ack |= sp->hdr.type == RXRPC_PACKET_TYPE_ACK; + t = ktime_sub(call->rack_timo_at, ktime_get_real()); + if (t <= 0) { + trace_rxrpc_timer_exp(call, t, + rxrpc_timer_trace_rack_off + call->rack_timer_mode); + call->rack_timo_at = KTIME_MAX; + rxrpc_rack_timer_expired(call, t); + } - rxrpc_input_call_packet(call, skb); - rxrpc_free_skb(skb, rxrpc_skb_put_call_rx); - did_receive = true; - } + } while (!skb_queue_empty(&call->rx_queue)); /* If we see our async-event poke, check for timeout trippage. */ now = ktime_get_real(); @@ -445,13 +378,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) rxrpc_propose_ack_delayed_ack); } - t = ktime_sub(call->ack_lost_at, now); - if (t <= 0) { - trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_lost_ack); - call->ack_lost_at = KTIME_MAX; - set_bit(RXRPC_CALL_EV_ACK_LOST, &call->events); - } - t = ktime_sub(call->ping_at, now); if (t <= 0) { trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_ping); @@ -460,15 +386,6 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) rxrpc_propose_ack_ping_for_keepalive); } - t = ktime_sub(call->resend_at, now); - if (t <= 0) { - trace_rxrpc_timer_exp(call, t, rxrpc_timer_trace_resend); - call->resend_at = KTIME_MAX; - resend = true; - } - - rxrpc_transmit_some_data(call, rxrpc_txdata_new_data); - now = ktime_get_real(); t = ktime_sub(call->keepalive_at, now); if (t <= 0) { @@ -478,21 +395,30 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) rxrpc_propose_ack_ping_for_keepalive); } + if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events)) + rxrpc_send_initial_ping(call); + + rxrpc_transmit_some_data(call, UINT_MAX, rxrpc_txdata_new_data); + if (saw_ack) rxrpc_congestion_degrade(call); - if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events)) - rxrpc_send_initial_ping(call); + if (did_receive && + (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_SEND_REQUEST || + __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SEND_REPLY)) { + t = ktime_sub(call->rack_timo_at, ktime_get_real()); + trace_rxrpc_rack(call, t); + } /* Process events */ if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_lost_ack); - if (resend && + if (call->tx_nr_lost > 0 && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY && !test_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags)) - rxrpc_resend(call, 0, false); + rxrpc_resend(call); if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, @@ -520,8 +446,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call) set(call->expect_req_by); set(call->expect_rx_by); set(call->delay_ack_at); - set(call->ack_lost_at); - set(call->resend_at); + set(call->rack_timo_at); set(call->keepalive_at); set(call->ping_at); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index fb4ee0d2e9e1..5a543c3f6fb0 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -160,8 +160,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, call->ackr_window = 1; call->ackr_wtop = 1; call->delay_ack_at = KTIME_MAX; - call->ack_lost_at = KTIME_MAX; - call->resend_at = KTIME_MAX; + call->rack_timo_at = KTIME_MAX; call->ping_at = KTIME_MAX; call->keepalive_at = KTIME_MAX; call->expect_rx_by = KTIME_MAX; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9f308bd512e9..4974b5accafa 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -27,13 +27,13 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, } /* - * Do TCP-style congestion management [RFC 5681]. + * Do TCP-style congestion management [RFC5681]. */ static void rxrpc_congestion_management(struct rxrpc_call *call, struct rxrpc_ack_summary *summary) { summary->change = rxrpc_cong_no_change; - summary->in_flight = (call->tx_top - call->tx_bottom) - call->acks_nr_sacks; + summary->in_flight = rxrpc_tx_in_flight(call); if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; @@ -106,9 +106,12 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, call->cong_extra = 0; call->cong_dup_acks = 0; summary->need_retransmit = true; + summary->in_fast_or_rto_recovery = true; goto out; case RXRPC_CA_FAST_RETRANSMIT: + rxrpc_tlp_init(call); + summary->in_fast_or_rto_recovery = true; if (!summary->new_low_snack) { if (summary->nr_new_sacks == 0) call->cong_cwnd += 1; @@ -121,8 +124,10 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } else { summary->change = rxrpc_cong_progress; call->cong_cwnd = call->cong_ssthresh; - if (call->acks_nr_snacks == 0) + if (call->acks_nr_snacks == 0) { + summary->exiting_fast_or_rto_recovery = true; goto resume_normality; + } } goto out; @@ -171,7 +176,7 @@ send_extra_data: */ void rxrpc_congestion_degrade(struct rxrpc_call *call) { - ktime_t rtt, now; + ktime_t rtt, now, time_since; if (call->cong_ca_state != RXRPC_CA_SLOW_START && call->cong_ca_state != RXRPC_CA_CONGEST_AVOIDANCE) @@ -181,10 +186,11 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) rtt = ns_to_ktime(call->srtt_us * (NSEC_PER_USEC / 8)); now = ktime_get_real(); - if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now)) + time_since = ktime_sub(now, call->tx_last_sent); + if (ktime_before(time_since, rtt)) return; - trace_rxrpc_reset_cwnd(call, now); + trace_rxrpc_reset_cwnd(call, time_since, rtt); rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset); call->tx_last_sent = now; call->cong_ca_state = RXRPC_CA_SLOW_START; @@ -200,11 +206,11 @@ static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, struct rxrpc_txqueue *tq, int ix) { + ktime_t xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); + rxrpc_call_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, summary->acked_serial, summary->ack_serial, - ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]), - call->acks_latest_ts); - summary->rtt_sample_avail = false; + xmit_ts, call->acks_latest_ts); __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */ } @@ -216,7 +222,7 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, { struct rxrpc_txqueue *tq = call->tx_queue; rxrpc_seq_t seq = call->tx_bottom + 1; - bool rot_last = false; + bool rot_last = false, trace = false; _enter("%x,%x", call->tx_bottom, to); @@ -250,14 +256,16 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, rot_last = true; } - if (summary->rtt_sample_avail && - summary->acked_serial == tq->segment_serial[ix] && + if (summary->acked_serial == tq->segment_serial[ix] && test_bit(ix, &tq->rtt_samples)) rxrpc_add_data_rtt_sample(call, summary, tq, ix); if (ix == tq->nr_reported_acks) { /* Packet directly hard ACK'd. */ tq->nr_reported_acks++; + rxrpc_input_rack_one(call, summary, tq, ix); + if (seq == call->tlp_seq) + summary->tlp_probe_acked = true; summary->nr_new_hacks++; __set_bit(ix, &tq->segment_acked); trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_hack); @@ -268,11 +276,21 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, } else { /* Soft NAK -> hard ACK. */ call->acks_nr_snacks--; + rxrpc_input_rack_one(call, summary, tq, ix); + if (seq == call->tlp_seq) + summary->tlp_probe_acked = true; summary->nr_new_hacks++; __set_bit(ix, &tq->segment_acked); trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_snak); } + call->tx_nr_sent--; + if (__test_and_clear_bit(ix, &tq->segment_lost)) + call->tx_nr_lost--; + if (__test_and_clear_bit(ix, &tq->segment_retransmitted)) + call->tx_nr_resent--; + __clear_bit(ix, &tq->ever_retransmitted); + rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated); tq->bufs[ix] = NULL; @@ -282,7 +300,10 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, rxrpc_txqueue_rotate)); seq++; + trace = true; if (!(seq & RXRPC_TXQ_MASK)) { + trace_rxrpc_rack_update(call, summary); + trace = false; prefetch(tq->next); if (tq != call->tx_qtail) { call->tx_qbase += RXRPC_NR_TXQUEUE; @@ -299,6 +320,9 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, } while (before_eq(seq, to)); + if (trace) + trace_rxrpc_rack_update(call, summary); + if (rot_last) { set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); if (tq) { @@ -325,8 +349,10 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, { ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); - call->resend_at = KTIME_MAX; - trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF; + call->rack_timo_at = KTIME_MAX; + trace_rxrpc_rack_timer(call, 0, false); + trace_rxrpc_timer_can(call, rxrpc_timer_trace_rack_off + call->rack_timer_mode); switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: @@ -842,10 +868,13 @@ static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call, rxrpc_seq_t seq, rxrpc_seq_t *lowest_nak) { - unsigned long old_reported, flipped, new_acks, a_to_n, n_to_a; + unsigned long old_reported = 0, flipped, new_acks = 0; + unsigned long a_to_n, n_to_a = 0; int new, a, n; - old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks); + if (tq->nr_reported_acks > 0) + old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks); + _enter("{%x,%lx,%d},%lx,%d,%x", tq->qbase, tq->segment_acked, tq->nr_reported_acks, extracted_acks, nr_reported, seq); @@ -898,6 +927,18 @@ static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call, if (before(lowest, *lowest_nak)) *lowest_nak = lowest; } + + if (summary->acked_serial) + rxrpc_input_soft_rtt(call, summary, tq); + + new_acks |= n_to_a; + if (new_acks) + rxrpc_input_rack(call, summary, tq, new_acks); + + if (call->tlp_serial && + rxrpc_seq_in_txq(tq, call->tlp_seq) && + test_bit(call->tlp_seq - tq->qbase, &new_acks)) + summary->tlp_probe_acked = true; } /* @@ -940,8 +981,6 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, _debug("bound %16lx %u", extracted, nr); - if (summary->rtt_sample_avail) - rxrpc_input_soft_rtt(call, summary, tq); rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, seq - RXRPC_NR_TXQUEUE, &lowest_nak); extracted = ~0UL; @@ -1063,7 +1102,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) /* Discard any out-of-order or duplicate ACKs (outside lock). */ if (!rxrpc_is_ack_valid(call, hard_ack, prev_pkt)) { trace_rxrpc_rx_discard_ack(call, summary.ack_serial, hard_ack, prev_pkt); - goto send_response; + goto send_response; /* Still respond if requested. */ } trailer.maxMTU = 0; @@ -1079,14 +1118,19 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; - switch (summary.ack_reason) { - case RXRPC_ACK_PING: - break; - default: - if (summary.acked_serial && - after(summary.acked_serial, call->acks_highest_serial)) - call->acks_highest_serial = summary.acked_serial; - break; + if (summary.acked_serial) { + switch (summary.ack_reason) { + case RXRPC_ACK_PING_RESPONSE: + rxrpc_complete_rtt_probe(call, call->acks_latest_ts, + summary.acked_serial, summary.ack_serial, + rxrpc_rtt_rx_ping_response); + break; + default: + if (after(summary.acked_serial, call->acks_highest_serial)) + call->acks_highest_serial = summary.acked_serial; + summary.rtt_sample_avail = true; + break; + } } /* Parse rwind and mtu sizes if provided. */ @@ -1096,15 +1140,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (hard_ack + 1 == 0) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); - if (summary.acked_serial) { - if (summary.ack_reason == RXRPC_ACK_PING_RESPONSE) - rxrpc_complete_rtt_probe(call, call->acks_latest_ts, - summary.acked_serial, summary.ack_serial, - rxrpc_rtt_rx_ping_response); - else - summary.rtt_sample_avail = true; - } - /* Ignore ACKs unless we are or have just been transmitting. */ switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: @@ -1141,10 +1176,14 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ping(call, summary.ack_serial, rxrpc_propose_ack_ping_for_lost_reply); + /* Drive the congestion management algorithm first and then RACK-TLP as + * the latter depends on the state/change in state in the former. + */ rxrpc_congestion_management(call, &summary); - if (summary.need_retransmit) - rxrpc_resend(call, summary.ack_serial, - summary.ack_reason == RXRPC_ACK_PING_RESPONSE); + rxrpc_rack_detect_loss_and_arm_timer(call, &summary); + rxrpc_tlp_process_ack(call, &summary); + if (call->tlp_serial && after_eq(summary.acked_serial, call->tlp_serial)) + call->tlp_serial = 0; send_response: if (summary.ack_reason == RXRPC_ACK_PING) diff --git a/net/rxrpc/input_rack.c b/net/rxrpc/input_rack.c new file mode 100644 index 000000000000..13c371261e0a --- /dev/null +++ b/net/rxrpc/input_rack.c @@ -0,0 +1,418 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RACK-TLP [RFC8958] Implementation + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ar-internal.h" + +static bool rxrpc_rack_sent_after(ktime_t t1, rxrpc_seq_t seq1, + ktime_t t2, rxrpc_seq_t seq2) +{ + if (ktime_after(t1, t2)) + return true; + return t1 == t2 && after(seq1, seq2); +} + +/* + * Mark a packet lost. + */ +static void rxrpc_rack_mark_lost(struct rxrpc_call *call, + struct rxrpc_txqueue *tq, unsigned int ix) +{ + if (__test_and_set_bit(ix, &tq->segment_lost)) { + if (__test_and_clear_bit(ix, &tq->segment_retransmitted)) + call->tx_nr_resent--; + } else { + call->tx_nr_lost++; + } + tq->segment_xmit_ts[ix] = UINT_MAX; +} + +/* + * Get the transmission time of a packet in the Tx queue. + */ +static ktime_t rxrpc_get_xmit_ts(const struct rxrpc_txqueue *tq, unsigned int ix) +{ + if (tq->segment_xmit_ts[ix] == UINT_MAX) + return KTIME_MAX; + return ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); +} + +/* + * Get a bitmask of nack bits for a queue segment and mask off any that aren't + * yet reported. + */ +static unsigned long rxrpc_tq_nacks(const struct rxrpc_txqueue *tq) +{ + unsigned long nacks = ~tq->segment_acked; + + if (tq->nr_reported_acks < RXRPC_NR_TXQUEUE) + nacks &= (1UL << tq->nr_reported_acks) - 1; + return nacks; +} + +/* + * Update the RACK state for the most recently sent packet that has been + * delivered [RFC8958 6.2 Step 2]. + */ +static void rxrpc_rack_update(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned int ix) +{ + rxrpc_seq_t seq = tq->qbase + ix; + ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix); + ktime_t rtt = ktime_sub(call->acks_latest_ts, xmit_ts); + + if (__test_and_clear_bit(ix, &tq->segment_lost)) + call->tx_nr_lost--; + + if (test_bit(ix, &tq->segment_retransmitted)) { + /* Use Rx.serial instead of TCP.ACK.ts_option.echo_reply. */ + if (before(call->acks_highest_serial, tq->segment_serial[ix])) + return; + if (rtt < minmax_get(&call->min_rtt)) + return; + } + + /* The RACK algorithm requires the segment ACKs to be traversed in + * order of segment transmission - but the only thing this seems to + * matter for is that RACK.rtt is set to the rtt of the most recently + * transmitted segment. We should be able to achieve the same by only + * setting RACK.rtt if the xmit time is greater. + */ + if (ktime_after(xmit_ts, call->rack_rtt_ts)) { + call->rack_rtt = rtt; + call->rack_rtt_ts = xmit_ts; + } + + if (rxrpc_rack_sent_after(xmit_ts, seq, call->rack_xmit_ts, call->rack_end_seq)) { + call->rack_rtt = rtt; + call->rack_xmit_ts = xmit_ts; + call->rack_end_seq = seq; + } +} + +/* + * Detect data segment reordering [RFC8958 6.2 Step 3]. + */ +static void rxrpc_rack_detect_reordering(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned int ix) +{ + rxrpc_seq_t seq = tq->qbase + ix; + + /* Track the highest sequence number so far ACK'd. This is not + * necessarily the same as ack.firstPacket + ack.nAcks - 1 as the peer + * could put a NACK in the last SACK slot. + */ + if (after(seq, call->rack_fack)) + call->rack_fack = seq; + else if (before(seq, call->rack_fack) && + test_bit(ix, &tq->segment_retransmitted)) + call->rack_reordering_seen = true; +} + +void rxrpc_input_rack_one(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned int ix) +{ + rxrpc_rack_update(call, summary, tq, ix); + rxrpc_rack_detect_reordering(call, summary, tq, ix); +} + +void rxrpc_input_rack(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned long new_acks) +{ + while (new_acks) { + unsigned int ix = __ffs(new_acks); + + __clear_bit(ix, &new_acks); + rxrpc_input_rack_one(call, summary, tq, ix); + } + + trace_rxrpc_rack_update(call, summary); +} + +/* + * Update the reordering window [RFC8958 6.2 Step 4]. Returns the updated + * duration of the reordering window. + * + * Note that the Rx protocol doesn't have a 'DSACK option' per se, but ACKs can + * be given a 'DUPLICATE' reason with the serial number referring to the + * duplicated DATA packet. Rx does not inform as to whether this was a + * reception of the same packet twice or of a retransmission of a packet we + * already received (though this could be determined by the transmitter based + * on the serial number). + */ +static ktime_t rxrpc_rack_update_reo_wnd(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary) +{ + rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */ + rxrpc_seq_t snd_nxt = call->tx_transmitted + 1; /* Next seq to be sent */ + bool have_dsack_option = summary->ack_reason == RXRPC_ACK_DUPLICATE; + int dup_thresh = 3; + + /* DSACK-based reordering window adaptation */ + if (!call->rack_dsack_round_none && + after_eq(snd_una, call->rack_dsack_round)) + call->rack_dsack_round_none = true; + + /* Grow the reordering window per round that sees DSACK. Reset the + * window after 16 DSACK-free recoveries. + */ + if (call->rack_dsack_round_none && have_dsack_option) { + call->rack_dsack_round_none = false; + call->rack_dsack_round = snd_nxt; + call->rack_reo_wnd_mult++; + call->rack_reo_wnd_persist = 16; + } else if (summary->exiting_fast_or_rto_recovery) { + call->rack_reo_wnd_persist--; + if (call->rack_reo_wnd_persist <= 0) + call->rack_reo_wnd_mult = 1; + } + + if (!call->rack_reordering_seen) { + if (summary->in_fast_or_rto_recovery) + return 0; + if (call->acks_nr_sacks >= dup_thresh) + return 0; + } + + return us_to_ktime(umin(call->rack_reo_wnd_mult * minmax_get(&call->min_rtt) / 4, + call->srtt_us >> 3)); +} + +/* + * Detect losses [RFC8958 6.2 Step 5]. + */ +static ktime_t rxrpc_rack_detect_loss(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary) +{ + struct rxrpc_txqueue *tq; + ktime_t timeout = 0, lost_after, now = ktime_get_real(); + + call->rack_reo_wnd = rxrpc_rack_update_reo_wnd(call, summary); + lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd); + trace_rxrpc_rack_scan_loss(call); + + for (tq = call->tx_queue; tq; tq = tq->next) { + unsigned long nacks = rxrpc_tq_nacks(tq); + + if (after(tq->qbase, call->tx_transmitted)) + break; + trace_rxrpc_rack_scan_loss_tq(call, tq, nacks); + + /* Skip ones marked lost but not yet retransmitted */ + nacks &= ~tq->segment_lost | tq->segment_retransmitted; + + while (nacks) { + unsigned int ix = __ffs(nacks); + rxrpc_seq_t seq = tq->qbase + ix; + ktime_t remaining; + ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix); + + __clear_bit(ix, &nacks); + + if (rxrpc_rack_sent_after(call->rack_xmit_ts, call->rack_end_seq, + xmit_ts, seq)) { + remaining = ktime_sub(ktime_add(xmit_ts, lost_after), now); + if (remaining <= 0) { + rxrpc_rack_mark_lost(call, tq, ix); + trace_rxrpc_rack_detect_loss(call, summary, seq); + } else { + timeout = max(remaining, timeout); + } + } + } + } + + return timeout; +} + +/* + * Detect losses and set a timer to retry the detection [RFC8958 6.2 Step 5]. + */ +void rxrpc_rack_detect_loss_and_arm_timer(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary) +{ + ktime_t timeout = rxrpc_rack_detect_loss(call, summary); + + if (timeout) { + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RACK_REORDER; + call->rack_timo_at = ktime_add(ktime_get_real(), timeout); + trace_rxrpc_rack_timer(call, timeout, false); + trace_rxrpc_timer_set(call, timeout, rxrpc_timer_trace_rack_reo); + } +} + +/* + * Handle RACK-TLP RTO expiration [RFC8958 6.3]. + */ +static void rxrpc_rack_mark_losses_on_rto(struct rxrpc_call *call) +{ + struct rxrpc_txqueue *tq; + rxrpc_seq_t snd_una = call->acks_lowest_nak; /* Lowest unack'd seq */ + ktime_t lost_after = ktime_add(call->rack_rtt, call->rack_reo_wnd); + ktime_t deadline = ktime_sub(ktime_get_real(), lost_after); + + for (tq = call->tx_queue; tq; tq = tq->next) { + unsigned long unacked = ~tq->segment_acked; + + trace_rxrpc_rack_mark_loss_tq(call, tq); + while (unacked) { + unsigned int ix = __ffs(unacked); + rxrpc_seq_t seq = tq->qbase + ix; + ktime_t xmit_ts = rxrpc_get_xmit_ts(tq, ix); + + if (after(seq, call->tx_transmitted)) + return; + __clear_bit(ix, &unacked); + + if (seq == snd_una || + ktime_before(xmit_ts, deadline)) + rxrpc_rack_mark_lost(call, tq, ix); + } + } +} + +/* + * Calculate the TLP loss probe timeout (PTO) [RFC8958 7.2]. + */ +ktime_t rxrpc_tlp_calc_pto(struct rxrpc_call *call, ktime_t now) +{ + unsigned int flight_size = rxrpc_tx_in_flight(call); + ktime_t rto_at = ktime_add(call->tx_last_sent, + rxrpc_get_rto_backoff(call, false)); + ktime_t pto; + + if (call->rtt_count > 0) { + /* Use 2*SRTT as the timeout. */ + pto = ns_to_ktime(call->srtt_us * NSEC_PER_USEC / 4); + if (flight_size) + pto = ktime_add(pto, call->tlp_max_ack_delay); + } else { + pto = NSEC_PER_SEC; + } + + if (ktime_after(ktime_add(now, pto), rto_at)) + pto = ktime_sub(rto_at, now); + return pto; +} + +/* + * Send a TLP loss probe on PTO expiration [RFC8958 7.3]. + */ +void rxrpc_tlp_send_probe(struct rxrpc_call *call) +{ + unsigned int in_flight = rxrpc_tx_in_flight(call); + + if (after_eq(call->acks_hard_ack, call->tx_transmitted)) + return; /* Everything we transmitted has been acked. */ + + /* There must be no other loss probe still in flight and we need to + * have taken a new RTT sample since last probe or the start of + * connection. + */ + if (!call->tlp_serial && + call->tlp_rtt_taken != call->rtt_taken) { + call->tlp_is_retrans = false; + if (after(call->send_top, call->tx_transmitted) && + rxrpc_tx_window_space(call) > 0) { + /* Transmit the lowest-sequence unsent DATA */ + call->tx_last_serial = 0; + rxrpc_transmit_some_data(call, 1, rxrpc_txdata_tlp_new_data); + call->tlp_serial = call->tx_last_serial; + call->tlp_seq = call->tx_transmitted; + trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_transmit_new); + in_flight = rxrpc_tx_in_flight(call); + } else { + /* Retransmit the highest-sequence DATA sent */ + call->tx_last_serial = 0; + rxrpc_resend_tlp(call); + call->tlp_is_retrans = true; + trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_retransmit); + } + } else { + trace_rxrpc_tlp_probe(call, rxrpc_tlp_probe_trace_busy); + } + + if (in_flight != 0) { + ktime_t rto = rxrpc_get_rto_backoff(call, false); + + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_RTO; + call->rack_timo_at = ktime_add(ktime_get_real(), rto); + trace_rxrpc_rack_timer(call, rto, false); + trace_rxrpc_timer_set(call, rto, rxrpc_timer_trace_rack_rto); + } +} + +/* + * Detect losses using the ACK of a TLP loss probe [RFC8958 7.4]. + */ +void rxrpc_tlp_process_ack(struct rxrpc_call *call, struct rxrpc_ack_summary *summary) +{ + if (!call->tlp_serial || after(call->tlp_seq, call->acks_hard_ack)) + return; + + if (!call->tlp_is_retrans) { + /* TLP of new data delivered */ + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_new_data); + call->tlp_serial = 0; + } else if (summary->ack_reason == RXRPC_ACK_DUPLICATE && + summary->acked_serial == call->tlp_serial) { + /* General Case: Detected packet losses using RACK [7.4.1] */ + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_dup_acked); + call->tlp_serial = 0; + } else if (after(call->acks_hard_ack, call->tlp_seq)) { + /* Repaired the single loss */ + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_hard_beyond); + call->tlp_serial = 0; + // TODO: Invoke congestion control to react to the loss + // event the probe has repaired + } else if (summary->tlp_probe_acked) { + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_acked); + /* Special Case: Detected a single loss repaired by the loss + * probe [7.4.2] + */ + call->tlp_serial = 0; + } else { + trace_rxrpc_tlp_ack(call, summary, rxrpc_tlp_ack_trace_incomplete); + } +} + +/* + * Handle RACK timer expiration; returns true to request a resend. + */ +void rxrpc_rack_timer_expired(struct rxrpc_call *call, ktime_t overran_by) +{ + struct rxrpc_ack_summary summary = {}; + enum rxrpc_rack_timer_mode mode = call->rack_timer_mode; + + trace_rxrpc_rack_timer(call, overran_by, true); + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF; + + switch (mode) { + case RXRPC_CALL_RACKTIMER_RACK_REORDER: + rxrpc_rack_detect_loss_and_arm_timer(call, &summary); + break; + case RXRPC_CALL_RACKTIMER_TLP_PTO: + rxrpc_tlp_send_probe(call); + break; + case RXRPC_CALL_RACKTIMER_RTO: + // Might need to poke the congestion algo in some way + rxrpc_rack_mark_losses_on_rto(call); + break; + //case RXRPC_CALL_RACKTIMER_ZEROWIN: + default: + pr_warn("Unexpected rack timer %u", call->rack_timer_mode); + } +} diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index fbacf2056f64..2925c7fc82cf 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -470,6 +470,7 @@ int rxrpc_io_thread(void *data) spin_lock_irq(&local->rx_queue.lock); skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); spin_unlock_irq(&local->rx_queue.lock); + trace_rxrpc_iothread_rx(local, skb_queue_len(&rx_queue)); } /* Distribute packets and errors. */ diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f934551a9b1c..6f7a125d6e90 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -542,12 +542,14 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se unsigned int xmit_ts; rxrpc_seq_t seq = req->seq; size_t len = 0; + bool start_tlp = false; trace_rxrpc_tq(call, tq, seq, rxrpc_tq_transmit); /* Each transmission of a Tx packet needs a new serial number */ serial = rxrpc_get_next_serials(call->conn, req->n); + call->tx_last_serial = serial + req->n - 1; call->tx_last_sent = req->now; xmit_ts = rxrpc_prepare_txqueue(tq, req); prefetch(tq->next); @@ -557,6 +559,18 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se struct rxrpc_txbuf *txb = tq->bufs[seq & RXRPC_TXQ_MASK]; _debug("prep[%u] tq=%x q=%x", i, tq->qbase, seq); + + /* Record (re-)transmission for RACK [RFC8985 6.1]. */ + if (__test_and_clear_bit(ix, &tq->segment_lost)) + call->tx_nr_lost--; + if (req->retrans) { + __set_bit(ix, &tq->ever_retransmitted); + __set_bit(ix, &tq->segment_retransmitted); + call->tx_nr_resent++; + } else { + call->tx_nr_sent++; + start_tlp = true; + } tq->segment_xmit_ts[ix] = xmit_ts; tq->segment_serial[ix] = serial; if (i + 1 == req->n) @@ -576,11 +590,24 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se } /* Set timeouts */ - if (call->rtt_count > 1) { - ktime_t delay = rxrpc_get_rto_backoff(call, false); + if (req->tlp_probe) { + /* Sending TLP loss probe [RFC8985 7.3]. */ + call->tlp_serial = serial - 1; + call->tlp_seq = seq - 1; + } else if (start_tlp) { + /* Schedule TLP loss probe [RFC8985 7.2]. */ + ktime_t pto; + + if (!test_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) + /* The first packet may take longer to elicit a response. */ + pto = NSEC_PER_SEC; + else + pto = rxrpc_tlp_calc_pto(call, req->now); - call->ack_lost_at = ktime_add(req->now, delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_lost_ack); + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_TLP_PTO; + call->rack_timo_at = ktime_add(req->now, pto); + trace_rxrpc_rack_timer(call, pto, false); + trace_rxrpc_timer_set(call, pto, rxrpc_timer_trace_rack_tlp_pto); } if (!test_and_set_bit(RXRPC_CALL_BEGAN_RX_TIMER, &call->flags)) { @@ -589,12 +616,6 @@ static size_t rxrpc_prepare_data_packet(struct rxrpc_call *call, struct rxrpc_se call->expect_rx_by = ktime_add(req->now, delay); trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_expect_rx); } - if (call->resend_at == KTIME_MAX) { - ktime_t delay = rxrpc_get_rto_backoff(call, false); - - call->resend_at = ktime_add(req->now, delay); - trace_rxrpc_timer_set(call, delay, rxrpc_timer_trace_resend); - } rxrpc_set_keepalive(call, req->now); return len; -- cgit v1.2.3 From 3f330db30638b6489d548084a7e8843374d41ad0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 5 Dec 2024 08:59:14 -0800 Subject: net: reformat kdoc return statements kernel-doc -Wall warns about missing Return: statement for non-void functions. We have a number of kdocs in our headers which are missing the colon, IOW they use * Return some value or * Returns some value Having the colon makes some sense, it should help kdoc parser avoid false positives. So add them. This is mostly done with a sed script, and removing the unnecessary cases (mostly the comments which aren't kdoc). Acked-by: Johannes Berg Acked-by: Richard Cochran Acked-by: Sergey Ryazanov Reviewed-by: Edward Cree Acked-by: Alexandra Winter Acked-by: Pablo Neira Ayuso Link: https://patch.msgid.link/20241205165914.1071102-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/etherdevice.h | 18 ++++++++-------- include/linux/ethtool.h | 6 +++--- include/linux/if_vlan.h | 28 ++++++++++++------------ include/linux/netdevice.h | 14 ++++++------ include/linux/netfilter/x_tables.h | 2 +- include/linux/netfilter_netdev.h | 3 ++- include/linux/ptp_clock_kernel.h | 4 ++-- include/linux/rfkill.h | 2 +- include/linux/rtnetlink.h | 2 +- include/linux/skbuff.h | 16 +++++++------- include/linux/wwan.h | 2 +- include/net/cfg80211.h | 2 +- include/net/dst.h | 2 +- include/net/genetlink.h | 6 +++--- include/net/ipv6.h | 2 +- include/net/iucv/iucv.h | 30 +++++++++++++------------- include/net/netfilter/nf_tproxy.h | 4 ++-- include/net/netlink.h | 44 +++++++++++++++++++------------------- include/net/page_pool/helpers.h | 9 +++----- include/net/pkt_cls.h | 4 ++-- include/net/tcp.h | 2 +- 21 files changed, 101 insertions(+), 101 deletions(-) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index ecf203f01034..9a1eacf35d37 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -81,7 +81,7 @@ static const u8 eth_ipv6_mcast_addr_base[ETH_ALEN] __aligned(2) = * is_link_local_ether_addr - Determine if given Ethernet address is link-local * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if address is link local reserved addr (01:80:c2:00:00:0X) per + * Return: true if address is link local reserved addr (01:80:c2:00:00:0X) per * IEEE 802.1Q 8.6.3 Frame filtering. * * Please note: addr must be aligned to u16. @@ -104,7 +104,7 @@ static inline bool is_link_local_ether_addr(const u8 *addr) * is_zero_ether_addr - Determine if give Ethernet address is all zeros. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is all zeroes. + * Return: true if the address is all zeroes. * * Please note: addr must be aligned to u16. */ @@ -123,7 +123,7 @@ static inline bool is_zero_ether_addr(const u8 *addr) * is_multicast_ether_addr - Determine if the Ethernet address is a multicast. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a multicast address. + * Return: true if the address is a multicast address. * By definition the broadcast address is also a multicast address. */ static inline bool is_multicast_ether_addr(const u8 *addr) @@ -157,7 +157,7 @@ static inline bool is_multicast_ether_addr_64bits(const u8 *addr) * is_local_ether_addr - Determine if the Ethernet address is locally-assigned one (IEEE 802). * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a local address. + * Return: true if the address is a local address. */ static inline bool is_local_ether_addr(const u8 *addr) { @@ -168,7 +168,7 @@ static inline bool is_local_ether_addr(const u8 *addr) * is_broadcast_ether_addr - Determine if the Ethernet address is broadcast * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is the broadcast address. + * Return: true if the address is the broadcast address. * * Please note: addr must be aligned to u16. */ @@ -183,7 +183,7 @@ static inline bool is_broadcast_ether_addr(const u8 *addr) * is_unicast_ether_addr - Determine if the Ethernet address is unicast * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return true if the address is a unicast address. + * Return: true if the address is a unicast address. */ static inline bool is_unicast_ether_addr(const u8 *addr) { @@ -197,7 +197,7 @@ static inline bool is_unicast_ether_addr(const u8 *addr) * Check that the Ethernet address (MAC) is not 00:00:00:00:00:00, is not * a multicast address, and is not FF:FF:FF:FF:FF:FF. * - * Return true if the address is valid. + * Return: true if the address is valid. * * Please note: addr must be aligned to u16. */ @@ -214,7 +214,7 @@ static inline bool is_valid_ether_addr(const u8 *addr) * * Check that the value from the Ethertype/length field is a valid Ethertype. * - * Return true if the valid is an 802.3 supported Ethertype. + * Return: true if the valid is an 802.3 supported Ethertype. */ static inline bool eth_proto_is_802_3(__be16 proto) { @@ -458,7 +458,7 @@ static inline bool ether_addr_is_ip_mcast(const u8 *addr) * ether_addr_to_u64 - Convert an Ethernet address into a u64 value. * @addr: Pointer to a six-byte array containing the Ethernet address * - * Return a u64 value of the address + * Return: a u64 value of the address */ static inline u64 ether_addr_to_u64(const u8 *addr) { diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index b8b935b52603..e217c6321ed0 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -257,7 +257,7 @@ struct ethtool_link_ksettings { * @mode : one of the ETHTOOL_LINK_MODE_*_BIT * (not atomic, no bound checking) * - * Returns true/false. + * Returns: true/false. */ #define ethtool_link_ksettings_test_link_mode(ptr, name, mode) \ test_bit(ETHTOOL_LINK_MODE_ ## mode ## _BIT, (ptr)->link_modes.name) @@ -1199,7 +1199,7 @@ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, * @dev: pointer to net_device structure * @vclock_index: pointer to pointer of vclock index * - * Return number of phc vclocks + * Return: number of phc vclocks */ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index); @@ -1253,7 +1253,7 @@ static inline int ethtool_mm_frag_size_min_to_add(u32 val_min, u32 *val_add, * ethtool_get_ts_info_by_layer - Obtains time stamping capabilities from the MAC or PHY layer. * @dev: pointer to net_device structure * @info: buffer to hold the result - * Returns zero on success, non-zero otherwise. + * Returns: zero on success, non-zero otherwise. */ int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info); diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index c1645c86eed9..d6326b53e336 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -310,7 +310,7 @@ static inline bool vlan_uses_dev(const struct net_device *dev) * eth_type_vlan - check for valid vlan ether type. * @ethertype: ether type to check * - * Returns true if the ether type is a vlan ether type. + * Returns: true if the ether type is a vlan ether type. */ static inline bool eth_type_vlan(__be16 ethertype) { @@ -341,9 +341,9 @@ static inline bool vlan_hw_offload_capable(netdev_features_t features, * @mac_len: MAC header length including outer vlan headers * * Inserts the VLAN tag into @skb as part of the payload at offset mac_len - * Returns error if skb_cow_head fails. - * * Does not change skb->protocol so this function can be used during receive. + * + * Returns: error if skb_cow_head fails. */ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci, @@ -390,9 +390,9 @@ static inline int __vlan_insert_inner_tag(struct sk_buff *skb, * @vlan_tci: VLAN TCI to insert * * Inserts the VLAN tag into @skb as part of the payload - * Returns error if skb_cow_head fails. - * * Does not change skb->protocol so this function can be used during receive. + * + * Returns: error if skb_cow_head fails. */ static inline int __vlan_insert_tag(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) @@ -533,7 +533,7 @@ static inline void __vlan_hwaccel_put_tag(struct sk_buff *skb, * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if the skb is not of VLAN type + * Returns: error if the skb is not of VLAN type */ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { @@ -551,7 +551,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if @skb->vlan_tci is not set correctly + * Returns: error if @skb->vlan_tci is not set correctly */ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) @@ -570,7 +570,7 @@ static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, * @skb: skbuff to query * @vlan_tci: buffer to store value * - * Returns error if the skb is not VLAN tagged + * Returns: error if the skb is not VLAN tagged */ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { @@ -587,7 +587,7 @@ static inline int vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) * @type: first vlan protocol * @depth: buffer to store length of eth and vlan tags in bytes * - * Returns the EtherType of the packet, regardless of whether it is + * Returns: the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, @@ -629,7 +629,7 @@ static inline __be16 __vlan_get_protocol(const struct sk_buff *skb, __be16 type, * vlan_get_protocol - get protocol EtherType. * @skb: skbuff to query * - * Returns the EtherType of the packet, regardless of whether it is + * Returns: the EtherType of the packet, regardless of whether it is * vlan encapsulated (normal or hardware accelerated) or not. */ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) @@ -710,7 +710,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, * Expects the skb to contain a VLAN tag in the payload, and to have skb->data * pointing at the MAC header. * - * Returns a new pointer to skb->data, or NULL on failure to pull. + * Returns: a new pointer to skb->data, or NULL on failure to pull. */ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) { @@ -727,7 +727,7 @@ static inline void *vlan_remove_tag(struct sk_buff *skb, u16 *vlan_tci) * skb_vlan_tagged - check if skb is vlan tagged. * @skb: skbuff to query * - * Returns true if the skb is tagged, regardless of whether it is hardware + * Returns: true if the skb is tagged, regardless of whether it is hardware * accelerated or not. */ static inline bool skb_vlan_tagged(const struct sk_buff *skb) @@ -743,7 +743,7 @@ static inline bool skb_vlan_tagged(const struct sk_buff *skb) * skb_vlan_tagged_multi - check if skb is vlan tagged with multiple headers. * @skb: skbuff to query * - * Returns true if the skb is tagged with multiple vlan headers, regardless + * Returns: true if the skb is tagged with multiple vlan headers, regardless * of whether it is hardware accelerated or not. */ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) @@ -774,7 +774,7 @@ static inline bool skb_vlan_tagged_multi(struct sk_buff *skb) * @skb: skbuff to query * @features: features to be checked * - * Returns features without unsafe ones if the skb has multiple tags. + * Returns: features without unsafe ones if the skb has multiple tags. */ static inline netdev_features_t vlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 135105441681..d917949bba03 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -509,7 +509,7 @@ static inline bool napi_prefer_busy_poll(struct napi_struct *n) * is scheduled for example in the context of delayed timer * that can be skipped if a NAPI is already scheduled. * - * Return True if NAPI is scheduled, False otherwise. + * Return: True if NAPI is scheduled, False otherwise. */ static inline bool napi_is_scheduled(struct napi_struct *n) { @@ -524,7 +524,7 @@ bool napi_schedule_prep(struct napi_struct *n); * * Schedule NAPI poll routine to be called if it is not already * running. - * Return true if we schedule a NAPI or false if not. + * Return: true if we schedule a NAPI or false if not. * Refer to napi_schedule_prep() for additional reason on why * a NAPI might not be scheduled. */ @@ -558,7 +558,7 @@ static inline void napi_schedule_irqoff(struct napi_struct *n) * Mark NAPI processing as complete. Should only be called if poll budget * has not been completely consumed. * Prefer over napi_complete(). - * Return false if device should avoid rearming interrupts. + * Return: false if device should avoid rearming interrupts. */ bool napi_complete_done(struct napi_struct *n, int work_done); @@ -3851,7 +3851,7 @@ static inline bool netif_attr_test_mask(unsigned long j, * @online_mask: bitmask for CPUs/Rx queues that are online * @nr_bits: number of bits in the bitmask * - * Returns true if a CPU/Rx queue is online. + * Returns: true if a CPU/Rx queue is online. */ static inline bool netif_attr_test_online(unsigned long j, const unsigned long *online_mask, @@ -3871,7 +3871,8 @@ static inline bool netif_attr_test_online(unsigned long j, * @srcp: the cpumask/Rx queue mask pointer * @nr_bits: number of bits in the bitmask * - * Returns >= nr_bits if no further CPUs/Rx queues set. + * Returns: next (after n) CPU/Rx queue index in the mask; + * >= nr_bits if no further CPUs/Rx queues set. */ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, unsigned int nr_bits) @@ -3893,7 +3894,8 @@ static inline unsigned int netif_attrmask_next(int n, const unsigned long *srcp, * @src2p: the second CPUs/Rx queues mask pointer * @nr_bits: number of bits in the bitmask * - * Returns >= nr_bits if no further CPUs/Rx queues set in both. + * Returns: next (after n) CPU/Rx queue index set in both masks; + * >= nr_bits if no further CPUs/Rx queues set in both. */ static inline int netif_attrmask_next_and(int n, const unsigned long *src1p, const unsigned long *src2p, diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5897f3dbaf7c..f39f688d7285 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -357,7 +357,7 @@ extern struct static_key xt_tee_enabled; * Begin packet processing : all readers must wait the end * 1) Must be called with preemption disabled * 2) softirqs must be disabled too (or we should use this_cpu_add()) - * Returns : + * Returns: * 1 if no recursion on this cpu * 0 if recursion detected */ diff --git a/include/linux/netfilter_netdev.h b/include/linux/netfilter_netdev.h index 8676316547cc..3175073a66ba 100644 --- a/include/linux/netfilter_netdev.h +++ b/include/linux/netfilter_netdev.h @@ -66,7 +66,6 @@ static inline bool nf_hook_egress_active(void) * @rc: result code which shall be returned by __dev_queue_xmit() on failure * @dev: netdev whose egress hooks shall be applied to @skb * - * Returns @skb on success or %NULL if the packet was consumed or filtered. * Caller must hold rcu_read_lock. * * On ingress, packets are classified first by tc, then by netfilter. @@ -81,6 +80,8 @@ static inline bool nf_hook_egress_active(void) * called recursively by tunnel drivers such as vxlan, the flag is reverted to * false after sch_handle_egress(). This ensures that netfilter is applied * both on the overlay and underlying network. + * + * Returns: @skb on success or %NULL if the packet was consumed or filtered. */ static inline struct sk_buff *nf_hook_egress(struct sk_buff *skb, int *rc, struct net_device *dev) diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index c892d22ce0a7..0d68d09bedd1 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -307,7 +307,7 @@ static inline u64 adjust_by_scaled_ppm(u64 base, long scaled_ppm) * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. * - * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * Returns: a valid pointer on success or PTR_ERR on failure. If PHC * support is missing at the configuration level, this function * returns NULL, and drivers are expected to gracefully handle that * case separately. @@ -445,7 +445,7 @@ int ptp_get_vclocks_index(int pclock_index, int **vclock_index); * @hwtstamp: timestamp * @vclock_index: phc index of ptp vclock. * - * Returns converted timestamp, or 0 on error. + * Returns: converted timestamp, or 0 on error. */ ktime_t ptp_convert_timestamp(const ktime_t *hwtstamp, int vclock_index); #else diff --git a/include/linux/rfkill.h b/include/linux/rfkill.h index 997b34197385..6816e4c5f3f0 100644 --- a/include/linux/rfkill.h +++ b/include/linux/rfkill.h @@ -241,7 +241,7 @@ bool rfkill_soft_blocked(struct rfkill *rfkill); * rfkill_find_type - Helper for finding rfkill type by name * @name: the name of the type * - * Returns enum rfkill_type that corresponds to the name. + * Returns: enum rfkill_type that corresponds to the name. */ enum rfkill_type rfkill_find_type(const char *name); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 14b88f551920..811ce44113f6 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -78,7 +78,7 @@ static inline bool lockdep_rtnl_is_held(void) * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit + * Return: the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 95452d1a07fc..69624b394cd9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1134,7 +1134,7 @@ static inline bool skb_pfmemalloc(const struct sk_buff *skb) * skb_dst - returns skb dst_entry * @skb: buffer * - * Returns skb dst_entry, regardless of reference taken or not. + * Returns: skb dst_entry, regardless of reference taken or not. */ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { @@ -1222,7 +1222,7 @@ static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) * skb_unref - decrement the skb's reference count * @skb: buffer * - * Returns true if we can free the skb. + * Returns: true if we can free the skb. */ static inline bool skb_unref(struct sk_buff *skb) { @@ -1344,7 +1344,7 @@ struct sk_buff_fclones { * @sk: socket * @skb: buffer * - * Returns true if skb is a fast clone, and its clone is not freed. + * Returns: true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), * so we also check that didn't happen. */ @@ -3516,7 +3516,7 @@ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) * A page shouldn't be considered for reusing/recycling if it was allocated * under memory pressure or at a distant memory node. * - * Returns false if this page should be returned to page allocator, true + * Returns: false if this page should be returned to page allocator, true * otherwise. */ static inline bool dev_page_is_reusable(const struct page *page) @@ -3633,7 +3633,7 @@ int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * - * Returns the address of the data within @frag. The page must already + * Returns: the address of the data within @frag. The page must already * be mapped. */ static inline void *skb_frag_address(const skb_frag_t *frag) @@ -3648,7 +3648,7 @@ static inline void *skb_frag_address(const skb_frag_t *frag) * skb_frag_address_safe - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * - * Returns the address of the data within @frag. Checks that the page + * Returns: the address of the data within @frag. Checks that the page * is mapped and returns %NULL otherwise. */ static inline void *skb_frag_address_safe(const skb_frag_t *frag) @@ -3890,7 +3890,7 @@ static inline int skb_linearize(struct sk_buff *skb) * skb_has_shared_frag - can any frag be overwritten * @skb: buffer to test * - * Return true if the skb has at least one frag that might be modified + * Return: true if the skb has at least one frag that might be modified * by an external entity (as in vmsplice()/sendfile()) */ static inline bool skb_has_shared_frag(const struct sk_buff *skb) @@ -4612,7 +4612,7 @@ static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) /* Check if we need to perform checksum complete validation. * - * Returns true if checksum complete is needed, false otherwise + * Returns: true if checksum complete is needed, false otherwise * (either checksum is unnecessary or zero checksum is allowed). */ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, diff --git a/include/linux/wwan.h b/include/linux/wwan.h index 79c781875c09..a4d6cc0c9f68 100644 --- a/include/linux/wwan.h +++ b/include/linux/wwan.h @@ -97,7 +97,7 @@ struct wwan_port_caps { * * This function must be balanced with a call to wwan_remove_port(). * - * Returns a valid pointer to wwan_port on success or PTR_ERR on failure + * Returns: a valid pointer to wwan_port on success or PTR_ERR on failure */ struct wwan_port *wwan_create_port(struct device *parent, enum wwan_port_type type, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 27acf1292a5c..182f7965048f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5957,7 +5957,7 @@ int wiphy_register(struct wiphy *wiphy); * @wiphy: the wiphy to check the locking on * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the + * Return: the value of the specified RCU-protected pointer, but omit the * READ_ONCE(), because caller holds the wiphy mutex used for updates. */ #define wiphy_dereference(wiphy, p) \ diff --git a/include/net/dst.h b/include/net/dst.h index 08647c99d79c..78c78cdce0e9 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -307,7 +307,7 @@ static inline bool dst_hold_safe(struct dst_entry *dst) * @skb: buffer * * If dst is not yet refcounted and not destroyed, grab a ref on it. - * Returns true if dst is refcounted. + * Returns: true if dst is refcounted. */ static inline bool skb_dst_force(struct sk_buff *skb) { diff --git a/include/net/genetlink.h b/include/net/genetlink.h index d096cc6352de..a03d56765832 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -354,7 +354,7 @@ __genlmsg_iput(struct sk_buff *skb, const struct genl_info *info, int flags) * such requests) or a struct initialized by genl_info_init_ntf() * when constructing notifications. * - * Returns pointer to new genetlink header. + * Returns: pointer to new genetlink header. */ static inline void * genlmsg_iput(struct sk_buff *skb, const struct genl_info *info) @@ -366,7 +366,7 @@ genlmsg_iput(struct sk_buff *skb, const struct genl_info *info) * genlmsg_nlhdr - Obtain netlink header from user specified header * @user_hdr: user header as returned from genlmsg_put() * - * Returns pointer to netlink header. + * Returns: pointer to netlink header. */ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr) { @@ -435,7 +435,7 @@ static inline void genl_dump_check_consistent(struct netlink_callback *cb, * @flags: netlink message flags * @cmd: generic netlink command * - * Returns pointer to user specific header + * Returns: pointer to user specific header */ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_info *info, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 248bfb26e2af..f5c43ad1565e 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -471,7 +471,7 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, /* This helper is specialized for BIG TCP needs. * It assumes the hop_jumbo_hdr will immediately follow the IPV6 header. * It assumes headers are already in skb->head. - * Returns 0, or IPPROTO_TCP if a BIG TCP packet is there. + * Returns: 0, or IPPROTO_TCP if a BIG TCP packet is there. */ static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) { diff --git a/include/net/iucv/iucv.h b/include/net/iucv/iucv.h index dd9e93c12260..9804fa5d9c67 100644 --- a/include/net/iucv/iucv.h +++ b/include/net/iucv/iucv.h @@ -202,7 +202,7 @@ struct iucv_handler { * * Registers a driver with IUCV. * - * Returns 0 on success, -ENOMEM if the memory allocation for the pathid + * Returns: 0 on success, -ENOMEM if the memory allocation for the pathid * table failed, or -EIO if IUCV_DECLARE_BUFFER failed on all cpus. */ int iucv_register(struct iucv_handler *handler, int smp); @@ -224,7 +224,7 @@ void iucv_unregister(struct iucv_handler *handle, int smp); * * Allocate a new path structure for use with iucv_connect. * - * Returns NULL if the memory allocation failed or a pointer to the + * Returns: NULL if the memory allocation failed or a pointer to the * path structure. */ static inline struct iucv_path *iucv_path_alloc(u16 msglim, u8 flags, gfp_t gfp) @@ -260,7 +260,7 @@ static inline void iucv_path_free(struct iucv_path *path) * This function is issued after the user received a connection pending * external interrupt and now wishes to complete the IUCV communication path. * - * Returns the result of the CP IUCV call. + * Returns: the result of the CP IUCV call. */ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, u8 *userdata, void *private); @@ -278,7 +278,7 @@ int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler, * successfully, you are not able to use the path until you receive an IUCV * Connection Complete external interrupt. * - * Returns the result of the CP IUCV call. + * Returns: the result of the CP IUCV call. */ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, u8 *userid, u8 *system, u8 *userdata, @@ -292,7 +292,7 @@ int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler, * This function temporarily suspends incoming messages on an IUCV path. * You can later reactivate the path by invoking the iucv_resume function. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata); @@ -304,7 +304,7 @@ int iucv_path_quiesce(struct iucv_path *path, u8 *userdata); * This function resumes incoming messages on an IUCV path that has * been stopped with iucv_path_quiesce. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_path_resume(struct iucv_path *path, u8 *userdata); @@ -315,7 +315,7 @@ int iucv_path_resume(struct iucv_path *path, u8 *userdata); * * This function terminates an IUCV path. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_path_sever(struct iucv_path *path, u8 *userdata); @@ -327,7 +327,7 @@ int iucv_path_sever(struct iucv_path *path, u8 *userdata); * * Cancels a message you have sent. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, u32 srccls); @@ -347,7 +347,7 @@ int iucv_message_purge(struct iucv_path *path, struct iucv_message *msg, * * Locking: local_bh_enable/local_bh_disable * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *buffer, size_t size, size_t *residual); @@ -367,7 +367,7 @@ int iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, * * Locking: no locking. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *buffer, size_t size, @@ -382,7 +382,7 @@ int __iucv_message_receive(struct iucv_path *path, struct iucv_message *msg, * are notified of a message and the time that you complete the message, * the message may be rejected. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg); @@ -399,7 +399,7 @@ int iucv_message_reject(struct iucv_path *path, struct iucv_message *msg); * pathid, msgid, and trgcls. Prmmsg signifies the data is moved into * the parameter list. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, u8 flags, void *reply, size_t size); @@ -419,7 +419,7 @@ int iucv_message_reply(struct iucv_path *path, struct iucv_message *msg, * * Locking: local_bh_enable/local_bh_disable * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size); @@ -439,7 +439,7 @@ int iucv_message_send(struct iucv_path *path, struct iucv_message *msg, * * Locking: no locking. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size); @@ -461,7 +461,7 @@ int __iucv_message_send(struct iucv_path *path, struct iucv_message *msg, * reply to the message and a buffer is provided into which IUCV moves * the reply to this message. * - * Returns the result from the CP IUCV call. + * Returns: the result from the CP IUCV call. */ int iucv_message_send2way(struct iucv_path *path, struct iucv_message *msg, u8 flags, u32 srccls, void *buffer, size_t size, diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h index 5adf6fda11e8..06985530517b 100644 --- a/include/net/netfilter/nf_tproxy.h +++ b/include/net/netfilter/nf_tproxy.h @@ -49,7 +49,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); * * nf_tproxy_handle_time_wait4() consumes the socket reference passed in. * - * Returns the listener socket if there's one, the TIME_WAIT socket if + * Returns: the listener socket if there's one, the TIME_WAIT socket if * no such listener is found, or NULL if the TCP header is incomplete. */ struct sock * @@ -108,7 +108,7 @@ nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, * * nf_tproxy_handle_time_wait6() consumes the socket reference passed in. * - * Returns the listener socket if there's one, the TIME_WAIT socket if + * Returns: the listener socket if there's one, the TIME_WAIT socket if * no such listener is found, or NULL if the TCP header is incomplete. */ struct sock * diff --git a/include/net/netlink.h b/include/net/netlink.h index 39eaa6be6ca8..e015ffbed819 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -649,7 +649,7 @@ static inline int nlmsg_ok(const struct nlmsghdr *nlh, int remaining) * @nlh: netlink message header * @remaining: number of bytes remaining in message stream * - * Returns the next netlink message in the message stream and + * Returns: the next netlink message in the message stream and * decrements remaining by the size of the current message. */ static inline struct nlmsghdr * @@ -676,7 +676,7 @@ nlmsg_next(const struct nlmsghdr *nlh, int *remaining) * exceeding maxtype will be rejected, policy must be specified, attributes * will be validated in the strictest way possible. * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, @@ -701,7 +701,7 @@ static inline int nla_parse(struct nlattr **tb, int maxtype, * exceeding maxtype will be ignored and attributes from the policy are not * always strictly validated (only for new attributes). * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, const struct nlattr *head, int len, @@ -726,7 +726,7 @@ static inline int nla_parse_deprecated(struct nlattr **tb, int maxtype, * exceeding maxtype will be rejected as well as trailing data, but the * policy is not completely strictly validated (only for new attributes). * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_parse_deprecated_strict(struct nlattr **tb, int maxtype, const struct nlattr *head, @@ -833,7 +833,7 @@ nlmsg_parse_deprecated_strict(const struct nlmsghdr *nlh, int hdrlen, * @hdrlen: length of family specific header * @attrtype: type of attribute to look for * - * Returns the first attribute which matches the specified type. + * Returns: the first attribute which matches the specified type. */ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, int hdrlen, int attrtype) @@ -854,7 +854,7 @@ static inline struct nlattr *nlmsg_find_attr(const struct nlmsghdr *nlh, * specified policy. Validation is done in liberal mode. * See documentation of struct nla_policy for more details. * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_validate_deprecated(const struct nlattr *head, int len, int maxtype, @@ -877,7 +877,7 @@ static inline int nla_validate_deprecated(const struct nlattr *head, int len, * specified policy. Validation is done in strict mode. * See documentation of struct nla_policy for more details. * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int nla_validate(const struct nlattr *head, int len, int maxtype, const struct nla_policy *policy, @@ -914,7 +914,7 @@ static inline int nlmsg_validate_deprecated(const struct nlmsghdr *nlh, * nlmsg_report - need to report back to application? * @nlh: netlink message header * - * Returns 1 if a report back to the application is requested. + * Returns: 1 if a report back to the application is requested. */ static inline int nlmsg_report(const struct nlmsghdr *nlh) { @@ -925,7 +925,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) * nlmsg_seq - return the seq number of netlink message * @nlh: netlink message header * - * Returns 0 if netlink message is NULL + * Returns: 0 if netlink message is NULL */ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) { @@ -952,7 +952,7 @@ static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) * @payload: length of message payload * @flags: message flags * - * Returns NULL if the tailroom of the skb is insufficient to store + * Returns: NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, @@ -971,7 +971,7 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se * * Append data to an existing nlmsg, used when constructing a message * with multiple fixed-format headers (which is rare). - * Returns NULL if the tailroom of the skb is insufficient to store + * Returns: NULL if the tailroom of the skb is insufficient to store * the extra payload. */ static inline void *nlmsg_append(struct sk_buff *skb, u32 size) @@ -993,7 +993,7 @@ static inline void *nlmsg_append(struct sk_buff *skb, u32 size) * @payload: length of message payload * @flags: message flags * - * Returns NULL if the tailroom of the skb is insufficient to store + * Returns: NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, @@ -1050,7 +1050,7 @@ static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) * nlmsg_get_pos - return current position in netlink message * @skb: socket buffer the message is stored in * - * Returns a pointer to the current tail of the message. + * Returns: a pointer to the current tail of the message. */ static inline void *nlmsg_get_pos(struct sk_buff *skb) { @@ -1276,7 +1276,7 @@ static inline int nla_ok(const struct nlattr *nla, int remaining) * @nla: netlink attribute * @remaining: number of bytes remaining in attribute stream * - * Returns the next netlink attribute in the attribute stream and + * Returns: the next netlink attribute in the attribute stream and * decrements remaining by the size of the current attribute. */ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) @@ -1292,7 +1292,7 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) * @nla: attribute containing the nested attributes * @attrtype: type of attribute to look for * - * Returns the first attribute which matches the specified type. + * Returns: the first attribute which matches the specified type. */ static inline struct nlattr * nla_find_nested(const struct nlattr *nla, int attrtype) @@ -2091,7 +2091,7 @@ static inline int nla_get_flag(const struct nlattr *nla) * nla_get_msecs - return payload of msecs attribute * @nla: msecs netlink attribute * - * Returns the number of milliseconds in jiffies. + * Returns: the number of milliseconds in jiffies. */ static inline unsigned long nla_get_msecs(const struct nlattr *nla) { @@ -2183,7 +2183,7 @@ static inline void *nla_memdup_noprof(const struct nlattr *src, gfp_t gfp) * marked their nest attributes with NLA_F_NESTED flag. New APIs should use * nla_nest_start() which sets the flag. * - * Returns the container attribute or NULL on error + * Returns: the container attribute or NULL on error */ static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb, int attrtype) @@ -2204,7 +2204,7 @@ static inline struct nlattr *nla_nest_start_noflag(struct sk_buff *skb, * Unlike nla_nest_start_noflag(), mark the nest attribute with NLA_F_NESTED * flag. This is the preferred function to use in new code. * - * Returns the container attribute or NULL on error + * Returns: the container attribute or NULL on error */ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) { @@ -2219,7 +2219,7 @@ static inline struct nlattr *nla_nest_start(struct sk_buff *skb, int attrtype) * Corrects the container attribute header to include the all * appended attributes. * - * Returns the total data length of the skb. + * Returns: the total data length of the skb. */ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) { @@ -2252,7 +2252,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) * specified policy. Attributes with a type exceeding maxtype will be * ignored. See documentation of struct nla_policy for more details. * - * Returns 0 on success or a negative error code. + * Returns: 0 on success or a negative error code. */ static inline int __nla_validate_nested(const struct nlattr *start, int maxtype, const struct nla_policy *policy, @@ -2285,7 +2285,7 @@ nla_validate_nested_deprecated(const struct nlattr *start, int maxtype, * nla_need_padding_for_64bit - test 64-bit alignment of the next attribute * @skb: socket buffer the message is stored in * - * Return true if padding is needed to align the next attribute (nla_data()) to + * Return: true if padding is needed to align the next attribute (nla_data()) to * a 64-bit aligned area. */ static inline bool nla_need_padding_for_64bit(struct sk_buff *skb) @@ -2312,7 +2312,7 @@ static inline bool nla_need_padding_for_64bit(struct sk_buff *skb) * This will only be done in architectures which do not have * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS defined. * - * Returns zero on success or a negative error code. + * Returns: zero on success or a negative error code. */ static inline int nla_align_64bit(struct sk_buff *skb, int padattr) { diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 793e6fd78bc5..26caa2c20912 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -104,8 +104,7 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) * * Get a page fragment from the page allocator or page_pool caches. * - * Return: - * Return allocated page fragment, otherwise return NULL. + * Return: allocated page fragment, otherwise return NULL. */ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, unsigned int *offset, @@ -155,8 +154,7 @@ static inline struct page *page_pool_alloc(struct page_pool *pool, * depending on the requested size in order to allocate memory with least memory * utilization and performance penalty. * - * Return: - * Return allocated page or page fragment, otherwise return NULL. + * Return: allocated page or page fragment, otherwise return NULL. */ static inline struct page *page_pool_dev_alloc(struct page_pool *pool, unsigned int *offset, @@ -190,8 +188,7 @@ static inline void *page_pool_alloc_va(struct page_pool *pool, * This is just a thin wrapper around the page_pool_alloc() API, and * it returns va of the allocated page or page fragment. * - * Return: - * Return the va for the allocated page or page fragment, otherwise return NULL. + * Return: the va for the allocated page or page fragment, otherwise return NULL. */ static inline void *page_pool_dev_alloc_va(struct page_pool *pool, unsigned int *size) diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index cf199af85c52..22c5ab4269d7 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -319,7 +319,7 @@ tcf_exts_hw_stats_update(const struct tcf_exts *exts, * tcf_exts_has_actions - check if at least one action is present * @exts: tc filter extensions handle * - * Returns true if at least one action is present. + * Returns: true if at least one action is present. */ static inline bool tcf_exts_has_actions(struct tcf_exts *exts) { @@ -501,7 +501,7 @@ int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *, * through all ematches respecting their logic relations returning * as soon as the result is obvious. * - * Returns 1 if the ematch tree as-one matches, no ematches are configured + * Returns: 1 if the ematch tree as-one matches, no ematches are configured * or ematch is not enabled in the kernel, otherwise 0 is returned. */ static inline int tcf_em_tree_match(struct sk_buff *skb, diff --git a/include/net/tcp.h b/include/net/tcp.h index e9b37b76e894..5b2b04835688 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1817,7 +1817,7 @@ int tcp_sigpool_hash_skb_data(struct tcp_sigpool *hp, * @id: tcp_sigpool that was previously allocated by tcp_sigpool_alloc_ahash() * @c: returned tcp_sigpool for usage (uninitialized on failure) * - * Returns 0 on success, error otherwise. + * Returns: 0 on success, error otherwise. */ int tcp_sigpool_start(unsigned int id, struct tcp_sigpool *c); /** -- cgit v1.2.3 From e4f8647767cfac0291def86ddfac23b925294701 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:54 +0100 Subject: vxlan: Track reserved bits explicitly as part of the configuration In order to make it possible to configure which bits in VXLAN header should be considered reserved, introduce a new field vxlan_config::reserved_bits. Have it cover the whole header, except for the VNI-present bit and the bits for VNI itself, and have individual enabled features clear more bits off reserved_bits. (This is expressed as first constructing a used_bits set, and then inverting it to get the reserved_bits. The set of used_bits will be useful on its own for validation of user-set reserved_bits in a following patch.) The patch also moves a comment relevant to the validation from the unparsed validation site up to the new site. Logically this patch should add the new comment, and a later patch that removes the unparsed bits would remove the old comment. But keeping both legs in the same patch is better from the history spelunking point of view. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/984dbf98d5940d3900268dbffaf70961f731d4a4.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 41 ++++++++++++++++++++++++++++++----------- include/net/vxlan.h | 1 + 2 files changed, 31 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index f114568b67a3..e50f4cb70193 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1710,9 +1710,20 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; } - /* For backwards compatibility, only allow reserved fields to be - * used by VXLAN extensions if explicitly requested. - */ + if (vh->vx_flags & vxlan->cfg.reserved_bits.vx_flags || + vh->vx_vni & vxlan->cfg.reserved_bits.vx_vni) { + /* If the header uses bits besides those enabled by the + * netdevice configuration, treat this as a malformed packet. + * This behavior diverges from VXLAN RFC (RFC7348) which + * stipulates that bits in reserved in reserved fields are to be + * ignored. The approach here maintains compatibility with + * previous stack code, and also is more robust and provides a + * little more security in adding extensions to VXLAN. + */ + reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; + goto drop; + } + if (vxlan->cfg.flags & VXLAN_F_GPE) { if (!vxlan_parse_gpe_proto(vh, &protocol)) goto drop; @@ -1763,14 +1774,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) */ if (unparsed.vx_flags || unparsed.vx_vni) { - /* If there are any unprocessed flags remaining treat - * this as a malformed packet. This behavior diverges from - * VXLAN RFC (RFC7348) which stipulates that bits in reserved - * in reserved fields are to be ignored. The approach here - * maintains compatibility with previous stack code, and also - * is more robust and provides a little more security in - * adding extensions to VXLAN. - */ reason = SKB_DROP_REASON_VXLAN_INVALID_HDR; goto drop; } @@ -4070,6 +4073,10 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], struct net_device *dev, struct vxlan_config *conf, bool changelink, struct netlink_ext_ack *extack) { + struct vxlanhdr used_bits = { + .vx_flags = VXLAN_HF_VNI, + .vx_vni = VXLAN_VNI_MASK, + }; struct vxlan_dev *vxlan = netdev_priv(dev); int err = 0; @@ -4296,6 +4303,8 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], extack); if (err) return err; + used_bits.vx_flags |= VXLAN_HF_RCO; + used_bits.vx_vni |= ~VXLAN_VNI_MASK; } if (data[IFLA_VXLAN_GBP]) { @@ -4303,6 +4312,7 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], VXLAN_F_GBP, changelink, false, extack); if (err) return err; + used_bits.vx_flags |= VXLAN_GBP_USED_BITS; } if (data[IFLA_VXLAN_GPE]) { @@ -4311,8 +4321,17 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], extack); if (err) return err; + + used_bits.vx_flags |= VXLAN_GPE_USED_BITS; } + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. + */ + conf->reserved_bits = (struct vxlanhdr) { + .vx_flags = ~used_bits.vx_flags, + .vx_vni = ~used_bits.vx_vni, + }; if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) { err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL, VXLAN_F_REMCSUM_NOPARTIAL, changelink, diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 33ba6fc151cf..2dd23ee2bacd 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -227,6 +227,7 @@ struct vxlan_config { unsigned int addrmax; bool no_share; enum ifla_vxlan_df df; + struct vxlanhdr reserved_bits; }; enum { -- cgit v1.2.3 From 6c11379b104e3718135fd7fc37bb254b41e4cf65 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 5 Dec 2024 16:40:57 +0100 Subject: vxlan: Add an attribute to make VXLAN header validation configurable The set of bits that the VXLAN netdevice currently considers reserved is defined by the features enabled at the netdevice construction. In order to make this configurable, add an attribute, IFLA_VXLAN_RESERVED_BITS. The payload is a pair of big-endian u32's covering the VXLAN header. This is validated against the set of flags used by the various enabled VXLAN features, and attempts to override bits used by an enabled feature are bounced. Signed-off-by: Petr Machata Reviewed-by: Ido Schimmel Reviewed-by: Nikolay Aleksandrov Link: https://patch.msgid.link/c657275e5ceed301e62c69fe8e559e32909442e2.1733412063.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 53 ++++++++++++++++++++++++++++++++++++------ include/uapi/linux/if_link.h | 1 + 2 files changed, 47 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index ff5684a2103a..43cf672b7b9f 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -3428,6 +3428,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_VNIFILTER] = { .type = NLA_U8 }, [IFLA_VXLAN_LOCALBYPASS] = NLA_POLICY_MAX(NLA_U8, 1), [IFLA_VXLAN_LABEL_POLICY] = NLA_POLICY_MAX(NLA_U32, VXLAN_LABEL_MAX), + [IFLA_VXLAN_RESERVED_BITS] = NLA_POLICY_EXACT_LEN(sizeof(struct vxlanhdr)), }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[], @@ -4315,13 +4316,44 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[], used_bits.vx_flags |= VXLAN_GPE_USED_BITS; } - /* For backwards compatibility, only allow reserved fields to be - * used by VXLAN extensions if explicitly requested. - */ - conf->reserved_bits = (struct vxlanhdr) { - .vx_flags = ~used_bits.vx_flags, - .vx_vni = ~used_bits.vx_vni, - }; + if (data[IFLA_VXLAN_RESERVED_BITS]) { + struct vxlanhdr reserved_bits; + + if (changelink) { + NL_SET_ERR_MSG_ATTR(extack, + data[IFLA_VXLAN_RESERVED_BITS], + "Cannot change reserved_bits"); + return -EOPNOTSUPP; + } + + nla_memcpy(&reserved_bits, data[IFLA_VXLAN_RESERVED_BITS], + sizeof(reserved_bits)); + if (used_bits.vx_flags & reserved_bits.vx_flags || + used_bits.vx_vni & reserved_bits.vx_vni) { + __be64 ub_be64, rb_be64; + + memcpy(&ub_be64, &used_bits, sizeof(ub_be64)); + memcpy(&rb_be64, &reserved_bits, sizeof(rb_be64)); + + NL_SET_ERR_MSG_ATTR_FMT(extack, + data[IFLA_VXLAN_RESERVED_BITS], + "Used bits %#018llx cannot overlap reserved bits %#018llx", + be64_to_cpu(ub_be64), + be64_to_cpu(rb_be64)); + return -EINVAL; + } + + conf->reserved_bits = reserved_bits; + } else { + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. + */ + conf->reserved_bits = (struct vxlanhdr) { + .vx_flags = ~used_bits.vx_flags, + .vx_vni = ~used_bits.vx_vni, + }; + } + if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL]) { err = vxlan_nl2flag(conf, data, IFLA_VXLAN_REMCSUM_NOPARTIAL, VXLAN_F_REMCSUM_NOPARTIAL, changelink, @@ -4506,6 +4538,8 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(0) + /* IFLA_VXLAN_GPE */ nla_total_size(0) + /* IFLA_VXLAN_REMCSUM_NOPARTIAL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_VNIFILTER */ + /* IFLA_VXLAN_RESERVED_BITS */ + nla_total_size(sizeof(struct vxlanhdr)) + 0; } @@ -4608,6 +4642,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(vxlan->cfg.flags & VXLAN_F_VNIFILTER))) goto nla_put_failure; + if (nla_put(skb, IFLA_VXLAN_RESERVED_BITS, + sizeof(vxlan->cfg.reserved_bits), + &vxlan->cfg.reserved_bits)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2575e0cd9b48..77730c340c8f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -1394,6 +1394,7 @@ enum { IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_LOCALBYPASS, IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ + IFLA_VXLAN_RESERVED_BITS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From 31cdd8418234e70043abd26894b57eb201489cba Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:42:58 +0100 Subject: net: stmmac: Fix CSR divider comment The comment in declaration of STMMAC_CSR_250_300M incorrectly describes the constant as '/* MDC = clk_scr_i/122 */' but the DWC Ether QOS Handbook version 5.20a says it is CSR clock/124. Signed-off-by: Jan Petrous (OSS) Reviewed-by: Jacob Keller Reviewed-by: Russell King (Oracle) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-1-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index d79ff252cfdc..75cbfb576358 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -33,7 +33,7 @@ #define STMMAC_CSR_20_35M 0x2 /* MDC = clk_scr_i/16 */ #define STMMAC_CSR_35_60M 0x3 /* MDC = clk_scr_i/26 */ #define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */ -#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/122 */ +#define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/124 */ /* MTL algorithms identifiers */ #define MTL_TX_ALGORITHM_WRR 0x0 -- cgit v1.2.3 From c8fab05d021dfc04401102f9fa1de07fc8f75d8d Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:42:59 +0100 Subject: net: stmmac: Extend CSR calc support Add support for CSR clock range up to 800 MHz. Reviewed-by: Jacob Keller Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-2-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/common.h | 2 ++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++++ include/linux/stmmac.h | 2 ++ 3 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 1367fa5c9b8e..70d601f45481 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -257,6 +257,8 @@ struct stmmac_safety_stats { #define CSR_F_150M 150000000 #define CSR_F_250M 250000000 #define CSR_F_300M 300000000 +#define CSR_F_500M 500000000 +#define CSR_F_800M 800000000 #define MAC_CSR_H_FRQ_MASK 0x20 diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 9b262cdad60b..3cb7ad6ccc4e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -325,6 +325,10 @@ static void stmmac_clk_csr_set(struct stmmac_priv *priv) priv->clk_csr = STMMAC_CSR_150_250M; else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) priv->clk_csr = STMMAC_CSR_250_300M; + else if ((clk_rate >= CSR_F_300M) && (clk_rate < CSR_F_500M)) + priv->clk_csr = STMMAC_CSR_300_500M; + else if ((clk_rate >= CSR_F_500M) && (clk_rate < CSR_F_800M)) + priv->clk_csr = STMMAC_CSR_500_800M; } if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) { diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 75cbfb576358..865d0fe26f98 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -34,6 +34,8 @@ #define STMMAC_CSR_35_60M 0x3 /* MDC = clk_scr_i/26 */ #define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */ #define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/124 */ +#define STMMAC_CSR_300_500M 0x6 /* MDC = clk_scr_i/204 */ +#define STMMAC_CSR_500_800M 0x7 /* MDC = clk_scr_i/324 */ /* MTL algorithms identifiers */ #define MTL_TX_ALGORITHM_WRR 0x0 -- cgit v1.2.3 From cb09f61a9ab84369c62f2ef7f8a2b797f596f6d1 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:00 +0100 Subject: net: stmmac: Fix clock rate variables size The clock API clk_get_rate() returns unsigned long value. Expand affected members of stmmac platform data and convert the stmmac_clk_csr_set() and dwmac4_core_init() methods to defining the unsigned long clk_rate local variables. Reviewed-by: Andrew Lunn Reviewed-by: Serge Semin Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-3-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 2 +- drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 2 +- include/linux/stmmac.h | 6 +++--- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 901a3c1959fa..2a5b38723635 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -777,7 +777,7 @@ static void ethqos_ptp_clk_freq_config(struct stmmac_priv *priv) netdev_err(priv->dev, "Failed to max out clk_ptp_ref: %d\n", err); plat_dat->clk_ptp_rate = clk_get_rate(plat_dat->clk_ptp_ref); - netdev_dbg(priv->dev, "PTP rate %d\n", plat_dat->clk_ptp_rate); + netdev_dbg(priv->dev, "PTP rate %lu\n", plat_dat->clk_ptp_rate); } static int qcom_ethqos_probe(struct platform_device *pdev) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index c25781874aa7..c36f90a782c5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -27,7 +27,7 @@ static void dwmac4_core_init(struct mac_device_info *hw, struct stmmac_priv *priv = netdev_priv(dev); void __iomem *ioaddr = hw->pcsr; u32 value = readl(ioaddr + GMAC_CONFIG); - u32 clk_rate; + unsigned long clk_rate; value |= GMAC_CORE_INIT; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 3cb7ad6ccc4e..d45fd7a3acd5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -301,7 +301,7 @@ static void stmmac_global_err(struct stmmac_priv *priv) */ static void stmmac_clk_csr_set(struct stmmac_priv *priv) { - u32 clk_rate; + unsigned long clk_rate; clk_rate = clk_get_rate(priv->plat->stmmac_clk); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 3ac32444e492..06e07e6e180b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -640,7 +640,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) dev_info(&pdev->dev, "PTP uses main clock\n"); } else { plat->clk_ptp_rate = clk_get_rate(plat->clk_ptp_ref); - dev_dbg(&pdev->dev, "PTP rate %d\n", plat->clk_ptp_rate); + dev_dbg(&pdev->dev, "PTP rate %lu\n", plat->clk_ptp_rate); } plat->stmmac_rst = devm_reset_control_get_optional(&pdev->dev, diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 865d0fe26f98..c9878a612e53 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -252,8 +252,8 @@ struct plat_stmmacenet_data { struct clk *stmmac_clk; struct clk *pclk; struct clk *clk_ptp_ref; - unsigned int clk_ptp_rate; - unsigned int clk_ref_rate; + unsigned long clk_ptp_rate; + unsigned long clk_ref_rate; unsigned int mult_fact_100ns; s32 ptp_max_adj; u32 cdc_error_adj; @@ -265,7 +265,7 @@ struct plat_stmmacenet_data { int mac_port_sel_speed; int has_xgmac; u8 vlan_fail_q; - unsigned int eee_usecs_rate; + unsigned long eee_usecs_rate; struct pci_dev *pdev; int int_snapshot_num; int msi_mac_vec; -- cgit v1.2.3 From 386aa60abdb600a4e5ad818e6dba171685942e54 Mon Sep 17 00:00:00 2001 From: "Jan Petrous (OSS)" Date: Thu, 5 Dec 2024 17:43:01 +0100 Subject: net: phy: Add helper for mapping RGMII link speed to clock rate The RGMII interface supports three data rates: 10/100 Mbps and 1 Gbps. These speeds correspond to clock frequencies of 2.5/25 MHz and 125 MHz, respectively. Many Ethernet drivers, including glues in stmmac, follow a similar pattern of converting RGMII speed to clock frequency. To simplify code, define the helper rgmii_clock(speed) to convert connection speed to clock frequency. Suggested-by: Russell King (Oracle) Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: Jan Petrous (OSS) Link: https://patch.msgid.link/20241205-upstream_s32cc_gmac-v8-4-ec1d180df815@oss.nxp.com Signed-off-by: Jakub Kicinski --- include/linux/phy.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index bb157136351e..e597a32cc787 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -298,6 +298,29 @@ static inline const char *phy_modes(phy_interface_t interface) } } +/** + * rgmii_clock - map link speed to the clock rate + * @speed: link speed value + * + * Description: maps RGMII supported link speeds + * into the clock rates. + * + * Returns: clock rate or negative errno + */ +static inline long rgmii_clock(int speed) +{ + switch (speed) { + case SPEED_10: + return 2500000; + case SPEED_100: + return 25000000; + case SPEED_1000: + return 125000000; + default: + return -EINVAL; + } +} + #define PHY_INIT_TIMEOUT 100000 #define PHY_FORCE_TIMEOUT 10 -- cgit v1.2.3 From c17618cf664ddf54b264ea74df9e8ab3e3ceda3b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 4 Dec 2024 20:18:39 -0800 Subject: scsi: Eliminate scsi_register() and scsi_unregister() usage & docs scsi_mid_low_api.rst refers to scsi_register() and scsi_unregister() but these functions don't exist. They have been replaced by more meaningful names. Update one driver (megaraid_mbox.c) that uses "scsi_unregister" in a warning message. Update scsi_mid_low_api.rst to eliminate references to scsi_register() and scsi_unregister(). Signed-off-by: Randy Dunlap Link: https://lore.kernel.org/r/20241205041839.164404-1-rdunlap@infradead.org Cc: James E.J. Bottomley Cc: Martin K. Petersen Cc: Bart Van Assche Cc: Jonathan Corbet Cc: linux-doc@vger.kernel.org Cc: Kashyap Desai Cc: Sumit Saxena Cc: Shivasharan S Cc: Chandrakanth patil Cc: megaraidlinux.pdl@broadcom.com Signed-off-by: Martin K. Petersen --- Documentation/scsi/scsi_mid_low_api.rst | 55 +++------------------------------ drivers/scsi/megaraid/megaraid_mbox.c | 2 +- include/scsi/scsi_host.h | 2 +- 3 files changed, 7 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/Documentation/scsi/scsi_mid_low_api.rst b/Documentation/scsi/scsi_mid_low_api.rst index 941680487e90..9d15e20b0b5e 100644 --- a/Documentation/scsi/scsi_mid_low_api.rst +++ b/Documentation/scsi/scsi_mid_low_api.rst @@ -101,7 +101,7 @@ supplied functions" below. Those functions in group b) are listed in a section entitled "Interface functions" below. Their function pointers are placed in the members of "struct scsi_host_template", an instance of which is passed to -scsi_host_alloc() [#]_. Those interface functions that the LLD does not +scsi_host_alloc(). Those interface functions that the LLD does not wish to supply should have NULL placed in the corresponding member of struct scsi_host_template. Defining an instance of struct scsi_host_template at file scope will cause NULL to be placed in function @@ -116,9 +116,6 @@ should be static. For example the sdev_init() function in an LLD called "xxx" could be defined as ``static int xxx_sdev_init(struct scsi_device * sdev) { /* code */ }`` -.. [#] the scsi_host_alloc() function is a replacement for the rather vaguely - named scsi_register() function in most situations. - Hotplug initialization model ============================ @@ -302,14 +299,12 @@ Summary: - scsi_host_alloc - return a new scsi_host instance whose refcount==1 - scsi_host_get - increments Scsi_Host instance's refcount - scsi_host_put - decrements Scsi_Host instance's refcount (free if 0) - - scsi_register - create and register a scsi host adapter instance. - scsi_remove_device - detach and remove a SCSI device - scsi_remove_host - detach and remove all SCSI devices owned by host - scsi_report_bus_reset - report scsi _bus_ reset observed - scsi_scan_host - scan SCSI bus - scsi_track_queue_full - track successive QUEUE_FULL events - scsi_unblock_requests - allow further commands to be queued to given host - - scsi_unregister - [calls scsi_host_put()] Details:: @@ -474,27 +469,6 @@ Details:: void scsi_host_put(struct Scsi_Host *shost) - /** - * scsi_register - create and register a scsi host adapter instance. - * @sht: pointer to scsi host template - * @privsize: extra bytes to allocate in hostdata array (which is the - * last member of the returned Scsi_Host instance) - * - * Returns pointer to new Scsi_Host instance or NULL on failure - * - * Might block: yes - * - * Notes: When this call returns to the LLD, the SCSI bus scan on - * this host has _not_ yet been done. - * The hostdata array (by default zero length) is a per host scratch - * area for the LLD. - * - * Defined in: drivers/scsi/hosts.c . - **/ - struct Scsi_Host * scsi_register(struct scsi_host_template * sht, - int privsize) - - /** * scsi_remove_device - detach and remove a SCSI device * @sdev: a pointer to a scsi device instance @@ -524,7 +498,7 @@ Details:: * * Notes: Should only be invoked if the "hotplug initialization * model" is being used. It should be called _prior_ to - * scsi_unregister(). + * calling scsi_host_put(). * * Defined in: drivers/scsi/hosts.c . **/ @@ -601,31 +575,12 @@ Details:: void scsi_unblock_requests(struct Scsi_Host * shost) - /** - * scsi_unregister - unregister and free memory used by host instance - * @shp: pointer to scsi host instance to unregister. - * - * Returns nothing - * - * Might block: no - * - * Notes: Should not be invoked if the "hotplug initialization - * model" is being used. Called internally by exit_this_scsi_driver() - * in the "passive initialization model". Hence a LLD has no need to - * call this function directly. - * - * Defined in: drivers/scsi/hosts.c . - **/ - void scsi_unregister(struct Scsi_Host * shp) - - - Interface Functions =================== Interface functions are supplied (defined) by LLDs and their function pointers are placed in an instance of struct scsi_host_template which -is passed to scsi_host_alloc() or scsi_register(). +is passed to scsi_host_alloc(). Some are mandatory. Interface functions should be declared static. The accepted convention is that driver "xyz" will declare its sdev_configure() function as:: @@ -636,7 +591,7 @@ and so forth for all interface functions listed below. A pointer to this function should be placed in the 'sdev_configure' member of a "struct scsi_host_template" instance. A pointer to such an instance -should be passed to the mid level's scsi_host_alloc() or scsi_register(). +should be passed to the mid level's scsi_host_alloc(). . The interface functions are also described in the include/scsi/scsi_host.h @@ -1111,7 +1066,7 @@ of interest: hostdata[0] - area reserved for LLD at end of struct Scsi_Host. Size is set by the second argument (named 'xtr_bytes') to - scsi_host_alloc() or scsi_register(). + scsi_host_alloc(). vendor_id - a unique value that identifies the vendor supplying the LLD for the Scsi_Host. Used most often in validating diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c index bc867da650b6..9463f11cc4c8 100644 --- a/drivers/scsi/megaraid/megaraid_mbox.c +++ b/drivers/scsi/megaraid/megaraid_mbox.c @@ -621,7 +621,7 @@ megaraid_io_attach(adapter_t *adapter) host = scsi_host_alloc(&megaraid_template_g, 8); if (!host) { con_log(CL_ANN, (KERN_WARNING - "megaraid mbox: scsi_register failed\n")); + "megaraid mbox: scsi_host_alloc failed\n")); return -1; } diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 8fccfd27393e..fac90ae884c7 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -595,7 +595,7 @@ struct Scsi_Host { * have some way of identifying each detected host adapter properly * and uniquely. For hosts that do not support more than one card * in the system at one time, this does not need to be set. It is - * initialized to 0 in scsi_register. + * initialized to 0 in scsi_host_alloc. */ unsigned int unique_id; -- cgit v1.2.3 From 09463346b6c23672cdd451f500d2a23b792bd6f0 Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 15 Nov 2024 19:26:50 +0800 Subject: crypto: hisilicon/zip - add data aggregation feature The zip device adds data aggregation feature, data with the same key can be combined. This patch enables the device data aggregation feature. New feature is called "hashagg" name and registered to the uacce subsystem to allow applications to submit data aggregation operations in user space. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/qm.c | 12 +++--- drivers/crypto/hisilicon/zip/Makefile | 2 +- drivers/crypto/hisilicon/zip/dae_main.c | 70 +++++++++++++++++++++++++++++++++ drivers/crypto/hisilicon/zip/zip.h | 2 + drivers/crypto/hisilicon/zip/zip_main.c | 15 +++++-- include/linux/hisi_acc_qm.h | 3 ++ 6 files changed, 93 insertions(+), 11 deletions(-) create mode 100644 drivers/crypto/hisilicon/zip/dae_main.c (limited to 'include') diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 19c1b5d3c954..97404efa2e41 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -119,6 +119,7 @@ #define QM_SQC_VFT_BASE_MASK_V2 GENMASK(15, 0) #define QM_SQC_VFT_NUM_SHIFT_V2 45 #define QM_SQC_VFT_NUM_MASK_V2 GENMASK(9, 0) +#define QM_MAX_QC_TYPE 2 #define QM_ABNORMAL_INT_SOURCE 0x100000 #define QM_ABNORMAL_INT_MASK 0x100004 @@ -234,8 +235,6 @@ #define QM_QOS_MAX_CIR_U 6 #define QM_AUTOSUSPEND_DELAY 3000 -#define QM_DEV_ALG_MAX_LEN 256 - /* abnormal status value for stopping queue */ #define QM_STOP_QUEUE_FAIL 1 #define QM_DUMP_SQC_FAIL 3 @@ -333,6 +332,7 @@ static const struct hisi_qm_cap_info qm_cap_info_comm[] = { {QM_SUPPORT_STOP_FUNC, 0x3100, 0, BIT(10), 0x0, 0x0, 0x1}, {QM_SUPPORT_MB_COMMAND, 0x3100, 0, BIT(11), 0x0, 0x0, 0x1}, {QM_SUPPORT_SVA_PREFETCH, 0x3100, 0, BIT(14), 0x0, 0x0, 0x1}, + {QM_SUPPORT_DAE, 0x3100, 0, BIT(15), 0x0, 0x0, 0x0}, }; static const struct hisi_qm_cap_info qm_cap_info_pf[] = { @@ -855,10 +855,10 @@ int hisi_qm_set_algs(struct hisi_qm *qm, u64 alg_msk, const struct qm_dev_alg *d strcat(algs, dev_algs[i].alg); ptr = strrchr(algs, '\n'); - if (ptr) { + if (ptr) *ptr = '\0'; - qm->uacce->algs = algs; - } + + qm->uacce->algs = algs; return 0; } @@ -2475,7 +2475,7 @@ static long hisi_qm_uacce_ioctl(struct uacce_queue *q, unsigned int cmd, sizeof(struct hisi_qp_ctx))) return -EFAULT; - if (qp_ctx.qc_type != 0 && qp_ctx.qc_type != 1) + if (qp_ctx.qc_type > QM_MAX_QC_TYPE) return -EINVAL; qm_set_sqctype(q, qp_ctx.qc_type); diff --git a/drivers/crypto/hisilicon/zip/Makefile b/drivers/crypto/hisilicon/zip/Makefile index a936f099ee22..13de020b77d6 100644 --- a/drivers/crypto/hisilicon/zip/Makefile +++ b/drivers/crypto/hisilicon/zip/Makefile @@ -1,2 +1,2 @@ obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += hisi_zip.o -hisi_zip-objs = zip_main.o zip_crypto.o +hisi_zip-objs = zip_main.o zip_crypto.o dae_main.o diff --git a/drivers/crypto/hisilicon/zip/dae_main.c b/drivers/crypto/hisilicon/zip/dae_main.c new file mode 100644 index 000000000000..4369c5a7e2d5 --- /dev/null +++ b/drivers/crypto/hisilicon/zip/dae_main.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 HiSilicon Limited. */ + +#include +#include +#include +#include "zip.h" + +/* memory */ +#define DAE_MEM_START_OFFSET 0x331040 +#define DAE_MEM_DONE_OFFSET 0x331044 +#define DAE_MEM_START_MASK 0x1 +#define DAE_MEM_DONE_MASK 0x1 +#define DAE_REG_RD_INTVRL_US 10 +#define DAE_REG_RD_TMOUT_US USEC_PER_SEC + +#define DAE_ALG_NAME "hashagg" + +static inline bool dae_is_support(struct hisi_qm *qm) +{ + if (test_bit(QM_SUPPORT_DAE, &qm->caps)) + return true; + + return false; +} + +int hisi_dae_set_user_domain(struct hisi_qm *qm) +{ + u32 val; + int ret; + + if (!dae_is_support(qm)) + return 0; + + val = readl(qm->io_base + DAE_MEM_START_OFFSET); + val |= DAE_MEM_START_MASK; + writel(val, qm->io_base + DAE_MEM_START_OFFSET); + ret = readl_relaxed_poll_timeout(qm->io_base + DAE_MEM_DONE_OFFSET, val, + val & DAE_MEM_DONE_MASK, + DAE_REG_RD_INTVRL_US, DAE_REG_RD_TMOUT_US); + if (ret) + pci_err(qm->pdev, "failed to init dae memory!\n"); + + return ret; +} + +int hisi_dae_set_alg(struct hisi_qm *qm) +{ + size_t len; + + if (!dae_is_support(qm)) + return 0; + + if (!qm->uacce) + return 0; + + len = strlen(qm->uacce->algs); + /* A line break may be required */ + if (len + strlen(DAE_ALG_NAME) + 1 >= QM_DEV_ALG_MAX_LEN) { + pci_err(qm->pdev, "algorithm name is too long!\n"); + return -EINVAL; + } + + if (len) + strcat((char *)qm->uacce->algs, "\n"); + + strcat((char *)qm->uacce->algs, DAE_ALG_NAME); + + return 0; +} diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index 2fecf346c3c9..a44ce7f06786 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -103,4 +103,6 @@ int zip_create_qps(struct hisi_qp **qps, int qp_num, int node); int hisi_zip_register_to_crypto(struct hisi_qm *qm); void hisi_zip_unregister_from_crypto(struct hisi_qm *qm); bool hisi_zip_alg_support(struct hisi_qm *qm, u32 alg); +int hisi_dae_set_user_domain(struct hisi_qm *qm); +int hisi_dae_set_alg(struct hisi_qm *qm); #endif diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 9239b251c2d7..63a18c26c4ea 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -582,7 +582,7 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm) hisi_zip_enable_clock_gate(qm); - return 0; + return hisi_dae_set_user_domain(qm); } static void hisi_zip_master_ooo_ctrl(struct hisi_qm *qm, bool enable) @@ -1301,17 +1301,24 @@ static int hisi_zip_qm_init(struct hisi_qm *qm, struct pci_dev *pdev) ret = zip_pre_store_cap_reg(qm); if (ret) { pci_err(qm->pdev, "Failed to pre-store capability registers!\n"); - hisi_qm_uninit(qm); - return ret; + goto err_qm_uninit; } alg_msk = qm->cap_tables.dev_cap_table[ZIP_ALG_BITMAP].cap_val; ret = hisi_qm_set_algs(qm, alg_msk, zip_dev_algs, ARRAY_SIZE(zip_dev_algs)); if (ret) { pci_err(qm->pdev, "Failed to set zip algs!\n"); - hisi_qm_uninit(qm); + goto err_qm_uninit; } + ret = hisi_dae_set_alg(qm); + if (ret) + goto err_qm_uninit; + + return 0; + +err_qm_uninit: + hisi_qm_uninit(qm); return ret; } diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 6dbd0d49628f..3a13fb719dd0 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -97,6 +97,8 @@ /* page number for queue file region */ #define QM_DOORBELL_PAGE_NR 1 +#define QM_DEV_ALG_MAX_LEN 256 + /* uacce mode of the driver */ #define UACCE_MODE_NOUACCE 0 /* don't use uacce */ #define UACCE_MODE_SVA 1 /* use uacce sva mode */ @@ -156,6 +158,7 @@ enum qm_cap_bits { QM_SUPPORT_MB_COMMAND, QM_SUPPORT_SVA_PREFETCH, QM_SUPPORT_RPM, + QM_SUPPORT_DAE, }; struct qm_dev_alg { -- cgit v1.2.3 From 771ba5c982a28ede1d33de9702c0f3501f1f9e1c Mon Sep 17 00:00:00 2001 From: Weili Qian Date: Fri, 15 Nov 2024 19:26:51 +0800 Subject: crypto: hisilicon/zip - support new error report The error detection of the data aggregation feature is separated from the compression/decompression feature. This patch enables the error detection and reporting of the data aggregation feature. When an unrecoverable error occurs in the algorithm core, the device reports the error to the driver, and the driver will reset the device. Signed-off-by: Weili Qian Signed-off-by: Herbert Xu --- drivers/crypto/hisilicon/hpre/hpre_main.c | 12 ++ drivers/crypto/hisilicon/qm.c | 46 +++++-- drivers/crypto/hisilicon/sec2/sec_main.c | 12 ++ drivers/crypto/hisilicon/zip/dae_main.c | 192 ++++++++++++++++++++++++++++++ drivers/crypto/hisilicon/zip/zip.h | 6 + drivers/crypto/hisilicon/zip/zip_main.c | 36 +++++- include/linux/hisi_acc_qm.h | 2 + 7 files changed, 292 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 96fde9437b4b..5b4c65440d06 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -1396,6 +1396,17 @@ static enum acc_err_result hpre_get_err_result(struct hisi_qm *qm) return ACC_ERR_RECOVERED; } +static bool hpre_dev_is_abnormal(struct hisi_qm *qm) +{ + u32 err_status; + + err_status = hpre_get_hw_err_status(qm); + if (err_status & qm->err_info.dev_shutdown_mask) + return true; + + return false; +} + static void hpre_err_info_init(struct hisi_qm *qm) { struct hisi_qm_err_info *err_info = &qm->err_info; @@ -1428,6 +1439,7 @@ static const struct hisi_qm_err_ini hpre_err_ini = { .show_last_dfx_regs = hpre_show_last_dfx_regs, .err_info_init = hpre_err_info_init, .get_err_result = hpre_get_err_result, + .dev_is_abnormal = hpre_dev_is_abnormal, }; static int hpre_pf_probe_init(struct hpre *hpre) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 97404efa2e41..38193e25b014 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -501,15 +501,20 @@ static u32 qm_get_dev_err_status(struct hisi_qm *qm) /* Check if the error causes the master ooo block */ static bool qm_check_dev_error(struct hisi_qm *qm) { - u32 val, dev_val; + struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev)); + u32 err_status; - if (qm->fun_type == QM_HW_VF) + if (pf_qm->fun_type == QM_HW_VF) return false; - val = qm_get_hw_error_status(qm) & qm->err_info.qm_shutdown_mask; - dev_val = qm_get_dev_err_status(qm) & qm->err_info.dev_shutdown_mask; + err_status = qm_get_hw_error_status(pf_qm); + if (err_status & pf_qm->err_info.qm_shutdown_mask) + return true; + + if (pf_qm->err_ini->dev_is_abnormal) + return pf_qm->err_ini->dev_is_abnormal(pf_qm); - return val || dev_val; + return false; } static int qm_wait_reset_finish(struct hisi_qm *qm) @@ -654,7 +659,6 @@ EXPORT_SYMBOL_GPL(hisi_qm_mb); /* op 0: set xqc information to hardware, 1: get xqc information from hardware. */ int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op) { - struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev)); struct qm_mailbox mailbox; dma_addr_t xqc_dma; void *tmp_xqc; @@ -688,7 +692,7 @@ int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op } /* Setting xqc will fail if master OOO is blocked. */ - if (qm_check_dev_error(pf_qm)) { + if (qm_check_dev_error(qm)) { dev_err(&qm->pdev->dev, "failed to send mailbox since qm is stop!\n"); return -EIO; } @@ -1052,11 +1056,10 @@ static void qm_disable_qp(struct hisi_qm *qm, u32 qp_id) static void qm_reset_function(struct hisi_qm *qm) { - struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev)); struct device *dev = &qm->pdev->dev; int ret; - if (qm_check_dev_error(pf_qm)) + if (qm_check_dev_error(qm)) return; ret = qm_reset_prepare_ready(qm); @@ -2156,12 +2159,11 @@ static int qm_wait_qp_empty(struct hisi_qm *qm, u32 *state, u32 qp_id) static int qm_drain_qp(struct hisi_qp *qp) { struct hisi_qm *qm = qp->qm; - struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev)); u32 state = 0; int ret; /* No need to judge if master OOO is blocked. */ - if (qm_check_dev_error(pf_qm)) + if (qm_check_dev_error(qm)) return 0; /* HW V3 supports drain qp by device */ @@ -4137,6 +4139,12 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm) struct pci_dev *pdev = qm->pdev; int ret; + if (qm->err_ini->set_priv_status) { + ret = qm->err_ini->set_priv_status(qm); + if (ret) + return ret; + } + ret = qm_reset_prepare_ready(qm); if (ret) { pci_err(pdev, "Controller reset not ready!\n"); @@ -4527,7 +4535,7 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev) * Check whether there is an ECC mbit error, If it occurs, need to * wait for soft reset to fix it. */ - while (qm_check_dev_error(pf_qm)) { + while (qm_check_dev_error(qm)) { msleep(++delay); if (delay > QM_RESET_WAIT_TIMEOUT) return; @@ -5247,6 +5255,14 @@ static int qm_clear_device(struct hisi_qm *qm) return ret; } + if (qm->err_ini->set_priv_status) { + ret = qm->err_ini->set_priv_status(qm); + if (ret) { + writel(0x0, qm->io_base + ACC_MASTER_GLOBAL_CTRL); + return ret; + } + } + return qm_reset_device(qm); } @@ -5598,6 +5614,12 @@ static int qm_prepare_for_suspend(struct hisi_qm *qm) if (ret) return ret; + if (qm->err_ini->set_priv_status) { + ret = qm->err_ini->set_priv_status(qm); + if (ret) + return ret; + } + ret = qm_set_pf_mse(qm, false); if (ret) pci_err(pdev, "failed to disable MSE before suspending!\n"); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 8ec5333bb5aa..d71594588b85 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -1097,6 +1097,17 @@ static enum acc_err_result sec_get_err_result(struct hisi_qm *qm) return ACC_ERR_RECOVERED; } +static bool sec_dev_is_abnormal(struct hisi_qm *qm) +{ + u32 err_status; + + err_status = sec_get_hw_err_status(qm); + if (err_status & qm->err_info.dev_shutdown_mask) + return true; + + return false; +} + static void sec_err_info_init(struct hisi_qm *qm) { struct hisi_qm_err_info *err_info = &qm->err_info; @@ -1129,6 +1140,7 @@ static const struct hisi_qm_err_ini sec_err_ini = { .show_last_dfx_regs = sec_show_last_dfx_regs, .err_info_init = sec_err_info_init, .get_err_result = sec_get_err_result, + .dev_is_abnormal = sec_dev_is_abnormal, }; static int sec_pf_probe_init(struct sec_dev *sec) diff --git a/drivers/crypto/hisilicon/zip/dae_main.c b/drivers/crypto/hisilicon/zip/dae_main.c index 4369c5a7e2d5..6f22e4c36e49 100644 --- a/drivers/crypto/hisilicon/zip/dae_main.c +++ b/drivers/crypto/hisilicon/zip/dae_main.c @@ -16,6 +16,42 @@ #define DAE_ALG_NAME "hashagg" +/* error */ +#define DAE_AXI_CFG_OFFSET 0x331000 +#define DAE_AXI_SHUTDOWN_MASK (BIT(0) | BIT(5)) +#define DAE_ERR_SOURCE_OFFSET 0x331C84 +#define DAE_ERR_STATUS_OFFSET 0x331C88 +#define DAE_ERR_CE_OFFSET 0x331CA0 +#define DAE_ERR_CE_MASK BIT(3) +#define DAE_ERR_NFE_OFFSET 0x331CA4 +#define DAE_ERR_NFE_MASK 0x17 +#define DAE_ERR_FE_OFFSET 0x331CA8 +#define DAE_ERR_FE_MASK 0 +#define DAE_ECC_MBIT_MASK BIT(2) +#define DAE_ECC_INFO_OFFSET 0x33400C +#define DAE_ERR_SHUTDOWN_OFFSET 0x331CAC +#define DAE_ERR_SHUTDOWN_MASK 0x17 +#define DAE_ERR_ENABLE_OFFSET 0x331C80 +#define DAE_ERR_ENABLE_MASK (DAE_ERR_FE_MASK | DAE_ERR_NFE_MASK | DAE_ERR_CE_MASK) +#define DAE_AM_CTRL_GLOBAL_OFFSET 0x330000 +#define DAE_AM_RETURN_OFFSET 0x330150 +#define DAE_AM_RETURN_MASK 0x3 +#define DAE_AXI_CFG_OFFSET 0x331000 +#define DAE_AXI_SHUTDOWN_EN_MASK (BIT(0) | BIT(5)) + +struct hisi_dae_hw_error { + u32 int_msk; + const char *msg; +}; + +static const struct hisi_dae_hw_error dae_hw_error[] = { + { .int_msk = BIT(0), .msg = "dae_axi_bus_err" }, + { .int_msk = BIT(1), .msg = "dae_axi_poison_err" }, + { .int_msk = BIT(2), .msg = "dae_ecc_2bit_err" }, + { .int_msk = BIT(3), .msg = "dae_ecc_1bit_err" }, + { .int_msk = BIT(4), .msg = "dae_fsm_hbeat_err" }, +}; + static inline bool dae_is_support(struct hisi_qm *qm) { if (test_bit(QM_SUPPORT_DAE, &qm->caps)) @@ -68,3 +104,159 @@ int hisi_dae_set_alg(struct hisi_qm *qm) return 0; } + +static void hisi_dae_master_ooo_ctrl(struct hisi_qm *qm, bool enable) +{ + u32 axi_val, err_val; + + axi_val = readl(qm->io_base + DAE_AXI_CFG_OFFSET); + if (enable) { + axi_val |= DAE_AXI_SHUTDOWN_MASK; + err_val = DAE_ERR_SHUTDOWN_MASK; + } else { + axi_val &= ~DAE_AXI_SHUTDOWN_MASK; + err_val = 0; + } + + writel(axi_val, qm->io_base + DAE_AXI_CFG_OFFSET); + writel(err_val, qm->io_base + DAE_ERR_SHUTDOWN_OFFSET); +} + +void hisi_dae_hw_error_enable(struct hisi_qm *qm) +{ + if (!dae_is_support(qm)) + return; + + /* clear dae hw error source if having */ + writel(DAE_ERR_ENABLE_MASK, qm->io_base + DAE_ERR_SOURCE_OFFSET); + + /* configure error type */ + writel(DAE_ERR_CE_MASK, qm->io_base + DAE_ERR_CE_OFFSET); + writel(DAE_ERR_NFE_MASK, qm->io_base + DAE_ERR_NFE_OFFSET); + writel(DAE_ERR_FE_MASK, qm->io_base + DAE_ERR_FE_OFFSET); + + hisi_dae_master_ooo_ctrl(qm, true); + + /* enable dae hw error interrupts */ + writel(DAE_ERR_ENABLE_MASK, qm->io_base + DAE_ERR_ENABLE_OFFSET); +} + +void hisi_dae_hw_error_disable(struct hisi_qm *qm) +{ + if (!dae_is_support(qm)) + return; + + writel(0, qm->io_base + DAE_ERR_ENABLE_OFFSET); + hisi_dae_master_ooo_ctrl(qm, false); +} + +static u32 hisi_dae_get_hw_err_status(struct hisi_qm *qm) +{ + return readl(qm->io_base + DAE_ERR_STATUS_OFFSET); +} + +static void hisi_dae_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts) +{ + if (!dae_is_support(qm)) + return; + + writel(err_sts, qm->io_base + DAE_ERR_SOURCE_OFFSET); +} + +static void hisi_dae_disable_error_report(struct hisi_qm *qm, u32 err_type) +{ + writel(DAE_ERR_NFE_MASK & (~err_type), qm->io_base + DAE_ERR_NFE_OFFSET); +} + +static void hisi_dae_log_hw_error(struct hisi_qm *qm, u32 err_type) +{ + const struct hisi_dae_hw_error *err = dae_hw_error; + struct device *dev = &qm->pdev->dev; + u32 ecc_info; + size_t i; + + for (i = 0; i < ARRAY_SIZE(dae_hw_error); i++) { + err = &dae_hw_error[i]; + if (!(err->int_msk & err_type)) + continue; + + dev_err(dev, "%s [error status=0x%x] found\n", + err->msg, err->int_msk); + + if (err->int_msk & DAE_ECC_MBIT_MASK) { + ecc_info = readl(qm->io_base + DAE_ECC_INFO_OFFSET); + dev_err(dev, "dae multi ecc sram info 0x%x\n", ecc_info); + } + } +} + +enum acc_err_result hisi_dae_get_err_result(struct hisi_qm *qm) +{ + u32 err_status; + + if (!dae_is_support(qm)) + return ACC_ERR_NONE; + + err_status = hisi_dae_get_hw_err_status(qm); + if (!err_status) + return ACC_ERR_NONE; + + hisi_dae_log_hw_error(qm, err_status); + + if (err_status & DAE_ERR_NFE_MASK) { + /* Disable the same error reporting until device is recovered. */ + hisi_dae_disable_error_report(qm, err_status); + return ACC_ERR_NEED_RESET; + } + hisi_dae_clear_hw_err_status(qm, err_status); + + return ACC_ERR_RECOVERED; +} + +bool hisi_dae_dev_is_abnormal(struct hisi_qm *qm) +{ + u32 err_status; + + if (!dae_is_support(qm)) + return false; + + err_status = hisi_dae_get_hw_err_status(qm); + if (err_status & DAE_ERR_NFE_MASK) + return true; + + return false; +} + +int hisi_dae_close_axi_master_ooo(struct hisi_qm *qm) +{ + u32 val; + int ret; + + if (!dae_is_support(qm)) + return 0; + + val = readl(qm->io_base + DAE_AM_CTRL_GLOBAL_OFFSET); + val |= BIT(0); + writel(val, qm->io_base + DAE_AM_CTRL_GLOBAL_OFFSET); + + ret = readl_relaxed_poll_timeout(qm->io_base + DAE_AM_RETURN_OFFSET, + val, (val == DAE_AM_RETURN_MASK), + DAE_REG_RD_INTVRL_US, DAE_REG_RD_TMOUT_US); + if (ret) + dev_err(&qm->pdev->dev, "failed to close dae axi ooo!\n"); + + return ret; +} + +void hisi_dae_open_axi_master_ooo(struct hisi_qm *qm) +{ + u32 val; + + if (!dae_is_support(qm)) + return; + + val = readl(qm->io_base + DAE_AXI_CFG_OFFSET); + + writel(val & ~DAE_AXI_SHUTDOWN_EN_MASK, qm->io_base + DAE_AXI_CFG_OFFSET); + writel(val | DAE_AXI_SHUTDOWN_EN_MASK, qm->io_base + DAE_AXI_CFG_OFFSET); +} diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h index a44ce7f06786..9fb2a9c01132 100644 --- a/drivers/crypto/hisilicon/zip/zip.h +++ b/drivers/crypto/hisilicon/zip/zip.h @@ -105,4 +105,10 @@ void hisi_zip_unregister_from_crypto(struct hisi_qm *qm); bool hisi_zip_alg_support(struct hisi_qm *qm, u32 alg); int hisi_dae_set_user_domain(struct hisi_qm *qm); int hisi_dae_set_alg(struct hisi_qm *qm); +void hisi_dae_hw_error_disable(struct hisi_qm *qm); +void hisi_dae_hw_error_enable(struct hisi_qm *qm); +void hisi_dae_open_axi_master_ooo(struct hisi_qm *qm); +int hisi_dae_close_axi_master_ooo(struct hisi_qm *qm); +bool hisi_dae_dev_is_abnormal(struct hisi_qm *qm); +enum acc_err_result hisi_dae_get_err_result(struct hisi_qm *qm); #endif diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 63a18c26c4ea..cb17d6cbf6ff 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -631,6 +631,8 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm) /* enable ZIP hw error interrupts */ writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG); + + hisi_dae_hw_error_enable(qm); } static void hisi_zip_hw_error_disable(struct hisi_qm *qm) @@ -643,6 +645,8 @@ static void hisi_zip_hw_error_disable(struct hisi_qm *qm) writel(ce | nfe | HZIP_CORE_INT_RAS_FE_ENB_MASK, qm->io_base + HZIP_CORE_INT_MASK_REG); hisi_zip_master_ooo_ctrl(qm, false); + + hisi_dae_hw_error_disable(qm); } static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file) @@ -1129,6 +1133,8 @@ static void hisi_zip_open_axi_master_ooo(struct hisi_qm *qm) writel(val | HZIP_AXI_SHUTDOWN_ENABLE, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL); + + hisi_dae_open_axi_master_ooo(qm); } static void hisi_zip_close_axi_master_ooo(struct hisi_qm *qm) @@ -1147,8 +1153,11 @@ static void hisi_zip_close_axi_master_ooo(struct hisi_qm *qm) static enum acc_err_result hisi_zip_get_err_result(struct hisi_qm *qm) { + enum acc_err_result zip_result = ACC_ERR_NONE; + enum acc_err_result dae_result; u32 err_status; + /* Get device hardware new error status */ err_status = hisi_zip_get_hw_err_status(qm); if (err_status) { if (err_status & qm->err_info.ecc_2bits_mask) @@ -1159,11 +1168,32 @@ static enum acc_err_result hisi_zip_get_err_result(struct hisi_qm *qm) /* Disable the same error reporting until device is recovered. */ hisi_zip_disable_error_report(qm, err_status); return ACC_ERR_NEED_RESET; + } else { + hisi_zip_clear_hw_err_status(qm, err_status); } - hisi_zip_clear_hw_err_status(qm, err_status); } - return ACC_ERR_RECOVERED; + dae_result = hisi_dae_get_err_result(qm); + + return (zip_result == ACC_ERR_NEED_RESET || + dae_result == ACC_ERR_NEED_RESET) ? + ACC_ERR_NEED_RESET : ACC_ERR_RECOVERED; +} + +static bool hisi_zip_dev_is_abnormal(struct hisi_qm *qm) +{ + u32 err_status; + + err_status = hisi_zip_get_hw_err_status(qm); + if (err_status & qm->err_info.dev_shutdown_mask) + return true; + + return hisi_dae_dev_is_abnormal(qm); +} + +static int hisi_zip_set_priv_status(struct hisi_qm *qm) +{ + return hisi_dae_close_axi_master_ooo(qm); } static void hisi_zip_err_info_init(struct hisi_qm *qm) @@ -1200,6 +1230,8 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = { .show_last_dfx_regs = hisi_zip_show_last_dfx_regs, .err_info_init = hisi_zip_err_info_init, .get_err_result = hisi_zip_get_err_result, + .set_priv_status = hisi_zip_set_priv_status, + .dev_is_abnormal = hisi_zip_dev_is_abnormal, }; static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip) diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h index 3a13fb719dd0..c1dafbabbd6b 100644 --- a/include/linux/hisi_acc_qm.h +++ b/include/linux/hisi_acc_qm.h @@ -269,6 +269,8 @@ struct hisi_qm_err_ini { void (*show_last_dfx_regs)(struct hisi_qm *qm); void (*err_info_init)(struct hisi_qm *qm); enum acc_err_result (*get_err_result)(struct hisi_qm *qm); + bool (*dev_is_abnormal)(struct hisi_qm *qm); + int (*set_priv_status)(struct hisi_qm *qm); }; struct hisi_qm_cap_info { -- cgit v1.2.3 From fbef60de6c753253e1337ea60cf818d079108974 Mon Sep 17 00:00:00 2001 From: Chiara Meiohas Date: Tue, 3 Dec 2024 15:57:11 +0200 Subject: RDMA/mlx5: Extend ODP statistics with operation count The current ODP counters represent the total number of pages handled, but it is not enough to understand the effectiveness of these operations. Extend the ODP counters to include the number of times page fault and invalidation events were handled. Example for a single page fault handling 512 pages: - page_fault: incremented by 512 (total pages) - page_fault_handled: incremented by 1 (operation count) The same example is applicable for page invalidation too. Previous output: $ rdma stat mr dev rocep8s0f0 mrn 8 page_faults 27 page_invalidations 0 page_prefetch 29 New output: $ rdma stat mr dev rocep8s0f0 mrn 21 page_faults 512 page_faults_handled 1 page_invalidations 0 page_invalidations_handled 0 page_prefetch 51200 Signed-off-by: Chiara Meiohas Reviewed-by: Michael Guralnik Link: https://patch.msgid.link/b18f29ed1392996ade66e9e6c45f018925253f6a.1733234165.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 ++++++ drivers/infiniband/hw/mlx5/odp.c | 6 +++--- drivers/infiniband/hw/mlx5/restrack.c | 9 +++++++++ include/rdma/ib_verbs.h | 2 ++ 4 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index a01b592aa716..974a45c92fbb 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -669,6 +669,12 @@ struct mlx5_ib_mkey { #define mlx5_update_odp_stats(mr, counter_name, value) \ atomic64_add(value, &((mr)->odp_stats.counter_name)) +#define mlx5_update_odp_stats_with_handled(mr, counter_name, value) \ + do { \ + mlx5_update_odp_stats(mr, counter_name, value); \ + atomic64_add(1, &((mr)->odp_stats.counter_name##_handled)); \ + } while (0) + struct mlx5_ib_mr { struct ib_mr ibmr; struct mlx5_ib_mkey mmkey; diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 4b37446758fd..4eb03fc0d302 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -313,7 +313,7 @@ static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni, MLX5_IB_UPD_XLT_ZAP | MLX5_IB_UPD_XLT_ATOMIC); - mlx5_update_odp_stats(mr, invalidations, invalidations); + mlx5_update_odp_stats_with_handled(mr, invalidations, invalidations); /* * We are now sure that the device will not access the @@ -997,7 +997,7 @@ next_mr: if (ret < 0) goto end; - mlx5_update_odp_stats(mr, faults, ret); + mlx5_update_odp_stats_with_handled(mr, faults, ret); npages += ret; ret = 0; @@ -1529,7 +1529,7 @@ static void mlx5_ib_mr_memory_pfault_handler(struct mlx5_ib_dev *dev, goto err; } - mlx5_update_odp_stats(mr, faults, ret); + mlx5_update_odp_stats_with_handled(mr, faults, ret); mlx5r_deref_odp_mkey(mmkey); if (pfault->memory.flags & MLX5_MEMORY_PAGE_FAULT_FLAGS_LAST) diff --git a/drivers/infiniband/hw/mlx5/restrack.c b/drivers/infiniband/hw/mlx5/restrack.c index affcf8fe943c..67841922c7b8 100644 --- a/drivers/infiniband/hw/mlx5/restrack.c +++ b/drivers/infiniband/hw/mlx5/restrack.c @@ -95,10 +95,19 @@ static int fill_stat_mr_entry(struct sk_buff *msg, struct ib_mr *ibmr) if (rdma_nl_stat_hwcounter_entry(msg, "page_faults", atomic64_read(&mr->odp_stats.faults))) goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_faults_handled", + atomic64_read(&mr->odp_stats.faults_handled))) + goto err_table; if (rdma_nl_stat_hwcounter_entry( msg, "page_invalidations", atomic64_read(&mr->odp_stats.invalidations))) goto err_table; + if (rdma_nl_stat_hwcounter_entry( + msg, "page_invalidations_handled", + atomic64_read(&mr->odp_stats.invalidations_handled))) + goto err_table; + if (rdma_nl_stat_hwcounter_entry(msg, "page_prefetch", atomic64_read(&mr->odp_stats.prefetch))) goto err_table; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 3417636da960..6ddd5e3bb884 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2256,7 +2256,9 @@ struct rdma_netdev_alloc_params { struct ib_odp_counters { atomic64_t faults; + atomic64_t faults_handled; atomic64_t invalidations; + atomic64_t invalidations_handled; atomic64_t prefetch; }; -- cgit v1.2.3 From ea79df10331218b04d90f028a605f33c879c518d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 25 Nov 2024 14:23:11 +0100 Subject: mmc: core: Drop the MMC_RSP_R1_NO_CRC response The MMC_RSP_R1_NO_CRC type of response is not being used by the mmc core for any commands. Let's therefore drop it, together with the corresponding code in the host drivers. Signed-off-by: Ulf Hansson Acked-by: Wolfram Sang # for TMIO Reviewed-by: Avri Altman Message-ID: <20241125132311.23939-1-ulf.hansson@linaro.org> --- drivers/mmc/host/rtsx_pci_sdmmc.c | 2 -- drivers/mmc/host/rtsx_usb_sdmmc.c | 3 --- drivers/mmc/host/tmio_mmc_core.c | 1 - include/linux/mmc/core.h | 3 --- 4 files changed, 9 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c index 48d3b0aae5a0..0c6eb60a95fd 100644 --- a/drivers/mmc/host/rtsx_pci_sdmmc.c +++ b/drivers/mmc/host/rtsx_pci_sdmmc.c @@ -115,8 +115,6 @@ static int sd_response_type(struct mmc_command *cmd) return SD_RSP_TYPE_R0; case MMC_RSP_R1: return SD_RSP_TYPE_R1; - case MMC_RSP_R1_NO_CRC: - return SD_RSP_TYPE_R1 | SD_NO_CHECK_CRC7; case MMC_RSP_R1B: return SD_RSP_TYPE_R1b; case MMC_RSP_R2: diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c index 107c78df53cf..d229c2b83ea9 100644 --- a/drivers/mmc/host/rtsx_usb_sdmmc.c +++ b/drivers/mmc/host/rtsx_usb_sdmmc.c @@ -313,9 +313,6 @@ static void sd_send_cmd_get_rsp(struct rtsx_usb_sdmmc *host, case MMC_RSP_R1: rsp_type = SD_RSP_TYPE_R1; break; - case MMC_RSP_R1_NO_CRC: - rsp_type = SD_RSP_TYPE_R1 | SD_NO_CHECK_CRC7; - break; case MMC_RSP_R1B: rsp_type = SD_RSP_TYPE_R1b; break; diff --git a/drivers/mmc/host/tmio_mmc_core.c b/drivers/mmc/host/tmio_mmc_core.c index 45a474ccab1c..04c1c54df791 100644 --- a/drivers/mmc/host/tmio_mmc_core.c +++ b/drivers/mmc/host/tmio_mmc_core.c @@ -297,7 +297,6 @@ static int tmio_mmc_start_command(struct tmio_mmc_host *host, switch (mmc_resp_type(cmd)) { case MMC_RSP_NONE: c |= RESP_NONE; break; case MMC_RSP_R1: - case MMC_RSP_R1_NO_CRC: c |= RESP_R1; break; case MMC_RSP_R1B: c |= RESP_R1B; break; case MMC_RSP_R2: c |= RESP_R2; break; diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 56972bd78462..e13856ab6ad0 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -64,9 +64,6 @@ struct mmc_command { #define MMC_RSP_R6 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) #define MMC_RSP_R7 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) -/* Can be used by core to poll after switch to MMC HS mode */ -#define MMC_RSP_R1_NO_CRC (MMC_RSP_PRESENT|MMC_RSP_OPCODE) - #define mmc_resp_type(cmd) ((cmd)->flags & (MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC|MMC_RSP_BUSY|MMC_RSP_OPCODE)) /* -- cgit v1.2.3 From ed97550d470d00ebafe9de888fd100cb82d3abb6 Mon Sep 17 00:00:00 2001 From: Andy-ld Lu Date: Tue, 26 Nov 2024 20:48:22 +0800 Subject: mmc: core: Introduce the MMC_RSP_R1B_NO_CRC response The R1B response type with ignoring CRC is used in the mmc_cqe_recovery(), introduce the MMC_RSP_R1B_NO_CRC response type to simplify the code. Signed-off-by: Andy-ld Lu Message-ID: <20241126125041.16071-2-andy-ld.lu@mediatek.com> Signed-off-by: Ulf Hansson --- drivers/mmc/core/core.c | 6 ++---- include/linux/mmc/core.h | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index d996d39c0d6f..4ba28a10b2df 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -557,8 +557,7 @@ int mmc_cqe_recovery(struct mmc_host *host) memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_STOP_TRANSMISSION; - cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; - cmd.flags &= ~MMC_RSP_CRC; /* Ignore CRC */ + cmd.flags = MMC_RSP_R1B_NO_CRC | MMC_CMD_AC; /* Ignore CRC */ cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT; mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); @@ -567,8 +566,7 @@ int mmc_cqe_recovery(struct mmc_host *host) memset(&cmd, 0, sizeof(cmd)); cmd.opcode = MMC_CMDQ_TASK_MGMT; cmd.arg = 1; /* Discard entire queue */ - cmd.flags = MMC_RSP_R1B | MMC_CMD_AC; - cmd.flags &= ~MMC_RSP_CRC; /* Ignore CRC */ + cmd.flags = MMC_RSP_R1B_NO_CRC | MMC_CMD_AC; /* Ignore CRC */ cmd.busy_timeout = MMC_CQE_RECOVERY_TIMEOUT; err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index e13856ab6ad0..01e0f591a20b 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -57,6 +57,7 @@ struct mmc_command { #define MMC_RSP_NONE (0) #define MMC_RSP_R1 (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE) #define MMC_RSP_R1B (MMC_RSP_PRESENT|MMC_RSP_CRC|MMC_RSP_OPCODE|MMC_RSP_BUSY) +#define MMC_RSP_R1B_NO_CRC (MMC_RSP_PRESENT|MMC_RSP_OPCODE|MMC_RSP_BUSY) #define MMC_RSP_R2 (MMC_RSP_PRESENT|MMC_RSP_136|MMC_RSP_CRC) #define MMC_RSP_R3 (MMC_RSP_PRESENT) #define MMC_RSP_R4 (MMC_RSP_PRESENT) -- cgit v1.2.3 From d4cc8912cbff4990940b33cc61a9b09ddaee9704 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Mon, 9 Dec 2024 16:49:56 +0000 Subject: firmware: arm_scmi: Add module aliases to i.MX vendor protocols Using the pattern 'scmi-protocol-0x-' as MODULE_ALIAS allows the SCMI core to autoload this protocol, if built as a module, when its protocol operations are requested by an SCMI driver. Cc: Peng Fan Acked-by: Peng Fan Signed-off-by: Cristian Marussi Message-Id: <20241209164957.1801886-3-cristian.marussi@arm.com> Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/vendors/imx/imx-sm-bbm.c | 5 +++-- drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c | 5 +++-- include/linux/scmi_imx_protocol.h | 9 +++++---- 3 files changed, 11 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/firmware/arm_scmi/vendors/imx/imx-sm-bbm.c b/drivers/firmware/arm_scmi/vendors/imx/imx-sm-bbm.c index 17799eacf06c..aa176c1a5eef 100644 --- a/drivers/firmware/arm_scmi/vendors/imx/imx-sm-bbm.c +++ b/drivers/firmware/arm_scmi/vendors/imx/imx-sm-bbm.c @@ -374,10 +374,11 @@ static const struct scmi_protocol scmi_imx_bbm = { .ops = &scmi_imx_bbm_proto_ops, .events = &scmi_imx_bbm_protocol_events, .supported_version = SCMI_PROTOCOL_SUPPORTED_VERSION, - .vendor_id = "NXP", - .sub_vendor_id = "IMX", + .vendor_id = SCMI_IMX_VENDOR, + .sub_vendor_id = SCMI_IMX_SUBVENDOR, }; module_scmi_protocol(scmi_imx_bbm); +MODULE_ALIAS("scmi-protocol-" __stringify(SCMI_PROTOCOL_IMX_BBM) "-" SCMI_IMX_VENDOR); MODULE_DESCRIPTION("i.MX SCMI BBM driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c b/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c index a86ab9b35953..83b69fc4fba5 100644 --- a/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c +++ b/drivers/firmware/arm_scmi/vendors/imx/imx-sm-misc.c @@ -309,10 +309,11 @@ static const struct scmi_protocol scmi_imx_misc = { .ops = &scmi_imx_misc_proto_ops, .events = &scmi_imx_misc_protocol_events, .supported_version = SCMI_PROTOCOL_SUPPORTED_VERSION, - .vendor_id = "NXP", - .sub_vendor_id = "IMX", + .vendor_id = SCMI_IMX_VENDOR, + .sub_vendor_id = SCMI_IMX_SUBVENDOR, }; module_scmi_protocol(scmi_imx_misc); +MODULE_ALIAS("scmi-protocol-" __stringify(SCMI_PROTOCOL_IMX_MISC) "-" SCMI_IMX_VENDOR); MODULE_DESCRIPTION("i.MX SCMI MISC driver"); MODULE_LICENSE("GPL"); diff --git a/include/linux/scmi_imx_protocol.h b/include/linux/scmi_imx_protocol.h index 066216f1357a..53b356a26414 100644 --- a/include/linux/scmi_imx_protocol.h +++ b/include/linux/scmi_imx_protocol.h @@ -13,10 +13,11 @@ #include #include -enum scmi_nxp_protocol { - SCMI_PROTOCOL_IMX_BBM = 0x81, - SCMI_PROTOCOL_IMX_MISC = 0x84, -}; +#define SCMI_PROTOCOL_IMX_BBM 0x81 +#define SCMI_PROTOCOL_IMX_MISC 0x84 + +#define SCMI_IMX_VENDOR "NXP" +#define SCMI_IMX_SUBVENDOR "IMX" struct scmi_imx_bbm_proto_ops { int (*rtc_time_set)(const struct scmi_protocol_handle *ph, u32 id, -- cgit v1.2.3 From a94204f4d48e28a711b7ed10399f749286c433e3 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:15 -0500 Subject: fsnotify: opt-in for permission events at file open time Legacy inotify/fanotify listeners can add watches for events on inode, parent or mount and expect to get events (e.g. FS_MODIFY) on files that were already open at the time of setting up the watches. fanotify permission events are typically used by Anti-malware sofware, that is watching the entire mount and it is not common to have more that one Anti-malware engine installed on a system. To reduce the overhead of the fsnotify_file_perm() hooks on every file access, relax the semantics of the legacy FAN_ACCESS_PERM event to generate events only if there were *any* permission event listeners on the filesystem at the time that the file was opened. The new semantic is implemented by extending the FMODE_NONOTIFY bit into two FMODE_NONOTIFY_* bits, that are used to store a mode for which of the events types to report. This is going to apply to the new fanotify pre-content events in order to reduce the cost of the new pre-content event vfs hooks. [Thanks to Bert Karwatzki for reporting a bug in this code with CONFIG_FANOTIFY_ACCESS_PERMISSIONS disabled] Suggested-by: Linus Torvalds Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wj8L=mtcRTi=NECHMGfZQgXOp_uix1YVh04fEmrKaMnXA@mail.gmail.com/ Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/5ea5f8e283d1edb55aa79c35187bfe344056af14.1731684329.git.josef@toxicpanda.com --- fs/notify/fsnotify.c | 38 ++++++++++++++++++++++++++++++++++++++ fs/open.c | 8 +++++++- include/linux/fs.h | 43 ++++++++++++++++++++++++++++++++++++++----- include/linux/fsnotify.h | 39 +++++++++++++++++++++++---------------- 4 files changed, 106 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index f976949d2634..569ec356e4ce 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -623,6 +623,44 @@ out: } EXPORT_SYMBOL_GPL(fsnotify); +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +/* + * At open time we check fsnotify_sb_has_priority_watchers() and set the + * FMODE_NONOTIFY_ mode bits accordignly. + * Later, fsnotify permission hooks do not check if there are permission event + * watches, but that there were permission event watches at open time. + */ +void file_set_fsnotify_mode(struct file *file) +{ + struct super_block *sb = file->f_path.dentry->d_sb; + + /* Is it a file opened by fanotify? */ + if (FMODE_FSNOTIFY_NONE(file->f_mode)) + return; + + /* + * Permission events is a super set of pre-content events, so if there + * are no permission event watchers, there are also no pre-content event + * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit. + */ + if (likely(!fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_CONTENT))) { + file->f_mode |= FMODE_NONOTIFY_PERM; + return; + } + + /* + * If there are permission event watchers but no pre-content event + * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. + */ + if (likely(!fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_PRE_CONTENT))) { + file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; + return; + } +} +#endif + static __init int fsnotify_init(void) { int ret; diff --git a/fs/open.c b/fs/open.c index c3490286092e..1a9483872e1f 100644 --- a/fs/open.c +++ b/fs/open.c @@ -901,7 +901,7 @@ static int do_dentry_open(struct file *f, f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH | FMODE_OPENED; + f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY; f->f_op = &empty_fops; return 0; } @@ -929,6 +929,12 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; + /* + * Set FMODE_NONOTIFY_* bits according to existing permission watches. + * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't + * change anything. + */ + file_set_fsnotify_mode(f); error = fsnotify_open_perm(f); if (error) goto cleanup_all; diff --git a/include/linux/fs.h b/include/linux/fs.h index 93c2b720271e..5f7ac5b548a4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -173,13 +173,20 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) -/* FMODE_* bit 24 */ - /* File is embedded in backing_file object */ -#define FMODE_BACKING ((__force fmode_t)(1 << 25)) +#define FMODE_BACKING ((__force fmode_t)(1 << 24)) + +/* + * Together with FMODE_NONOTIFY_PERM defines which fsnotify events shouldn't be + * generated (see below) + */ +#define FMODE_NONOTIFY ((__force fmode_t)(1 << 25)) -/* File was opened by fanotify and shouldn't generate fanotify events */ -#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26)) +/* + * Together with FMODE_NONOTIFY defines which fsnotify events shouldn't be + * generated (see below) + */ +#define FMODE_NONOTIFY_PERM ((__force fmode_t)(1 << 26)) /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) @@ -190,6 +197,32 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) +/* + * The two FMODE_NONOTIFY* define which fsnotify events should not be generated + * for a file. These are the possible values of (f->f_mode & + * FMODE_FSNOTIFY_MASK) and their meaning: + * + * FMODE_NONOTIFY - suppress all (incl. non-permission) events. + * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events. + * FMODE_NONOTIFY | FMODE_NONOTIFY_PERM - suppress only pre-content events. + */ +#define FMODE_FSNOTIFY_MASK \ + (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM) + +#define FMODE_FSNOTIFY_NONE(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == FMODE_NONOTIFY) +#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +#define FMODE_FSNOTIFY_PERM(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == 0 || \ + (mode & FMODE_FSNOTIFY_MASK) == (FMODE_NONOTIFY | FMODE_NONOTIFY_PERM)) +#define FMODE_FSNOTIFY_HSM(mode) \ + ((mode & FMODE_FSNOTIFY_MASK) == 0) +#else +#define FMODE_FSNOTIFY_PERM(mode) 0 +#define FMODE_FSNOTIFY_HSM(mode) 0 +#endif + + /* * Attribute flags. These should be or-ed together to figure out what * has been changed! diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 278620e063ab..8d1849137a96 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -108,38 +108,35 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY); } -static inline int fsnotify_file(struct file *file, __u32 mask) +static inline int fsnotify_path(const struct path *path, __u32 mask) { - const struct path *path; + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); +} +static inline int fsnotify_file(struct file *file, __u32 mask) +{ /* * FMODE_NONOTIFY are fds generated by fanotify itself which should not * generate new events. We also don't want to generate events for * FMODE_PATH fds (involves open & close events) as they are just * handle creation / destruction events and not "real" file events. */ - if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH)) + if (FMODE_FSNOTIFY_NONE(file->f_mode)) return 0; - path = &file->f_path; - /* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */ - if (mask & ALL_FSNOTIFY_PERM_EVENTS && - !fsnotify_sb_has_priority_watchers(path->dentry->d_sb, - FSNOTIFY_PRIO_CONTENT)) - return 0; - - return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); + return fsnotify_path(&file->f_path, mask); } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS + +void file_set_fsnotify_mode(struct file *file); + /* * fsnotify_file_area_perm - permission hook before access to file range */ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { - __u32 fsnotify_mask = FS_ACCESS_PERM; - /* * filesystem may be modified in the context of permission events * (e.g. by HSM filling a file on access), so sb freeze protection @@ -150,7 +147,10 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, if (!(perm_mask & MAY_READ)) return 0; - return fsnotify_file(file, fsnotify_mask); + if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) + return 0; + + return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } /* @@ -168,16 +168,23 @@ static inline int fsnotify_open_perm(struct file *file) { int ret; + if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) + return 0; + if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); + ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); if (ret) return ret; } - return fsnotify_file(file, FS_OPEN_PERM); + return fsnotify_path(&file->f_path, FS_OPEN_PERM); } #else +static inline void file_set_fsnotify_mode(struct file *file) +{ +} + static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { -- cgit v1.2.3 From 318652e07fa5b1743d08eeccd69a1f47f2c15710 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:16 -0500 Subject: fsnotify: check if file is actually being watched for pre-content events on open So far, we set FMODE_NONOTIFY_ flags at open time if we know that there are no permission event watchers at all on the filesystem, but lack of FMODE_NONOTIFY_ flags does not mean that the file is actually watched. For pre-content events, it is possible to optimize things so that we don't bother trying to send pre-content events if file was not watched (through sb, mnt, parent or inode itself) on open. Set FMODE_NONOTIFY_ flags according to that. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/2ddcc9f8d1fde48d085318a6b5a889289d8871d8.1731684329.git.josef@toxicpanda.com --- fs/notify/fsnotify.c | 29 ++++++++++++++++++++++++++--- include/linux/fsnotify_backend.h | 3 +++ 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 569ec356e4ce..9e483fface1e 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -193,7 +193,7 @@ static bool fsnotify_event_needs_parent(struct inode *inode, __u32 mnt_mask, return mask & marks_mask; } -/* Are there any inode/mount/sb objects that are interested in this event? */ +/* Are there any inode/mount/sb objects that watch for these events? */ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, __u32 mask) { @@ -632,7 +632,9 @@ EXPORT_SYMBOL_GPL(fsnotify); */ void file_set_fsnotify_mode(struct file *file) { - struct super_block *sb = file->f_path.dentry->d_sb; + struct dentry *dentry = file->f_path.dentry, *parent; + struct super_block *sb = dentry->d_sb; + __u32 mnt_mask, p_mask; /* Is it a file opened by fanotify? */ if (FMODE_FSNOTIFY_NONE(file->f_mode)) @@ -653,11 +655,32 @@ void file_set_fsnotify_mode(struct file *file) * If there are permission event watchers but no pre-content event * watchers, set FMODE_NONOTIFY | FMODE_NONOTIFY_PERM to indicate that. */ - if (likely(!fsnotify_sb_has_priority_watchers(sb, + if ((!d_is_dir(dentry) && !d_is_reg(dentry)) || + likely(!fsnotify_sb_has_priority_watchers(sb, FSNOTIFY_PRIO_PRE_CONTENT))) { file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; return; } + + /* + * OK, there are some pre-content watchers. Check if anybody is + * watching for pre-content events on *this* file. + */ + mnt_mask = READ_ONCE(real_mount(file->f_path.mnt)->mnt_fsnotify_mask); + if (unlikely(fsnotify_object_watched(d_inode(dentry), mnt_mask, + FSNOTIFY_PRE_CONTENT_EVENTS))) + return; + + /* Is parent watching for pre-content events on this file? */ + if (dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED) { + parent = dget_parent(dentry); + p_mask = fsnotify_inode_watches_children(d_inode(parent)); + dput(parent); + if (p_mask & FSNOTIFY_PRE_CONTENT_EVENTS) + return; + } + /* Nobody watching for pre-content events from this file */ + file->f_mode |= FMODE_NONOTIFY | FMODE_NONOTIFY_PERM; } #endif diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 3ecf7768e577..9c105244815d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -77,6 +77,9 @@ */ #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) +/* Pre-content events can be used to fill file content */ +#define FSNOTIFY_PRE_CONTENT_EVENTS 0 + #define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ FS_OPEN_EXEC_PERM) -- cgit v1.2.3 From 0a076036b631f086a6bce93a45eaa216f234f121 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:19 -0500 Subject: fanotify: reserve event bit of deprecated FAN_DIR_MODIFY Avoid reusing it, because we would like to reserve it for future FAN_PATH_MODIFY pre-content event. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/632d9f80428e2e7a6b6a8ccc2925d87c92bbb518.1731684329.git.josef@toxicpanda.com --- include/linux/fsnotify_backend.h | 1 + include/uapi/linux/fanotify.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9c105244815d..c38762b62bf1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -55,6 +55,7 @@ #define FS_OPEN_PERM 0x00010000 /* open event in an permission hook */ #define FS_ACCESS_PERM 0x00020000 /* access event in a permissions hook */ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ +/* #define FS_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ /* * Set on inode mark that cares about things that happen to its children. diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 34f221d3a1b9..79072b6894f2 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -25,6 +25,7 @@ #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ -- cgit v1.2.3 From f156524e5d72c81792eee81f828784dc8a37a7f2 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:20 -0500 Subject: fsnotify: introduce pre-content permission events The new FS_PRE_ACCESS permission event is similar to FS_ACCESS_PERM, but it meant for a different use case of filling file content before access to a file range, so it has slightly different semantics. Generate FS_PRE_ACCESS/FS_ACCESS_PERM as two seperate events, so content scanners could inspect the content filled by pre-content event handler. Unlike FS_ACCESS_PERM, FS_PRE_ACCESS is also called before a file is modified by syscalls as write() and fallocate(). FS_ACCESS_PERM is reported also on blockdev and pipes, but the new pre-content events are only reported for regular files and dirs. The pre-content events are meant to be used by hierarchical storage managers that want to fill the content of files on first access. There are some specific requirements from filesystems that could be used with pre-content events, so add a flag for fs to opt-in for pre-content events explicitly before they can be used. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/b934c5e3af205abc4e0e4709f6486815937ddfdf.1731684329.git.josef@toxicpanda.com --- fs/notify/fsnotify.c | 2 +- include/linux/fs.h | 1 + include/linux/fsnotify.h | 19 ++++++++++++++++++- include/linux/fsnotify_backend.h | 11 ++++++++--- security/selinux/hooks.c | 3 ++- 5 files changed, 30 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 9e483fface1e..32b461c5d04b 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -688,7 +688,7 @@ static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 23); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/include/linux/fs.h b/include/linux/fs.h index 5f7ac5b548a4..3f4d59464965 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1265,6 +1265,7 @@ extern int send_sigurg(struct file *file); #define SB_I_RETIRED 0x00000800 /* superblock shouldn't be reused */ #define SB_I_NOUMASK 0x00001000 /* VFS does not apply umask */ #define SB_I_NOIDMAP 0x00002000 /* No idmapped mounts on this superblock */ +#define SB_I_ALLOW_HSM 0x00004000 /* Allow HSM events on this superblock */ /* Possible states of 'frozen' field */ enum { diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 8d1849137a96..d91aa064f0e4 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -144,12 +144,29 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, */ lockdep_assert_once(file_write_not_started(file)); - if (!(perm_mask & MAY_READ)) + if (!(perm_mask & (MAY_READ | MAY_WRITE | MAY_ACCESS))) return 0; if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode))) return 0; + /* + * read()/write() and other types of access generate pre-content events. + */ + if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { + int ret = fsnotify_path(&file->f_path, FS_PRE_ACCESS); + + if (ret) + return ret; + } + + if (!(perm_mask & MAY_READ)) + return 0; + + /* + * read() also generates the legacy FS_ACCESS_PERM event, so content + * scanners can inspect the content filled by pre-content event. + */ return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c38762b62bf1..9bda354b5538 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -57,6 +57,8 @@ #define FS_OPEN_EXEC_PERM 0x00040000 /* open/exec event in a permission hook */ /* #define FS_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ +#define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */ + /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. @@ -78,11 +80,14 @@ */ #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) +/* Content events can be used to inspect file content */ +#define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \ + FS_ACCESS_PERM) /* Pre-content events can be used to fill file content */ -#define FSNOTIFY_PRE_CONTENT_EVENTS 0 +#define FSNOTIFY_PRE_CONTENT_EVENTS (FS_PRE_ACCESS) -#define ALL_FSNOTIFY_PERM_EVENTS (FS_OPEN_PERM | FS_ACCESS_PERM | \ - FS_OPEN_EXEC_PERM) +#define ALL_FSNOTIFY_PERM_EVENTS (FSNOTIFY_CONTENT_PERM_EVENTS | \ + FSNOTIFY_PRE_CONTENT_EVENTS) /* * This is a list of all events that may get sent to a parent that is watching diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f5a08f94e094..97a2c04c2b37 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3404,7 +3404,8 @@ static int selinux_path_notify(const struct path *path, u64 mask, perm |= FILE__WATCH_WITH_PERM; /* watches on read-like events need the file:watch_reads permission */ - if (mask & (FS_ACCESS | FS_ACCESS_PERM | FS_CLOSE_NOWRITE)) + if (mask & (FS_ACCESS | FS_ACCESS_PERM | FS_PRE_ACCESS | + FS_CLOSE_NOWRITE)) perm |= FILE__WATCH_READS; return path_has_perm(current_cred(), path, perm); -- cgit v1.2.3 From 9740d17162deca7138fad7dcf3ef52324832c32b Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:21 -0500 Subject: fsnotify: pass optional file access range in pre-content event We would like to add file range information to pre-content events. Pass a struct file_range with offset and length to event handler along with pre-content permission event. The offset and length are aligned to page size, but we may need to align them to minimum folio size for filesystems with large block size. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/88eddee301231d814aede27fb4d5b41ae37c9702.1731684329.git.josef@toxicpanda.com --- fs/notify/fanotify/fanotify.c | 11 +++++++++-- fs/notify/fanotify/fanotify.h | 2 ++ fs/notify/fsnotify.c | 18 ++++++++++++++++++ include/linux/fsnotify.h | 4 ++-- include/linux/fsnotify_backend.h | 40 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 24c7c5df4998..2e6ba94ec405 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -548,9 +548,13 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, return &pevent->fae; } -static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, +static struct fanotify_event *fanotify_alloc_perm_event(const void *data, + int data_type, gfp_t gfp) { + const struct path *path = fsnotify_data_path(data, data_type); + const struct file_range *range = + fsnotify_data_file_range(data, data_type); struct fanotify_perm_event *pevent; pevent = kmem_cache_alloc(fanotify_perm_event_cachep, gfp); @@ -564,6 +568,9 @@ static struct fanotify_event *fanotify_alloc_perm_event(const struct path *path, pevent->hdr.len = 0; pevent->state = FAN_EVENT_INIT; pevent->path = *path; + /* NULL ppos means no range info */ + pevent->ppos = range ? &range->pos : NULL; + pevent->count = range ? range->count : 0; path_get(path); return &pevent->fae; @@ -801,7 +808,7 @@ static struct fanotify_event *fanotify_alloc_event( old_memcg = set_active_memcg(group->memcg); if (fanotify_is_perm_event(mask)) { - event = fanotify_alloc_perm_event(path, gfp); + event = fanotify_alloc_perm_event(data, data_type, gfp); } else if (fanotify_is_error_event(mask)) { event = fanotify_alloc_error_event(group, fsid, data, data_type, &hash); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index e5ab33cae6a7..93598b7d5952 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -425,6 +425,8 @@ FANOTIFY_PE(struct fanotify_event *event) struct fanotify_perm_event { struct fanotify_event fae; struct path path; + const loff_t *ppos; /* optional file range info */ + size_t count; u32 response; /* userspace answer to the event */ unsigned short state; /* state of the event */ int fd; /* fd we passed to userspace for this event */ diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 32b461c5d04b..8ee495a58d0a 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -203,6 +203,24 @@ static inline bool fsnotify_object_watched(struct inode *inode, __u32 mnt_mask, return mask & marks_mask & ALL_FSNOTIFY_EVENTS; } +/* Report pre-content event with optional range info */ +int fsnotify_pre_content(const struct path *path, const loff_t *ppos, + size_t count) +{ + struct file_range range; + + /* Report page aligned range only when pos is known */ + if (!ppos) + return fsnotify_path(path, FS_PRE_ACCESS); + + range.path = path; + range.pos = PAGE_ALIGN_DOWN(*ppos); + range.count = PAGE_ALIGN(*ppos + count) - range.pos; + + return fsnotify_parent(path->dentry, FS_PRE_ACCESS, &range, + FSNOTIFY_EVENT_FILE_RANGE); +} + /* * Notify this dentry's parent about a child's events with child name info * if parent is watching or if inode/sb/mount are interested in events with diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index d91aa064f0e4..87044acf8e79 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -154,7 +154,7 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, * read()/write() and other types of access generate pre-content events. */ if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { - int ret = fsnotify_path(&file->f_path, FS_PRE_ACCESS); + int ret = fsnotify_pre_content(&file->f_path, ppos, count); if (ret) return ret; @@ -171,7 +171,7 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, } /* - * fsnotify_file_perm - permission hook before file access + * fsnotify_file_perm - permission hook before file access (unknown range) */ static inline int fsnotify_file_perm(struct file *file, int perm_mask) { diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 9bda354b5538..0d24a21a8e60 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -294,6 +294,7 @@ static inline void fsnotify_group_assert_locked(struct fsnotify_group *group) /* When calling fsnotify tell it if the data is a path or inode */ enum fsnotify_data_type { FSNOTIFY_EVENT_NONE, + FSNOTIFY_EVENT_FILE_RANGE, FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, FSNOTIFY_EVENT_DENTRY, @@ -306,6 +307,17 @@ struct fs_error_report { struct super_block *sb; }; +struct file_range { + const struct path *path; + loff_t pos; + size_t count; +}; + +static inline const struct path *file_range_path(const struct file_range *range) +{ + return range->path; +} + static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { @@ -315,6 +327,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type) return d_inode(data); case FSNOTIFY_EVENT_PATH: return d_inode(((const struct path *)data)->dentry); + case FSNOTIFY_EVENT_FILE_RANGE: + return d_inode(file_range_path(data)->dentry); case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *)data)->inode; default: @@ -330,6 +344,8 @@ static inline struct dentry *fsnotify_data_dentry(const void *data, int data_typ return (struct dentry *)data; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data)->dentry; default: return NULL; } @@ -341,6 +357,8 @@ static inline const struct path *fsnotify_data_path(const void *data, switch (data_type) { case FSNOTIFY_EVENT_PATH: return data; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data); default: return NULL; } @@ -356,6 +374,8 @@ static inline struct super_block *fsnotify_data_sb(const void *data, return ((struct dentry *)data)->d_sb; case FSNOTIFY_EVENT_PATH: return ((const struct path *)data)->dentry->d_sb; + case FSNOTIFY_EVENT_FILE_RANGE: + return file_range_path(data)->dentry->d_sb; case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *) data)->sb; default: @@ -375,6 +395,18 @@ static inline struct fs_error_report *fsnotify_data_error_report( } } +static inline const struct file_range *fsnotify_data_file_range( + const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_FILE_RANGE: + return (struct file_range *)data; + default: + return NULL; + } +} + /* * Index to merged marks iterator array that correlates to a type of watch. * The type of watched object can be deduced from the iterator type, but not @@ -863,9 +895,17 @@ static inline void fsnotify_init_event(struct fsnotify_event *event) { INIT_LIST_HEAD(&event->list); } +int fsnotify_pre_content(const struct path *path, const loff_t *ppos, + size_t count); #else +static inline int fsnotify_pre_content(const struct path *path, + const loff_t *ppos, size_t count) +{ + return 0; +} + static inline int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, const struct qstr *name, struct inode *inode, u32 cookie) -- cgit v1.2.3 From 4acf3bc76e521b47acebcefc6312c97992f4ca29 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:22 -0500 Subject: fsnotify: generate pre-content permission event on truncate Generate FS_PRE_ACCESS event before truncate, without sb_writers held. Move the security hooks also before sb_start_write() to conform with other security hooks (e.g. in write, fallocate). The event will have a range info of the page surrounding the new size to provide an opportunity to fill the conetnt at the end of file before truncating to non-page aligned size. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/23af8201db6ac2efdea94f09ab067d81ba5de7a7.1731684329.git.josef@toxicpanda.com --- fs/open.c | 31 +++++++++++++++++++++---------- include/linux/fsnotify.h | 20 ++++++++++++++++++++ 2 files changed, 41 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/open.c b/fs/open.c index 1a9483872e1f..d11d373dca80 100644 --- a/fs/open.c +++ b/fs/open.c @@ -81,14 +81,18 @@ long vfs_truncate(const struct path *path, loff_t length) if (!S_ISREG(inode->i_mode)) return -EINVAL; - error = mnt_want_write(path->mnt); - if (error) - goto out; - idmap = mnt_idmap(path->mnt); error = inode_permission(idmap, inode, MAY_WRITE); if (error) - goto mnt_drop_write_and_out; + return error; + + error = fsnotify_truncate_perm(path, length); + if (error) + return error; + + error = mnt_want_write(path->mnt); + if (error) + return error; error = -EPERM; if (IS_APPEND(inode)) @@ -114,7 +118,7 @@ put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: mnt_drop_write(path->mnt); -out: + return error; } EXPORT_SYMBOL_GPL(vfs_truncate); @@ -175,11 +179,18 @@ long do_ftruncate(struct file *file, loff_t length, int small) /* Check IS_APPEND on real upper inode */ if (IS_APPEND(file_inode(file))) return -EPERM; - sb_start_write(inode->i_sb); + error = security_file_truncate(file); - if (!error) - error = do_truncate(file_mnt_idmap(file), dentry, length, - ATTR_MTIME | ATTR_CTIME, file); + if (error) + return error; + + error = fsnotify_truncate_perm(&file->f_path, length); + if (error) + return error; + + sb_start_write(inode->i_sb); + error = do_truncate(file_mnt_idmap(file), dentry, length, + ATTR_MTIME | ATTR_CTIME, file); sb_end_write(inode->i_sb); return error; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 87044acf8e79..1a9ef8f6784d 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -170,6 +170,21 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } +/* + * fsnotify_truncate_perm - permission hook before file truncate + */ +static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) +{ + struct inode *inode = d_inode(path->dentry); + + if (!(inode->i_sb->s_iflags & SB_I_ALLOW_HSM) || + !fsnotify_sb_has_priority_watchers(inode->i_sb, + FSNOTIFY_PRIO_PRE_CONTENT)) + return 0; + + return fsnotify_pre_content(path, &length, 0); +} + /* * fsnotify_file_perm - permission hook before file access (unknown range) */ @@ -208,6 +223,11 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, return 0; } +static inline int fsnotify_truncate_perm(const struct path *path, loff_t length) +{ + return 0; +} + static inline int fsnotify_file_perm(struct file *file, int perm_mask) { return 0; -- cgit v1.2.3 From 4f8afa33817a6420398d1c177c6e220a05081f51 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:23 -0500 Subject: fanotify: introduce FAN_PRE_ACCESS permission event Similar to FAN_ACCESS_PERM permission event, but it is only allowed with class FAN_CLASS_PRE_CONTENT and only allowed on regular files and dirs. Unlike FAN_ACCESS_PERM, it is safe to write to the file being accessed in the context of the event handler. This pre-content event is meant to be used by hierarchical storage managers that want to fill the content of files on first read access. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/b80986f8d5b860acea2c9a73c0acd93587be5fe4.1731684329.git.josef@toxicpanda.com --- fs/notify/fanotify/fanotify.c | 3 ++- fs/notify/fanotify/fanotify_user.c | 35 ++++++++++++++++++++++++++++++----- include/linux/fanotify.h | 14 ++++++++++---- include/uapi/linux/fanotify.h | 2 ++ 4 files changed, 44 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 2e6ba94ec405..da6c3c1c7edf 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -916,8 +916,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM); BUILD_BUG_ON(FAN_FS_ERROR != FS_ERROR); BUILD_BUG_ON(FAN_RENAME != FS_RENAME); + BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 21); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22); mask = fanotify_group_event_mask(group, iter_info, &match_mask, mask, data, data_type, dir); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 456cc3e92c88..08e4d8659ef5 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -1287,7 +1287,7 @@ static int fanotify_group_init_error_pool(struct fsnotify_group *group) } static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, - unsigned int fan_flags) + __u32 mask, unsigned int fan_flags) { /* * Non evictable mark cannot be downgraded to evictable mark. @@ -1314,6 +1314,11 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark, fsn_mark->flags & FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY) return -EEXIST; + /* For now pre-content events are not generated for directories */ + mask |= fsn_mark->mask; + if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) + return -EEXIST; + return 0; } @@ -1340,7 +1345,7 @@ static int fanotify_add_mark(struct fsnotify_group *group, /* * Check if requested mark flags conflict with an existing mark flags. */ - ret = fanotify_may_update_existing_mark(fsn_mark, fan_flags); + ret = fanotify_may_update_existing_mark(fsn_mark, mask, fan_flags); if (ret) goto out; @@ -1640,11 +1645,23 @@ static int fanotify_events_supported(struct fsnotify_group *group, unsigned int flags) { unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS; + bool is_dir = d_is_dir(path->dentry); /* Strict validation of events in non-dir inode mask with v5.17+ APIs */ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) || (mask & FAN_RENAME) || (flags & FAN_MARK_IGNORE); + /* + * Filesystems need to opt-into pre-content evnets (a.k.a HSM) + * and they are only supported on regular files and directories. + */ + if (mask & FANOTIFY_PRE_CONTENT_EVENTS) { + if (!(path->mnt->mnt_sb->s_iflags & SB_I_ALLOW_HSM)) + return -EOPNOTSUPP; + if (!is_dir && !d_is_reg(path->dentry)) + return -EINVAL; + } + /* * Some filesystems such as 'proc' acquire unusual locks when opening * files. For them fanotify permission events have high chances of @@ -1677,7 +1694,7 @@ static int fanotify_events_supported(struct fsnotify_group *group, * but because we always allowed it, error only when using new APIs. */ if (strict_dir_events && mark_type == FAN_MARK_INODE && - !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) + !is_dir && (mask & FANOTIFY_DIRONLY_EVENT_BITS)) return -ENOTDIR; return 0; @@ -1778,10 +1795,14 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EPERM; /* - * Permission events require minimum priority FAN_CLASS_CONTENT. + * Permission events are not allowed for FAN_CLASS_NOTIF. + * Pre-content permission events are not allowed for FAN_CLASS_CONTENT. */ if (mask & FANOTIFY_PERM_EVENTS && - group->priority < FSNOTIFY_PRIO_CONTENT) + group->priority == FSNOTIFY_PRIO_NORMAL) + return -EINVAL; + else if (mask & FANOTIFY_PRE_CONTENT_EVENTS && + group->priority == FSNOTIFY_PRIO_CONTENT) return -EINVAL; if (mask & FAN_FS_ERROR && @@ -1816,6 +1837,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, if (mask & FAN_RENAME && !(fid_mode & FAN_REPORT_NAME)) return -EINVAL; + /* Pre-content events are not currently generated for directories. */ + if (mask & FANOTIFY_PRE_CONTENT_EVENTS && mask & FAN_ONDIR) + return -EINVAL; + if (mark_cmd == FAN_MARK_FLUSH) { if (mark_type == FAN_MARK_MOUNT) fsnotify_clear_vfsmount_marks_by_group(group); diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 89ff45bd6f01..c747af064d2c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -89,6 +89,16 @@ #define FANOTIFY_DIRENT_EVENTS (FAN_MOVE | FAN_CREATE | FAN_DELETE | \ FAN_RENAME) +/* Content events can be used to inspect file content */ +#define FANOTIFY_CONTENT_PERM_EVENTS (FAN_OPEN_PERM | FAN_OPEN_EXEC_PERM | \ + FAN_ACCESS_PERM) +/* Pre-content events can be used to fill file content */ +#define FANOTIFY_PRE_CONTENT_EVENTS (FAN_PRE_ACCESS) + +/* Events that require a permission response from user */ +#define FANOTIFY_PERM_EVENTS (FANOTIFY_CONTENT_PERM_EVENTS | \ + FANOTIFY_PRE_CONTENT_EVENTS) + /* Events that can be reported with event->fd */ #define FANOTIFY_FD_EVENTS (FANOTIFY_PATH_EVENTS | FANOTIFY_PERM_EVENTS) @@ -104,10 +114,6 @@ FANOTIFY_INODE_EVENTS | \ FANOTIFY_ERROR_EVENTS) -/* Events that require a permission response from user */ -#define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \ - FAN_OPEN_EXEC_PERM) - /* Extra flags that may be reported with event or control handling of events */ #define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 79072b6894f2..7596168c80eb 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -27,6 +27,8 @@ #define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ /* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ + #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ #define FAN_RENAME 0x10000000 /* File was renamed */ -- cgit v1.2.3 From 870499bc1d4dc04cba1f63dd5e7bc02b983e2458 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:24 -0500 Subject: fanotify: report file range info with pre-content events With group class FAN_CLASS_PRE_CONTENT, report offset and length info along with FAN_PRE_ACCESS pre-content events. This information is meant to be used by hierarchical storage managers that want to fill partial content of files on first access to range. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/b90a9e6c809dd3cad5684da90f23ea93ec6ce8c8.1731684329.git.josef@toxicpanda.com --- fs/notify/fanotify/fanotify.h | 8 ++++++++ fs/notify/fanotify/fanotify_user.c | 38 ++++++++++++++++++++++++++++++++++++++ include/uapi/linux/fanotify.h | 8 ++++++++ 3 files changed, 54 insertions(+) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 93598b7d5952..7f06355afa1f 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -448,6 +448,14 @@ static inline bool fanotify_is_perm_event(u32 mask) mask & FANOTIFY_PERM_EVENTS; } +static inline bool fanotify_event_has_access_range(struct fanotify_event *event) +{ + if (!(event->mask & FANOTIFY_PRE_CONTENT_EVENTS)) + return false; + + return FANOTIFY_PERM(event)->ppos; +} + static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse) { return container_of(fse, struct fanotify_event, fse); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 08e4d8659ef5..6ef3cc7de5e4 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -121,6 +121,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; sizeof(struct fanotify_event_info_pidfd) #define FANOTIFY_ERROR_INFO_LEN \ (sizeof(struct fanotify_event_info_error)) +#define FANOTIFY_RANGE_INFO_LEN \ + (sizeof(struct fanotify_event_info_range)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -180,6 +182,9 @@ static size_t fanotify_event_len(unsigned int info_mode, if (info_mode & FAN_REPORT_PIDFD) event_len += FANOTIFY_PIDFD_INFO_LEN; + if (fanotify_event_has_access_range(event)) + event_len += FANOTIFY_RANGE_INFO_LEN; + return event_len; } @@ -516,6 +521,30 @@ static int copy_pidfd_info_to_user(int pidfd, return info_len; } +static size_t copy_range_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_perm_event *pevent = FANOTIFY_PERM(event); + struct fanotify_event_info_range info = { }; + size_t info_len = FANOTIFY_RANGE_INFO_LEN; + + if (WARN_ON_ONCE(info_len > count)) + return -EFAULT; + + if (WARN_ON_ONCE(!pevent->ppos)) + return -EINVAL; + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_RANGE; + info.hdr.len = info_len; + info.offset = *(pevent->ppos); + info.count = pevent->count; + + if (copy_to_user(buf, &info, info_len)) + return -EFAULT; + + return info_len; +} + static int copy_info_records_to_user(struct fanotify_event *event, struct fanotify_info *info, unsigned int info_mode, int pidfd, @@ -637,6 +666,15 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_event_has_access_range(event)) { + ret = copy_range_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + return total_bytes; } diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 7596168c80eb..0636a9c85dd0 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -146,6 +146,7 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_DFID 3 #define FAN_EVENT_INFO_TYPE_PIDFD 4 #define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 /* Special info types for FAN_RENAME */ #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 @@ -192,6 +193,13 @@ struct fanotify_event_info_error { __u32 error_count; }; +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + /* * User space may need to record additional information about its decision. * The extra information type records what kind of information is included. -- cgit v1.2.3 From b4b2ff4f61ded819bfa22e50fdec7693f51cbbee Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Fri, 15 Nov 2024 10:30:25 -0500 Subject: fanotify: allow to set errno in FAN_DENY permission response With FAN_DENY response, user trying to perform the filesystem operation gets an error with errno set to EPERM. It is useful for hierarchical storage management (HSM) service to be able to deny access for reasons more diverse than EPERM, for example EAGAIN, if HSM could retry the operation later. Allow fanotify groups with priority FAN_CLASSS_PRE_CONTENT to responsd to permission events with the response value FAN_DENY_ERRNO(errno), instead of FAN_DENY to return a custom error. Limit custom error values to errors expected on read(2)/write(2) and open(2) of regular files. This list could be extended in the future. Userspace can test for legitimate values of FAN_DENY_ERRNO(errno) by writing a response to an fanotify group fd with a value of FAN_NOFD in the fd field of the response. The change in fanotify_response is backward compatible, because errno is written in the high 8 bits of the 32bit response field and old kernels reject respose value with high bits set. Signed-off-by: Amir Goldstein Signed-off-by: Jan Kara Link: https://patch.msgid.link/1e5fb6af84b69ca96b5c849fa5f10bdf4d1dc414.1731684329.git.josef@toxicpanda.com --- fs/notify/fanotify/fanotify.c | 17 +++++++++++++---- fs/notify/fanotify/fanotify.h | 5 +++++ fs/notify/fanotify/fanotify_user.c | 29 +++++++++++++++++++++++++++-- include/linux/fanotify.h | 4 +++- include/uapi/linux/fanotify.h | 7 +++++++ 5 files changed, 55 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index da6c3c1c7edf..95646f7c46ca 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -223,7 +223,7 @@ static int fanotify_get_response(struct fsnotify_group *group, struct fanotify_perm_event *event, struct fsnotify_iter_info *iter_info) { - int ret; + int ret, errno; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -262,14 +262,23 @@ static int fanotify_get_response(struct fsnotify_group *group, ret = 0; break; case FAN_DENY: + /* Check custom errno from pre-content events */ + errno = fanotify_get_response_errno(event->response); + if (errno) { + ret = -errno; + break; + } + fallthrough; default: ret = -EPERM; } /* Check if the response should be audited */ - if (event->response & FAN_AUDIT) - audit_fanotify(event->response & ~FAN_AUDIT, - &event->audit_rule); + if (event->response & FAN_AUDIT) { + u32 response = event->response & + (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS); + audit_fanotify(response & ~FAN_AUDIT, &event->audit_rule); + } pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__, group, event, ret); diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 7f06355afa1f..c12cbc270539 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -528,3 +528,8 @@ static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark) return mflags; } + +static inline u32 fanotify_get_response_errno(int res) +{ + return (res >> FAN_ERRNO_SHIFT) & FAN_ERRNO_MASK; +} diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6ef3cc7de5e4..19435cd2c41f 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -327,11 +327,12 @@ static int process_access_response(struct fsnotify_group *group, struct fanotify_perm_event *event; int fd = response_struct->fd; u32 response = response_struct->response; + int errno = fanotify_get_response_errno(response); int ret = info_len; struct fanotify_response_info_audit_rule friar; - pr_debug("%s: group=%p fd=%d response=%u buf=%p size=%zu\n", __func__, - group, fd, response, info, info_len); + pr_debug("%s: group=%p fd=%d response=%x errno=%d buf=%p size=%zu\n", + __func__, group, fd, response, errno, info, info_len); /* * make sure the response is valid, if invalid we do nothing and either * userspace can send a valid response or we will clean it up after the @@ -342,7 +343,31 @@ static int process_access_response(struct fsnotify_group *group, switch (response & FANOTIFY_RESPONSE_ACCESS) { case FAN_ALLOW: + if (errno) + return -EINVAL; + break; case FAN_DENY: + /* Custom errno is supported only for pre-content groups */ + if (errno && group->priority != FSNOTIFY_PRIO_PRE_CONTENT) + return -EINVAL; + + /* + * Limit errno to values expected on open(2)/read(2)/write(2) + * of regular files. + */ + switch (errno) { + case 0: + case EIO: + case EPERM: + case EBUSY: + case ETXTBSY: + case EAGAIN: + case ENOSPC: + case EDQUOT: + break; + default: + return -EINVAL; + } break; default: return -EINVAL; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index c747af064d2c..78f660ebc318 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -132,7 +132,9 @@ /* These masks check for invalid bits in permission responses. */ #define FANOTIFY_RESPONSE_ACCESS (FAN_ALLOW | FAN_DENY) #define FANOTIFY_RESPONSE_FLAGS (FAN_AUDIT | FAN_INFO) -#define FANOTIFY_RESPONSE_VALID_MASK (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS) +#define FANOTIFY_RESPONSE_VALID_MASK \ + (FANOTIFY_RESPONSE_ACCESS | FANOTIFY_RESPONSE_FLAGS | \ + (FAN_ERRNO_MASK << FAN_ERRNO_SHIFT)) /* Do not use these old uapi constants internally */ #undef FAN_ALL_CLASS_BITS diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index 0636a9c85dd0..bd8167979707 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -235,6 +235,13 @@ struct fanotify_response_info_audit_rule { /* Legit userspace responses to a _PERM event */ #define FAN_ALLOW 0x01 #define FAN_DENY 0x02 +/* errno other than EPERM can specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + #define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ #define FAN_INFO 0x20 /* Bitmask to indicate additional information */ -- cgit v1.2.3 From b04b981f3a842ef63e06048fafaa8d20c20334c6 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 30 Nov 2024 17:39:37 +0100 Subject: pmdomain: core: Support naming idle states Commit 422f2d418186 ("arm64: dts: qcom: Drop undocumented domain "idle-state-name"") brought to light the common misbelief that idle-state-names also applies to e.g. PSCI power domain idle states. Make that a reality, mimicking the property name used by cpuidle states. Signed-off-by: Konrad Dybcio Message-ID: <20241130-topic-idle_state_name-v1-2-d0ff67b0c8e9@oss.qualcomm.com> Signed-off-by: Ulf Hansson --- drivers/pmdomain/core.c | 15 ++++++++++++--- include/linux/pm_domain.h | 1 + 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/pmdomain/core.c b/drivers/pmdomain/core.c index bb11f467dc78..6d61bcf9b21e 100644 --- a/drivers/pmdomain/core.c +++ b/drivers/pmdomain/core.c @@ -3174,6 +3174,8 @@ static int genpd_parse_state(struct genpd_power_state *genpd_state, if (!err) genpd_state->residency_ns = 1000LL * residency; + of_property_read_string(state_node, "idle-state-name", &genpd_state->name); + genpd_state->power_on_latency_ns = 1000LL * exit_latency; genpd_state->power_off_latency_ns = 1000LL * entry_latency; genpd_state->fwnode = &state_node->fwnode; @@ -3452,7 +3454,10 @@ static int idle_states_show(struct seq_file *s, void *data) seq_puts(s, "State Time Spent(ms) Usage Rejected\n"); for (i = 0; i < genpd->state_count; i++) { - idle_time += genpd->states[i].idle_time; + struct genpd_power_state *state = &genpd->states[i]; + char state_name[15]; + + idle_time += state->idle_time; if (genpd->status == GENPD_STATE_OFF && genpd->state_idx == i) { now = ktime_get_mono_fast_ns(); @@ -3462,9 +3467,13 @@ static int idle_states_show(struct seq_file *s, void *data) } } + if (!state->name) + snprintf(state_name, ARRAY_SIZE(state_name), "S%-13d", i); + do_div(idle_time, NSEC_PER_MSEC); - seq_printf(s, "S%-13i %-14llu %-14llu %llu\n", i, idle_time, - genpd->states[i].usage, genpd->states[i].rejected); + seq_printf(s, "%-14s %-14llu %-14llu %llu\n", + state->name ?: state_name, idle_time, + state->usage, state->rejected); } genpd_unlock(genpd); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 45646bfcaf1a..1aab31370065 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -147,6 +147,7 @@ struct genpd_governor_data { }; struct genpd_power_state { + const char *name; s64 power_off_latency_ns; s64 power_on_latency_ns; s64 residency_ns; -- cgit v1.2.3 From 7d5265ffcd8b41da5e09066360540d6e0716e9cd Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 12 Nov 2024 10:28:26 -0500 Subject: rseq: Validate read-only fields under DEBUG_RSEQ config The rseq uapi requires cooperation between users of the rseq fields to ensure that all libraries and applications using rseq within a process do not interfere with each other. This is especially important for fields which are meant to be read-only from user-space, as documented in uapi/linux/rseq.h: - cpu_id_start, - cpu_id, - node_id, - mm_cid. Storing to those fields from a user-space library prevents any sharing of the rseq ABI with other libraries and applications, as other users are not aware that the content of those fields has been altered by a third-party library. This is unfortunately the current behavior of tcmalloc: it purposefully overlaps part of a cached value with the cpu_id_start upper bits to get notified about preemption, because the kernel clears those upper bits before returning to user-space. This behavior does not conform to the rseq uapi header ABI. This prevents tcmalloc from using rseq when rseq is registered by the GNU C library 2.35+. It requires tcmalloc users to disable glibc rseq registration with a glibc tunable, which is a sad state of affairs. Considering that tcmalloc and the GNU C library are the two first upstream projects using rseq, and that they are already incompatible due to use of this hack, adding kernel-level validation of all read-only fields content is necessary to ensure future users of rseq abide by the rseq ABI requirements. Validate that user-space does not corrupt the read-only fields and conform to the rseq uapi header ABI when the kernel is built with CONFIG_DEBUG_RSEQ=y. This is done by storing a copy of the read-only fields in the task_struct, and validating the prior values present in user-space before updating them. If the values do not match, print a warning on the console (printk_ratelimited()). This is a first step to identify misuses of the rseq ABI by printing a warning on the console. After a giving some time to userspace to correct its use of rseq, the plan is to eventually terminate offending processes with SIGSEGV. This change is expected to produce warnings for the upstream tcmalloc implementation, but tcmalloc developers mentioned they were open to adapt their implementation to kernel-level change. Signed-off-by: Mathieu Desnoyers Signed-off-by: Peter Zijlstra (Intel) Link: https://github.com/google/tcmalloc/issues/144 --- include/linux/sched.h | 9 +++++ kernel/rseq.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index d380bffee2ef..b5916be49f62 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1367,6 +1367,15 @@ struct task_struct { * with respect to preemption. */ unsigned long rseq_event_mask; +# ifdef CONFIG_DEBUG_RSEQ + /* + * This is a place holder to save a copy of the rseq fields for + * validation of read-only fields. The struct rseq has a + * variable-length array at the end, so it cannot be used + * directly. Reserve a size large enough for the known fields. + */ + char rseq_fields[sizeof(struct rseq)]; +# endif #endif #ifdef CONFIG_SCHED_MM_CID diff --git a/kernel/rseq.c b/kernel/rseq.c index 9de6e35fe679..e04bb30a2eb8 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #define CREATE_TRACE_POINTS @@ -25,6 +26,78 @@ RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL | \ RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE) +#ifdef CONFIG_DEBUG_RSEQ +static struct rseq *rseq_kernel_fields(struct task_struct *t) +{ + return (struct rseq *) t->rseq_fields; +} + +static int rseq_validate_ro_fields(struct task_struct *t) +{ + static DEFINE_RATELIMIT_STATE(_rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + u32 cpu_id_start, cpu_id, node_id, mm_cid; + struct rseq __user *rseq = t->rseq; + + /* + * Validate fields which are required to be read-only by + * user-space. + */ + if (!user_read_access_begin(rseq, t->rseq_len)) + goto efault; + unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end); + unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end); + unsafe_get_user(node_id, &rseq->node_id, efault_end); + unsafe_get_user(mm_cid, &rseq->mm_cid, efault_end); + user_read_access_end(); + + if ((cpu_id_start != rseq_kernel_fields(t)->cpu_id_start || + cpu_id != rseq_kernel_fields(t)->cpu_id || + node_id != rseq_kernel_fields(t)->node_id || + mm_cid != rseq_kernel_fields(t)->mm_cid) && __ratelimit(&_rs)) { + + pr_warn("Detected rseq corruption for pid: %d, name: %s\n" + "\tcpu_id_start: %u ?= %u\n" + "\tcpu_id: %u ?= %u\n" + "\tnode_id: %u ?= %u\n" + "\tmm_cid: %u ?= %u\n", + t->pid, t->comm, + cpu_id_start, rseq_kernel_fields(t)->cpu_id_start, + cpu_id, rseq_kernel_fields(t)->cpu_id, + node_id, rseq_kernel_fields(t)->node_id, + mm_cid, rseq_kernel_fields(t)->mm_cid); + } + + /* For now, only print a console warning on mismatch. */ + return 0; + +efault_end: + user_read_access_end(); +efault: + return -EFAULT; +} + +static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id, + u32 node_id, u32 mm_cid) +{ + rseq_kernel_fields(t)->cpu_id_start = cpu_id; + rseq_kernel_fields(t)->cpu_id = cpu_id; + rseq_kernel_fields(t)->node_id = node_id; + rseq_kernel_fields(t)->mm_cid = mm_cid; +} +#else +static int rseq_validate_ro_fields(struct task_struct *t) +{ + return 0; +} + +static void rseq_set_ro_fields(struct task_struct *t, u32 cpu_id_start, u32 cpu_id, + u32 node_id, u32 mm_cid) +{ +} +#endif + /* * * Restartable sequences are a lightweight interface that allows @@ -92,6 +165,11 @@ static int rseq_update_cpu_node_id(struct task_struct *t) u32 node_id = cpu_to_node(cpu_id); u32 mm_cid = task_mm_cid(t); + /* + * Validate read-only rseq fields. + */ + if (rseq_validate_ro_fields(t)) + goto efault; WARN_ON_ONCE((int) mm_cid < 0); if (!user_write_access_begin(rseq, t->rseq_len)) goto efault; @@ -105,6 +183,7 @@ static int rseq_update_cpu_node_id(struct task_struct *t) * t->rseq_len != ORIG_RSEQ_SIZE. */ user_write_access_end(); + rseq_set_ro_fields(t, cpu_id, cpu_id, node_id, mm_cid); trace_rseq_update(t); return 0; @@ -119,6 +198,11 @@ static int rseq_reset_rseq_cpu_node_id(struct task_struct *t) u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0, mm_cid = 0; + /* + * Validate read-only rseq fields. + */ + if (!rseq_validate_ro_fields(t)) + return -EFAULT; /* * Reset cpu_id_start to its initial state (0). */ @@ -141,6 +225,9 @@ static int rseq_reset_rseq_cpu_node_id(struct task_struct *t) */ if (put_user(mm_cid, &t->rseq->mm_cid)) return -EFAULT; + + rseq_set_ro_fields(t, cpu_id_start, cpu_id, node_id, mm_cid); + /* * Additional feature fields added after ORIG_RSEQ_SIZE * need to be conditionally reset only if @@ -423,6 +510,17 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, current->rseq = rseq; current->rseq_len = rseq_len; current->rseq_sig = sig; +#ifdef CONFIG_DEBUG_RSEQ + /* + * Initialize the in-kernel rseq fields copy for validation of + * read-only fields. + */ + if (get_user(rseq_kernel_fields(current)->cpu_id_start, &rseq->cpu_id_start) || + get_user(rseq_kernel_fields(current)->cpu_id, &rseq->cpu_id) || + get_user(rseq_kernel_fields(current)->node_id, &rseq->node_id) || + get_user(rseq_kernel_fields(current)->mm_cid, &rseq->mm_cid)) + return -EFAULT; +#endif /* * If rseq was previously inactive, and has just been * registered, ensure the cpu_id_start and cpu_id fields -- cgit v1.2.3 From 3c48780d48df029cf9d5f42b8971663e6fb975ae Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 4 Dec 2024 11:48:05 -0800 Subject: of: Hide of_default_bus_match_table[] This isn't used outside this file. Hide the array in the C file. Signed-off-by: Stephen Boyd Acked-by: Saravana Kannan Link: https://lore.kernel.org/r/20241204194806.2665589-1-swboyd@chromium.org Signed-off-by: Rob Herring (Arm) --- drivers/of/platform.c | 23 +++++++++++------------ include/linux/of_platform.h | 2 -- 2 files changed, 11 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 9bafcff3e628..c6d8afb284e8 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -24,16 +24,6 @@ #include "of_private.h" -const struct of_device_id of_default_bus_match_table[] = { - { .compatible = "simple-bus", }, - { .compatible = "simple-mfd", }, - { .compatible = "isa", }, -#ifdef CONFIG_ARM_AMBA - { .compatible = "arm,amba-bus", }, -#endif /* CONFIG_ARM_AMBA */ - {} /* Empty terminated list */ -}; - /** * of_find_device_by_node - Find the platform_device associated with a node * @np: Pointer to device tree node @@ -484,8 +474,17 @@ int of_platform_default_populate(struct device_node *root, const struct of_dev_auxdata *lookup, struct device *parent) { - return of_platform_populate(root, of_default_bus_match_table, lookup, - parent); + static const struct of_device_id match_table[] = { + { .compatible = "simple-bus", }, + { .compatible = "simple-mfd", }, + { .compatible = "isa", }, +#ifdef CONFIG_ARM_AMBA + { .compatible = "arm,amba-bus", }, +#endif /* CONFIG_ARM_AMBA */ + {} /* Empty terminated list */ + }; + + return of_platform_populate(root, match_table, lookup, parent); } EXPORT_SYMBOL_GPL(of_platform_default_populate); diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index a2ff1ad48f7f..17471ef8e092 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -47,8 +47,6 @@ struct of_dev_auxdata { { .compatible = _compat, .phys_addr = _phys, .name = _name, \ .platform_data = _pdata } -extern const struct of_device_id of_default_bus_match_table[]; - /* Platform drivers register/unregister */ extern struct platform_device *of_device_alloc(struct device_node *np, const char *bus_id, -- cgit v1.2.3 From 549de562d794a42bb647952e965e588390e16fe0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 5 Dec 2024 21:18:57 -0600 Subject: ACPI: platform-profile: Add a name member to handlers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to prepare for allowing multiple handlers, introduce a name field that can be used to distinguish between different handlers. Tested-by: Mark Pearson Tested-by: Matthew Schwartz Reviewed-by: Hans de Goede Reviewed-by: Mark Pearson Reviewed-by: Maximilian Luz Reviewed-by: Ilpo Järvinen Reviewed-by: Armin Wolf Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20241206031918.1537-2-mario.limonciello@amd.com Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_platform_profile.c | 1 + drivers/platform/x86/acer-wmi.c | 1 + drivers/platform/x86/amd/pmf/sps.c | 1 + drivers/platform/x86/asus-wmi.c | 1 + drivers/platform/x86/dell/alienware-wmi.c | 1 + drivers/platform/x86/dell/dell-pc.c | 1 + drivers/platform/x86/hp/hp-wmi.c | 1 + drivers/platform/x86/ideapad-laptop.c | 1 + drivers/platform/x86/inspur_platform_profile.c | 1 + drivers/platform/x86/thinkpad_acpi.c | 1 + include/linux/platform_profile.h | 1 + 11 files changed, 11 insertions(+) (limited to 'include') diff --git a/drivers/platform/surface/surface_platform_profile.c b/drivers/platform/surface/surface_platform_profile.c index 08db878f1d7d..9d3e3f945818 100644 --- a/drivers/platform/surface/surface_platform_profile.c +++ b/drivers/platform/surface/surface_platform_profile.c @@ -211,6 +211,7 @@ static int surface_platform_profile_probe(struct ssam_device *sdev) tpd->sdev = sdev; + tpd->handler.name = "Surface Platform Profile"; tpd->handler.profile_get = ssam_platform_profile_get; tpd->handler.profile_set = ssam_platform_profile_set; diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index d09baa3d3d90..53fbc9b4d3df 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -1878,6 +1878,7 @@ static int acer_platform_profile_setup(void) if (quirks->predator_v4) { int err; + platform_profile_handler.name = "acer-wmi"; platform_profile_handler.profile_get = acer_predator_v4_platform_profile_get; platform_profile_handler.profile_set = diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index 92f7fb22277d..e2d0cc92c439 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -405,6 +405,7 @@ int amd_pmf_init_sps(struct amd_pmf_dev *dev) amd_pmf_set_sps_power_limits(dev); } + dev->pprof.name = "amd-pmf"; dev->pprof.profile_get = amd_pmf_profile_get; dev->pprof.profile_set = amd_pmf_profile_set; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index ba8b6d028f9f..a08dc936276b 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -3876,6 +3876,7 @@ static int platform_profile_setup(struct asus_wmi *asus) dev_info(dev, "Using throttle_thermal_policy for platform_profile support\n"); + asus->platform_profile_handler.name = "asus-wmi"; asus->platform_profile_handler.profile_get = asus_wmi_platform_profile_get; asus->platform_profile_handler.profile_set = asus_wmi_platform_profile_set; diff --git a/drivers/platform/x86/dell/alienware-wmi.c b/drivers/platform/x86/dell/alienware-wmi.c index 77465ed9b449..f88d89844751 100644 --- a/drivers/platform/x86/dell/alienware-wmi.c +++ b/drivers/platform/x86/dell/alienware-wmi.c @@ -1156,6 +1156,7 @@ static int create_thermal_profile(void) pp_handler.profile_get = thermal_profile_get; pp_handler.profile_set = thermal_profile_set; + pp_handler.name = "alienware-wmi"; return platform_profile_register(&pp_handler); } diff --git a/drivers/platform/x86/dell/dell-pc.c b/drivers/platform/x86/dell/dell-pc.c index 972385ca1990..3cf79e55e312 100644 --- a/drivers/platform/x86/dell/dell-pc.c +++ b/drivers/platform/x86/dell/dell-pc.c @@ -247,6 +247,7 @@ static int thermal_init(void) thermal_handler = kzalloc(sizeof(*thermal_handler), GFP_KERNEL); if (!thermal_handler) return -ENOMEM; + thermal_handler->name = "dell-pc"; thermal_handler->profile_get = thermal_platform_profile_get; thermal_handler->profile_set = thermal_platform_profile_set; diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 81ccc96ffe40..26cac73caf2b 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -1624,6 +1624,7 @@ static int thermal_profile_setup(void) set_bit(PLATFORM_PROFILE_COOL, platform_profile_handler.choices); } + platform_profile_handler.name = "hp-wmi"; set_bit(PLATFORM_PROFILE_BALANCED, platform_profile_handler.choices); set_bit(PLATFORM_PROFILE_PERFORMANCE, platform_profile_handler.choices); diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 5d2c1f0d1e9f..cbdc965eeb24 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1102,6 +1102,7 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) mutex_init(&priv->dytc->mutex); + priv->dytc->pprof.name = "ideapad-laptop"; priv->dytc->priv = priv; priv->dytc->pprof.profile_get = dytc_profile_get; priv->dytc->pprof.profile_set = dytc_profile_set; diff --git a/drivers/platform/x86/inspur_platform_profile.c b/drivers/platform/x86/inspur_platform_profile.c index 8440defa6788..03da2c8cf678 100644 --- a/drivers/platform/x86/inspur_platform_profile.c +++ b/drivers/platform/x86/inspur_platform_profile.c @@ -177,6 +177,7 @@ static int inspur_wmi_probe(struct wmi_device *wdev, const void *context) priv->wdev = wdev; dev_set_drvdata(&wdev->dev, priv); + priv->handler.name = "inspur-wmi"; priv->handler.profile_get = inspur_platform_profile_get; priv->handler.profile_set = inspur_platform_profile_set; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 6371a9f765c1..0e03c41e028d 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10571,6 +10571,7 @@ static void dytc_profile_refresh(void) } static struct platform_profile_handler dytc_profile = { + .name = "thinkpad-acpi", .profile_get = dytc_profile_get, .profile_set = dytc_profile_set, }; diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index f5492ed413f3..6fa988e41742 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -27,6 +27,7 @@ enum platform_profile_option { }; struct platform_profile_handler { + const char *name; unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; int (*profile_get)(struct platform_profile_handler *pprof, enum platform_profile_option *profile); -- cgit v1.2.3 From 6f5e63ddc333dae371be6f8a8f70a82043697a4c Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 5 Dec 2024 21:18:59 -0600 Subject: ACPI: platform_profile: Add device pointer into platform profile handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to let platform profile handlers manage platform profile for their driver the core code will need a pointer to the device. Add this to the structure and use it in the trivial driver cases. Reviewed-by: Armin Wolf Reviewed-by: Ilpo Järvinen Reviewed-by: Maximilian Luz Reviewed-by: Mark Pearson Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20241206031918.1537-4-mario.limonciello@amd.com Signed-off-by: Ilpo Järvinen --- drivers/platform/surface/surface_platform_profile.c | 1 + drivers/platform/x86/acer-wmi.c | 5 +++-- drivers/platform/x86/amd/pmf/sps.c | 1 + drivers/platform/x86/asus-wmi.c | 1 + drivers/platform/x86/dell/alienware-wmi.c | 5 +++-- drivers/platform/x86/dell/dell-pc.c | 1 + drivers/platform/x86/hp/hp-wmi.c | 5 +++-- drivers/platform/x86/ideapad-laptop.c | 1 + drivers/platform/x86/inspur_platform_profile.c | 1 + drivers/platform/x86/thinkpad_acpi.c | 1 + include/linux/platform_profile.h | 1 + 11 files changed, 17 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/platform/surface/surface_platform_profile.c b/drivers/platform/surface/surface_platform_profile.c index 9d3e3f945818..b73cfdd920c6 100644 --- a/drivers/platform/surface/surface_platform_profile.c +++ b/drivers/platform/surface/surface_platform_profile.c @@ -212,6 +212,7 @@ static int surface_platform_profile_probe(struct ssam_device *sdev) tpd->sdev = sdev; tpd->handler.name = "Surface Platform Profile"; + tpd->handler.dev = &sdev->dev; tpd->handler.profile_get = ssam_platform_profile_get; tpd->handler.profile_set = ssam_platform_profile_set; diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index 53fbc9b4d3df..aca4a5746bee 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -1873,12 +1873,13 @@ acer_predator_v4_platform_profile_set(struct platform_profile_handler *pprof, return 0; } -static int acer_platform_profile_setup(void) +static int acer_platform_profile_setup(struct platform_device *device) { if (quirks->predator_v4) { int err; platform_profile_handler.name = "acer-wmi"; + platform_profile_handler.dev = &device->dev; platform_profile_handler.profile_get = acer_predator_v4_platform_profile_get; platform_profile_handler.profile_set = @@ -2531,7 +2532,7 @@ static int acer_platform_probe(struct platform_device *device) goto error_rfkill; if (has_cap(ACER_CAP_PLATFORM_PROFILE)) { - err = acer_platform_profile_setup(); + err = acer_platform_profile_setup(device); if (err) goto error_platform_profile; } diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index e2d0cc92c439..1b94af7c0e0c 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -406,6 +406,7 @@ int amd_pmf_init_sps(struct amd_pmf_dev *dev) } dev->pprof.name = "amd-pmf"; + dev->pprof.dev = dev->dev; dev->pprof.profile_get = amd_pmf_profile_get; dev->pprof.profile_set = amd_pmf_profile_set; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index a08dc936276b..14d84cad01b4 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -3877,6 +3877,7 @@ static int platform_profile_setup(struct asus_wmi *asus) dev_info(dev, "Using throttle_thermal_policy for platform_profile support\n"); asus->platform_profile_handler.name = "asus-wmi"; + asus->platform_profile_handler.dev = dev; asus->platform_profile_handler.profile_get = asus_wmi_platform_profile_get; asus->platform_profile_handler.profile_set = asus_wmi_platform_profile_set; diff --git a/drivers/platform/x86/dell/alienware-wmi.c b/drivers/platform/x86/dell/alienware-wmi.c index f88d89844751..78aac7613278 100644 --- a/drivers/platform/x86/dell/alienware-wmi.c +++ b/drivers/platform/x86/dell/alienware-wmi.c @@ -1108,7 +1108,7 @@ static int thermal_profile_set(struct platform_profile_handler *pprof, return wmax_thermal_control(supported_thermal_profiles[profile]); } -static int create_thermal_profile(void) +static int create_thermal_profile(struct platform_device *platform_device) { u32 out_data; u8 sys_desc[4]; @@ -1157,6 +1157,7 @@ static int create_thermal_profile(void) pp_handler.profile_get = thermal_profile_get; pp_handler.profile_set = thermal_profile_set; pp_handler.name = "alienware-wmi"; + pp_handler.dev = &platform_device->dev; return platform_profile_register(&pp_handler); } @@ -1225,7 +1226,7 @@ static int __init alienware_wmi_init(void) } if (quirks->thermal) { - ret = create_thermal_profile(); + ret = create_thermal_profile(platform_device); if (ret) goto fail_prep_thermal_profile; } diff --git a/drivers/platform/x86/dell/dell-pc.c b/drivers/platform/x86/dell/dell-pc.c index c0dbbd3b2306..b9a6dd7b321d 100644 --- a/drivers/platform/x86/dell/dell-pc.c +++ b/drivers/platform/x86/dell/dell-pc.c @@ -257,6 +257,7 @@ static int thermal_init(void) goto cleanup_platform_device; } thermal_handler->name = "dell-pc"; + thermal_handler->dev = &platform_device->dev; thermal_handler->profile_get = thermal_platform_profile_get; thermal_handler->profile_set = thermal_platform_profile_set; diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index 26cac73caf2b..ffb09799142b 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -1565,7 +1565,7 @@ static inline void omen_unregister_powersource_event_handler(void) unregister_acpi_notifier(&platform_power_source_nb); } -static int thermal_profile_setup(void) +static int thermal_profile_setup(struct platform_device *device) { int err, tp; @@ -1625,6 +1625,7 @@ static int thermal_profile_setup(void) } platform_profile_handler.name = "hp-wmi"; + platform_profile_handler.dev = &device->dev; set_bit(PLATFORM_PROFILE_BALANCED, platform_profile_handler.choices); set_bit(PLATFORM_PROFILE_PERFORMANCE, platform_profile_handler.choices); @@ -1664,7 +1665,7 @@ static int __init hp_wmi_bios_setup(struct platform_device *device) if (err < 0) return err; - thermal_profile_setup(); + thermal_profile_setup(device); return 0; } diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index cbdc965eeb24..b6586c473ba9 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1103,6 +1103,7 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) mutex_init(&priv->dytc->mutex); priv->dytc->pprof.name = "ideapad-laptop"; + priv->dytc->pprof.dev = &priv->platform_device->dev; priv->dytc->priv = priv; priv->dytc->pprof.profile_get = dytc_profile_get; priv->dytc->pprof.profile_set = dytc_profile_set; diff --git a/drivers/platform/x86/inspur_platform_profile.c b/drivers/platform/x86/inspur_platform_profile.c index 03da2c8cf678..5a53949bbbf5 100644 --- a/drivers/platform/x86/inspur_platform_profile.c +++ b/drivers/platform/x86/inspur_platform_profile.c @@ -178,6 +178,7 @@ static int inspur_wmi_probe(struct wmi_device *wdev, const void *context) dev_set_drvdata(&wdev->dev, priv); priv->handler.name = "inspur-wmi"; + priv->handler.dev = &wdev->dev; priv->handler.profile_get = inspur_platform_profile_get; priv->handler.profile_set = inspur_platform_profile_set; diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 0e03c41e028d..58af9020382c 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10638,6 +10638,7 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) dbg_printk(TPACPI_DBG_INIT, "DYTC version %d: thermal mode available\n", dytc_version); + dytc_profile.dev = &tpacpi_pdev->dev; /* Create platform_profile structure and register */ err = platform_profile_register(&dytc_profile); /* diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index 6fa988e41742..daec6b9bad81 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -28,6 +28,7 @@ enum platform_profile_option { struct platform_profile_handler { const char *name; + struct device *dev; unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; int (*profile_get)(struct platform_profile_handler *pprof, enum platform_profile_option *profile); -- cgit v1.2.3 From 9b3bb37b44a317626464e79da8b39989b421963f Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 5 Dec 2024 21:19:00 -0600 Subject: ACPI: platform_profile: Add platform handler argument to platform_profile_remove() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To allow registering and unregistering multiple platform handlers calls to platform_profile_remove() will need to know which handler is to be removed. Add an argument for this. Tested-by: Mark Pearson Tested-by: Matthew Schwartz Reviewed-by: Hans de Goede Reviewed-by: Mark Pearson Reviewed-by: Maximilian Luz Reviewed-by: Ilpo Järvinen Reviewed-by: Armin Wolf Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20241206031918.1537-5-mario.limonciello@amd.com Signed-off-by: Ilpo Järvinen --- drivers/acpi/platform_profile.c | 2 +- drivers/platform/surface/surface_platform_profile.c | 6 +++++- drivers/platform/x86/acer-wmi.c | 4 ++-- drivers/platform/x86/amd/pmf/sps.c | 2 +- drivers/platform/x86/asus-wmi.c | 4 ++-- drivers/platform/x86/dell/alienware-wmi.c | 2 +- drivers/platform/x86/dell/dell-pc.c | 2 +- drivers/platform/x86/hp/hp-wmi.c | 2 +- drivers/platform/x86/ideapad-laptop.c | 2 +- drivers/platform/x86/inspur_platform_profile.c | 5 ++++- drivers/platform/x86/thinkpad_acpi.c | 2 +- include/linux/platform_profile.h | 2 +- 12 files changed, 21 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index d2f7fd7743a1..c24744da2091 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -205,7 +205,7 @@ int platform_profile_register(struct platform_profile_handler *pprof) } EXPORT_SYMBOL_GPL(platform_profile_register); -int platform_profile_remove(void) +int platform_profile_remove(struct platform_profile_handler *pprof) { sysfs_remove_group(acpi_kobj, &platform_profile_group); diff --git a/drivers/platform/surface/surface_platform_profile.c b/drivers/platform/surface/surface_platform_profile.c index b73cfdd920c6..6c87e982bfc8 100644 --- a/drivers/platform/surface/surface_platform_profile.c +++ b/drivers/platform/surface/surface_platform_profile.c @@ -210,6 +210,7 @@ static int surface_platform_profile_probe(struct ssam_device *sdev) return -ENOMEM; tpd->sdev = sdev; + ssam_device_set_drvdata(sdev, tpd); tpd->handler.name = "Surface Platform Profile"; tpd->handler.dev = &sdev->dev; @@ -228,7 +229,10 @@ static int surface_platform_profile_probe(struct ssam_device *sdev) static void surface_platform_profile_remove(struct ssam_device *sdev) { - platform_profile_remove(); + struct ssam_platform_profile_device *tpd; + + tpd = ssam_device_get_drvdata(sdev); + platform_profile_remove(&tpd->handler); } static const struct ssam_device_id ssam_platform_profile_match[] = { diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index aca4a5746bee..b12965d9fcdb 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -2547,7 +2547,7 @@ static int acer_platform_probe(struct platform_device *device) error_hwmon: if (platform_profile_support) - platform_profile_remove(); + platform_profile_remove(&platform_profile_handler); error_platform_profile: acer_rfkill_exit(); error_rfkill: @@ -2570,7 +2570,7 @@ static void acer_platform_remove(struct platform_device *device) acer_rfkill_exit(); if (platform_profile_support) - platform_profile_remove(); + platform_profile_remove(&platform_profile_handler); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c index 1b94af7c0e0c..bd2bd6cfc39a 100644 --- a/drivers/platform/x86/amd/pmf/sps.c +++ b/drivers/platform/x86/amd/pmf/sps.c @@ -426,5 +426,5 @@ int amd_pmf_init_sps(struct amd_pmf_dev *dev) void amd_pmf_deinit_sps(struct amd_pmf_dev *dev) { - platform_profile_remove(); + platform_profile_remove(&dev->pprof); } diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 14d84cad01b4..805bce80beef 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -4852,7 +4852,7 @@ fail_sysfs: fail_custom_fan_curve: fail_platform_profile_setup: if (asus->platform_profile_support) - platform_profile_remove(); + platform_profile_remove(&asus->platform_profile_handler); fail_fan_boost_mode: fail_platform: kfree(asus); @@ -4879,7 +4879,7 @@ static void asus_wmi_remove(struct platform_device *device) asus_wmi_battery_exit(asus); if (asus->platform_profile_support) - platform_profile_remove(); + platform_profile_remove(&asus->platform_profile_handler); kfree(asus); } diff --git a/drivers/platform/x86/dell/alienware-wmi.c b/drivers/platform/x86/dell/alienware-wmi.c index 78aac7613278..e95d22c7b60c 100644 --- a/drivers/platform/x86/dell/alienware-wmi.c +++ b/drivers/platform/x86/dell/alienware-wmi.c @@ -1165,7 +1165,7 @@ static int create_thermal_profile(struct platform_device *platform_device) static void remove_thermal_profile(void) { if (quirks->thermal) - platform_profile_remove(); + platform_profile_remove(&pp_handler); } static int __init alienware_wmi_init(void) diff --git a/drivers/platform/x86/dell/dell-pc.c b/drivers/platform/x86/dell/dell-pc.c index b9a6dd7b321d..3797a5721dbd 100644 --- a/drivers/platform/x86/dell/dell-pc.c +++ b/drivers/platform/x86/dell/dell-pc.c @@ -289,7 +289,7 @@ cleanup_platform_device: static void thermal_cleanup(void) { if (thermal_handler) - platform_profile_remove(); + platform_profile_remove(thermal_handler); platform_device_unregister(platform_device); } diff --git a/drivers/platform/x86/hp/hp-wmi.c b/drivers/platform/x86/hp/hp-wmi.c index ffb09799142b..6d6e13a0c6e2 100644 --- a/drivers/platform/x86/hp/hp-wmi.c +++ b/drivers/platform/x86/hp/hp-wmi.c @@ -1693,7 +1693,7 @@ static void __exit hp_wmi_bios_remove(struct platform_device *device) } if (platform_profile_support) - platform_profile_remove(); + platform_profile_remove(&platform_profile_handler); } static int hp_wmi_resume_handler(struct device *device) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index b6586c473ba9..95b66ac3ea1d 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1136,7 +1136,7 @@ static void ideapad_dytc_profile_exit(struct ideapad_private *priv) if (!priv->dytc) return; - platform_profile_remove(); + platform_profile_remove(&priv->dytc->pprof); mutex_destroy(&priv->dytc->mutex); kfree(priv->dytc); diff --git a/drivers/platform/x86/inspur_platform_profile.c b/drivers/platform/x86/inspur_platform_profile.c index 5a53949bbbf5..53af73a7fbf7 100644 --- a/drivers/platform/x86/inspur_platform_profile.c +++ b/drivers/platform/x86/inspur_platform_profile.c @@ -191,7 +191,10 @@ static int inspur_wmi_probe(struct wmi_device *wdev, const void *context) static void inspur_wmi_remove(struct wmi_device *wdev) { - platform_profile_remove(); + struct inspur_wmi_priv *priv; + + priv = dev_get_drvdata(&wdev->dev); + platform_profile_remove(&priv->handler); } static const struct wmi_device_id inspur_wmi_id_table[] = { diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 58af9020382c..298100d5c535 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10660,7 +10660,7 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) static void dytc_profile_exit(void) { - platform_profile_remove(); + platform_profile_remove(&dytc_profile); } static struct ibm_struct dytc_profile_driver_data = { diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index daec6b9bad81..bcaf3aa39160 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -37,7 +37,7 @@ struct platform_profile_handler { }; int platform_profile_register(struct platform_profile_handler *pprof); -int platform_profile_remove(void); +int platform_profile_remove(struct platform_profile_handler *pprof); int platform_profile_cycle(void); void platform_profile_notify(void); -- cgit v1.2.3 From 4d5c027bf55661da2621c694ea39908ae2d3a46a Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 5 Dec 2024 21:19:01 -0600 Subject: ACPI: platform_profile: Pass the profile handler into platform_profile_notify() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The profile handler will be used to notify the appropriate class devices. Reviewed-by: Armin Wolf Reviewed-by: Mark Pearson Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20241206031918.1537-6-mario.limonciello@amd.com Reviewed-by: Ilpo Järvinen Signed-off-by: Ilpo Järvinen --- drivers/acpi/platform_profile.c | 2 +- drivers/platform/x86/acer-wmi.c | 2 +- drivers/platform/x86/asus-wmi.c | 2 +- drivers/platform/x86/ideapad-laptop.c | 2 +- drivers/platform/x86/thinkpad_acpi.c | 14 +++++++------- include/linux/platform_profile.h | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index c24744da2091..927a2f7456c9 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -128,7 +128,7 @@ static const struct attribute_group platform_profile_group = { .attrs = platform_profile_attrs }; -void platform_profile_notify(void) +void platform_profile_notify(struct platform_profile_handler *pprof) { if (!cur_profile) return; diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index b12965d9fcdb..001881825807 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -1988,7 +1988,7 @@ static int acer_thermal_profile_change(void) if (tp != ACER_PREDATOR_V4_THERMAL_PROFILE_TURBO_WMI) last_non_turbo_profile = tp; - platform_profile_notify(); + platform_profile_notify(&platform_profile_handler); } return 0; diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 805bce80beef..fdeebab96fc0 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -3789,7 +3789,7 @@ static ssize_t throttle_thermal_policy_store(struct device *dev, * Ensure that platform_profile updates userspace with the change to ensure * that platform_profile and throttle_thermal_policy_mode are in sync. */ - platform_profile_notify(); + platform_profile_notify(&asus->platform_profile_handler); return count; } diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 95b66ac3ea1d..00b9c0c32239 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1041,7 +1041,7 @@ static void dytc_profile_refresh(struct ideapad_private *priv) if (profile != priv->dytc->current_profile) { priv->dytc->current_profile = profile; - platform_profile_notify(); + platform_profile_notify(&priv->dytc->pprof); } } diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 298100d5c535..f51662861738 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -10538,6 +10538,12 @@ unlock: return err; } +static struct platform_profile_handler dytc_profile = { + .name = "thinkpad-acpi", + .profile_get = dytc_profile_get, + .profile_set = dytc_profile_set, +}; + static void dytc_profile_refresh(void) { enum platform_profile_option profile; @@ -10566,16 +10572,10 @@ static void dytc_profile_refresh(void) err = convert_dytc_to_profile(funcmode, perfmode, &profile); if (!err && profile != dytc_current_profile) { dytc_current_profile = profile; - platform_profile_notify(); + platform_profile_notify(&dytc_profile); } } -static struct platform_profile_handler dytc_profile = { - .name = "thinkpad-acpi", - .profile_get = dytc_profile_get, - .profile_set = dytc_profile_set, -}; - static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) { int err, output; diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index bcaf3aa39160..8ec0b8da56db 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -39,6 +39,6 @@ struct platform_profile_handler { int platform_profile_register(struct platform_profile_handler *pprof); int platform_profile_remove(struct platform_profile_handler *pprof); int platform_profile_cycle(void); -void platform_profile_notify(void); +void platform_profile_notify(struct platform_profile_handler *pprof); #endif /*_PLATFORM_PROFILE_H_*/ -- cgit v1.2.3 From 77be5cacb2c2d8c3ddd069f0b4e9408f553af1d8 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 5 Dec 2024 21:19:06 -0600 Subject: ACPI: platform_profile: Create class for ACPI platform profile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When registering a platform profile handler create a class device that will allow changing a single platform profile handler. The class and sysfs group are no longer needed when the platform profile core is a module and unloaded, so remove them at that time as well. Reviewed-by: Armin Wolf Tested-by: Mark Pearson Reviewed-by: Mark Pearson Reviewed-by: Ilpo Järvinen Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20241206031918.1537-11-mario.limonciello@amd.com Signed-off-by: Ilpo Järvinen --- drivers/acpi/platform_profile.c | 82 +++++++++++++++++++++++++++++++++++++--- include/linux/platform_profile.h | 2 + 2 files changed, 79 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index a1f0378f15e6..11eb60b09bac 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,12 @@ static const char * const profile_names[] = { }; static_assert(ARRAY_SIZE(profile_names) == PLATFORM_PROFILE_LAST); +static DEFINE_IDA(platform_profile_ida); + +static const struct class platform_profile_class = { + .name = "platform-profile", +}; + static ssize_t platform_profile_choices_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -101,8 +108,21 @@ static struct attribute *platform_profile_attrs[] = { NULL }; +static int profile_class_registered(struct device *dev, const void *data) +{ + return 1; +} + +static umode_t profile_class_is_visible(struct kobject *kobj, struct attribute *attr, int idx) +{ + if (!class_find_device(&platform_profile_class, NULL, NULL, profile_class_registered)) + return 0; + return attr->mode; +} + static const struct attribute_group platform_profile_group = { - .attrs = platform_profile_attrs + .attrs = platform_profile_attrs, + .is_visible = profile_class_is_visible, }; void platform_profile_notify(struct platform_profile_handler *pprof) @@ -160,25 +180,77 @@ int platform_profile_register(struct platform_profile_handler *pprof) if (cur_profile) return -EEXIST; - err = sysfs_create_group(acpi_kobj, &platform_profile_group); - if (err) - return err; + /* create class interface for individual handler */ + pprof->minor = ida_alloc(&platform_profile_ida, GFP_KERNEL); + if (pprof->minor < 0) + return pprof->minor; + pprof->class_dev = device_create(&platform_profile_class, pprof->dev, + MKDEV(0, 0), pprof, "platform-profile-%d", + pprof->minor); + if (IS_ERR(pprof->class_dev)) { + err = PTR_ERR(pprof->class_dev); + goto cleanup_ida; + } cur_profile = pprof; + + err = sysfs_update_group(acpi_kobj, &platform_profile_group); + if (err) + goto cleanup_cur; + return 0; + +cleanup_cur: + cur_profile = NULL; + device_unregister(pprof->class_dev); + +cleanup_ida: + ida_free(&platform_profile_ida, pprof->minor); + + return err; } EXPORT_SYMBOL_GPL(platform_profile_register); int platform_profile_remove(struct platform_profile_handler *pprof) { + int id; guard(mutex)(&profile_lock); - sysfs_remove_group(acpi_kobj, &platform_profile_group); cur_profile = NULL; + + id = pprof->minor; + device_unregister(pprof->class_dev); + ida_free(&platform_profile_ida, id); + + sysfs_update_group(acpi_kobj, &platform_profile_group); + return 0; } EXPORT_SYMBOL_GPL(platform_profile_remove); +static int __init platform_profile_init(void) +{ + int err; + + err = class_register(&platform_profile_class); + if (err) + return err; + + err = sysfs_create_group(acpi_kobj, &platform_profile_group); + if (err) + class_unregister(&platform_profile_class); + + return err; +} + +static void __exit platform_profile_exit(void) +{ + sysfs_remove_group(acpi_kobj, &platform_profile_group); + class_unregister(&platform_profile_class); +} +module_init(platform_profile_init); +module_exit(platform_profile_exit); + MODULE_AUTHOR("Mark Pearson "); MODULE_DESCRIPTION("ACPI platform profile sysfs interface"); MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index 8ec0b8da56db..a888fd085c51 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -29,6 +29,8 @@ enum platform_profile_option { struct platform_profile_handler { const char *name; struct device *dev; + struct device *class_dev; + int minor; unsigned long choices[BITS_TO_LONGS(PLATFORM_PROFILE_LAST)]; int (*profile_get)(struct platform_profile_handler *pprof, enum platform_profile_option *profile); -- cgit v1.2.3 From 494637cf5bf098ac0fe125dd6d23368419fe9da4 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Thu, 5 Dec 2024 21:19:12 -0600 Subject: ACPI: platform_profile: Add concept of a "custom" profile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When two profile handlers don't agree on the current profile it's ambiguous what to show to the legacy sysfs interface. Add a "custom" profile string that userspace will be able to use the legacy sysfs interface to distinguish this situation.. Additionally drivers can choose to use this to indicate that a user has modified driver settings in a way that the platform profile advertised by a driver is not accurate. Reviewed-by: Armin Wolf Tested-by: Mark Pearson Reviewed-by: Mark Pearson Reviewed-by: Ilpo Järvinen Signed-off-by: Mario Limonciello Link: https://lore.kernel.org/r/20241206031918.1537-17-mario.limonciello@amd.com Signed-off-by: Ilpo Järvinen --- drivers/acpi/platform_profile.c | 1 + include/linux/platform_profile.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/acpi/platform_profile.c b/drivers/acpi/platform_profile.c index bdf69255ed9c..54483269b883 100644 --- a/drivers/acpi/platform_profile.c +++ b/drivers/acpi/platform_profile.c @@ -20,6 +20,7 @@ static const char * const profile_names[] = { [PLATFORM_PROFILE_BALANCED] = "balanced", [PLATFORM_PROFILE_BALANCED_PERFORMANCE] = "balanced-performance", [PLATFORM_PROFILE_PERFORMANCE] = "performance", + [PLATFORM_PROFILE_CUSTOM] = "custom", }; static_assert(ARRAY_SIZE(profile_names) == PLATFORM_PROFILE_LAST); diff --git a/include/linux/platform_profile.h b/include/linux/platform_profile.h index a888fd085c51..0682bb4c57e5 100644 --- a/include/linux/platform_profile.h +++ b/include/linux/platform_profile.h @@ -23,6 +23,7 @@ enum platform_profile_option { PLATFORM_PROFILE_BALANCED, PLATFORM_PROFILE_BALANCED_PERFORMANCE, PLATFORM_PROFILE_PERFORMANCE, + PLATFORM_PROFILE_CUSTOM, PLATFORM_PROFILE_LAST, /*must always be last */ }; -- cgit v1.2.3 From 9029409d1a250da19f1086ab1113752411c5163d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Tue, 10 Dec 2024 22:55:49 +0100 Subject: power: supply: core: introduce power_supply_for_each_psy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All existing callers of power_supply_for_each_device() want to iterate over 'struct power_supply', not 'struct device'. The power_supply_for_each_device() forces each caller to duplicate the logic to go from one to the other. Introduce power_supply_for_each_psy() to simplify the callers. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241210-power-supply-dev_to_psy-v2-2-9d8c9d24cfe4@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 24 ++++++++++++++++++++++++ include/linux/power_supply.h | 1 + 2 files changed, 25 insertions(+) (limited to 'include') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 502b07468b93..48bc968ff6c5 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -122,6 +122,30 @@ int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void } EXPORT_SYMBOL_GPL(power_supply_for_each_device); +struct psy_for_each_psy_cb_data { + int (*fn)(struct power_supply *psy, void *data); + void *data; +}; + +static int psy_for_each_psy_cb(struct device *dev, void *data) +{ + struct psy_for_each_psy_cb_data *cb_data = data; + struct power_supply *psy = dev_get_drvdata(dev); + + return cb_data->fn(psy, cb_data->data); +} + +int power_supply_for_each_psy(void *data, int (*fn)(struct power_supply *psy, void *data)) +{ + struct psy_for_each_psy_cb_data cb_data = { + .fn = fn, + .data = data, + }; + + return power_supply_for_each_device(&cb_data, psy_for_each_psy_cb); +} +EXPORT_SYMBOL_GPL(power_supply_for_each_psy); + void power_supply_changed(struct power_supply *psy) { unsigned long flags; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index b98106e1a90f..11d54270eaa9 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -882,6 +882,7 @@ extern int power_supply_powers(struct power_supply *psy, struct device *dev); extern void *power_supply_get_drvdata(struct power_supply *psy); extern int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)); +extern int power_supply_for_each_psy(void *data, int (*fn)(struct power_supply *psy, void *data)); static inline bool power_supply_is_amp_property(enum power_supply_property psp) { -- cgit v1.2.3 From bfc330323cf3ea6d5c9985179384c0b56f2d5372 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Tue, 10 Dec 2024 22:55:53 +0100 Subject: power: supply: core: remove power_supply_for_each_device() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are no users anymore. All potential future users are expected to use power_supply_for_each_psy(). Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241210-power-supply-dev_to_psy-v2-6-9d8c9d24cfe4@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 8 +------- include/linux/power_supply.h | 1 - 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 9adea87c6912..0cdccfd585cb 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -115,12 +115,6 @@ static void power_supply_changed_work(struct work_struct *work) spin_unlock_irqrestore(&psy->changed_lock, flags); } -int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)) -{ - return class_for_each_device(&power_supply_class, NULL, data, fn); -} -EXPORT_SYMBOL_GPL(power_supply_for_each_device); - struct psy_for_each_psy_cb_data { int (*fn)(struct power_supply *psy, void *data); void *data; @@ -141,7 +135,7 @@ int power_supply_for_each_psy(void *data, int (*fn)(struct power_supply *psy, vo .data = data, }; - return power_supply_for_each_device(&cb_data, psy_for_each_psy_cb); + return class_for_each_device(&power_supply_class, NULL, &cb_data, psy_for_each_psy_cb); } EXPORT_SYMBOL_GPL(power_supply_for_each_psy); diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 11d54270eaa9..3d67f4a6a1c9 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -881,7 +881,6 @@ extern int power_supply_powers(struct power_supply *psy, struct device *dev); #define to_power_supply(device) container_of(device, struct power_supply, dev) extern void *power_supply_get_drvdata(struct power_supply *psy); -extern int power_supply_for_each_device(void *data, int (*fn)(struct device *dev, void *data)); extern int power_supply_for_each_psy(void *data, int (*fn)(struct power_supply *psy, void *data)); static inline bool power_supply_is_amp_property(enum power_supply_property psp) -- cgit v1.2.3 From f52204036326bd9c07db08bab6607f423c801716 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Tue, 10 Dec 2024 22:55:54 +0100 Subject: power: supply: core: introduce dev_to_psy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The psy core and drivers currently use dev_get_drvdata() to go from a 'struct device' to its 'struct power_supply'. This is not typesafe and or documented. Introduce a new helper to make this pattern explicit. Instead of using dev_get_drvdata(), use container_of_const() which also preserves the constness. Furthermore 'dev' does need to be dereferenced anymore and at some point the drvdata could be reused for something else. Signed-off-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241210-power-supply-dev_to_psy-v2-7-9d8c9d24cfe4@weissschuh.net Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 3d67f4a6a1c9..17fc383785bf 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -318,6 +318,8 @@ struct power_supply { #endif }; +#define dev_to_psy(__dev) container_of_const(__dev, struct power_supply, dev) + /* * This is recommended structure to specify static power supply parameters. * Generic one, parametrizable for different power supplies. Power supply -- cgit v1.2.3 From be325f08c432ae5ac6d6594d163e1899cdf202df Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Dec 2024 10:07:45 +0000 Subject: rtnetlink: add ndo_fdb_dump_context rtnl_fdb_dump() and various ndo_fdb_dump() helpers share a hidden layout of cb->ctx. Before switching rtnl_fdb_dump() to for_each_netdev_dump() in the following patch, make this more explicit. Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241209100747.2269613-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/freescale/dpaa2/dpaa2-switch.c | 3 ++- drivers/net/ethernet/mscc/ocelot_net.c | 3 ++- drivers/net/vxlan/vxlan_core.c | 5 +++-- include/linux/rtnetlink.h | 7 ++++++ net/bridge/br_fdb.c | 3 ++- net/core/rtnetlink.c | 26 ++++++++++++---------- net/dsa/user.c | 3 ++- 7 files changed, 32 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index a293b08f36d4..147a93bf9fa9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -780,13 +780,14 @@ struct ethsw_dump_ctx { static int dpaa2_switch_fdb_dump_nl(struct fdb_dump_entry *entry, struct ethsw_dump_ctx *dump) { + struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx; int is_dynamic = entry->type & DPSW_FDB_ENTRY_DINAMIC; u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[2]) + if (dump->idx < ctx->fdb_idx) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, diff --git a/drivers/net/ethernet/mscc/ocelot_net.c b/drivers/net/ethernet/mscc/ocelot_net.c index 558e03301aa8..8d48468cddd7 100644 --- a/drivers/net/ethernet/mscc/ocelot_net.c +++ b/drivers/net/ethernet/mscc/ocelot_net.c @@ -758,12 +758,13 @@ static int ocelot_port_fdb_do_dump(const unsigned char *addr, u16 vid, bool is_static, void *data) { struct ocelot_dump_ctx *dump = data; + struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx; u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[2]) + if (dump->idx < ctx->fdb_idx) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 43cf672b7b9f..0c356e0a61ef 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1352,6 +1352,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx) { + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; int err = 0; @@ -1364,7 +1365,7 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct vxlan_rdst *rd; if (rcu_access_pointer(f->nh)) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip_nh; err = vxlan_fdb_info(skb, vxlan, f, NETLINK_CB(cb->skb).portid, @@ -1381,7 +1382,7 @@ skip_nh: } list_for_each_entry_rcu(rd, &f->remotes, list) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip; err = vxlan_fdb_info(skb, vxlan, f, diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 811ce44113f6..c43cffb014a7 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -178,6 +178,13 @@ void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); +/* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. */ +struct ndo_fdb_dump_context { + unsigned long s_h; + unsigned long s_idx; + unsigned long fdb_idx; +}; + extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 82bac2426631..902694c0ce64 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -955,6 +955,7 @@ int br_fdb_dump(struct sk_buff *skb, struct net_device *filter_dev, int *idx) { + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct net_bridge *br = netdev_priv(dev); struct net_bridge_fdb_entry *f; int err = 0; @@ -970,7 +971,7 @@ int br_fdb_dump(struct sk_buff *skb, rcu_read_lock(); hlist_for_each_entry_rcu(f, &br->fdb_list, fdb_node) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip; if (filter_dev && (!f->dst || f->dst->dev != filter_dev)) { if (filter_dev != dev) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ab5f201bf0ab..453cc8bf18fb 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4762,15 +4762,16 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, int *idx, struct netdev_hw_addr_list *list) { + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct netdev_hw_addr *ha; - int err; u32 portid, seq; + int err; portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { - if (*idx < cb->args[2]) + if (*idx < ctx->fdb_idx) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, @@ -4909,10 +4910,9 @@ static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh, static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { - struct net_device *dev; - struct net_device *br_dev = NULL; - const struct net_device_ops *ops = NULL; - const struct net_device_ops *cops = NULL; + const struct net_device_ops *ops = NULL, *cops = NULL; + struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; + struct net_device *dev, *br_dev = NULL; struct net *net = sock_net(skb->sk); struct hlist_head *head; int brport_idx = 0; @@ -4922,6 +4922,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) int err = 0; int fidx = 0; + NL_ASSERT_CTX_FITS(struct ndo_fdb_dump_context); + if (cb->strict_check) err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx, cb->extack); @@ -4939,8 +4941,8 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - s_h = cb->args[0]; - s_idx = cb->args[1]; + s_h = ctx->s_h; + s_idx = ctx->s_idx; for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; @@ -4992,7 +4994,7 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) cops = NULL; /* reset fdb offset to 0 for rest of the interfaces */ - cb->args[2] = 0; + ctx->fdb_idx = 0; fidx = 0; cont: idx++; @@ -5000,9 +5002,9 @@ cont: } out: - cb->args[0] = h; - cb->args[1] = idx; - cb->args[2] = fidx; + ctx->s_h = h; + ctx->s_idx = idx; + ctx->fdb_idx = fidx; return skb->len; } diff --git a/net/dsa/user.c b/net/dsa/user.c index 06c30a9e29ff..c736c019e2af 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -515,12 +515,13 @@ dsa_user_port_fdb_do_dump(const unsigned char *addr, u16 vid, bool is_static, void *data) { struct dsa_user_dump_ctx *dump = data; + struct ndo_fdb_dump_context *ctx = (void *)dump->cb->ctx; u32 portid = NETLINK_CB(dump->cb->skb).portid; u32 seq = dump->cb->nlh->nlmsg_seq; struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[2]) + if (dump->idx < ctx->fdb_idx) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, -- cgit v1.2.3 From 53970a05f799087e2dd2005973609188504e7fcc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Dec 2024 10:07:46 +0000 Subject: rtnetlink: switch rtnl_fdb_dump() to for_each_netdev_dump() This is the last netdev iterator still using net->dev_index_head[]. Convert to modern for_each_netdev_dump() for better scalability, and use common patterns in our stack. Following patch in this series removes the pad field in struct ndo_fdb_dump_context. Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241209100747.2269613-3-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/rtnetlink.h | 4 +-- net/core/rtnetlink.c | 92 ++++++++++++++++++----------------------------- 2 files changed, 37 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index c43cffb014a7..5546571c2553 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -180,8 +180,8 @@ void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); /* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. */ struct ndo_fdb_dump_context { - unsigned long s_h; - unsigned long s_idx; + unsigned long ifindex; + unsigned long pad; unsigned long fdb_idx; }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 453cc8bf18fb..8fe252c298a2 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -4914,13 +4914,10 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct net_device *dev, *br_dev = NULL; struct net *net = sock_net(skb->sk); - struct hlist_head *head; int brport_idx = 0; int br_idx = 0; - int h, s_h; - int idx = 0, s_idx; - int err = 0; int fidx = 0; + int err; NL_ASSERT_CTX_FITS(struct ndo_fdb_dump_context); @@ -4941,69 +4938,50 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - s_h = ctx->s_h; - s_idx = ctx->s_idx; - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry(dev, head, index_hlist) { - - if (brport_idx && (dev->ifindex != brport_idx)) - continue; - - if (!br_idx) { /* user did not specify a specific bridge */ - if (netif_is_bridge_port(dev)) { - br_dev = netdev_master_upper_dev_get(dev); - cops = br_dev->netdev_ops; - } - } else { - if (dev != br_dev && - !netif_is_bridge_port(dev)) - continue; + for_each_netdev_dump(net, dev, ctx->ifindex) { + if (brport_idx && (dev->ifindex != brport_idx)) + continue; - if (br_dev != netdev_master_upper_dev_get(dev) && - !netif_is_bridge_master(dev)) - continue; - cops = ops; + if (!br_idx) { /* user did not specify a specific bridge */ + if (netif_is_bridge_port(dev)) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; } + } else { + if (dev != br_dev && + !netif_is_bridge_port(dev)) + continue; - if (idx < s_idx) - goto cont; + if (br_dev != netdev_master_upper_dev_get(dev) && + !netif_is_bridge_master(dev)) + continue; + cops = ops; + } - if (netif_is_bridge_port(dev)) { - if (cops && cops->ndo_fdb_dump) { - err = cops->ndo_fdb_dump(skb, cb, - br_dev, dev, - &fidx); - if (err == -EMSGSIZE) - goto out; - } + if (netif_is_bridge_port(dev)) { + if (cops && cops->ndo_fdb_dump) { + err = cops->ndo_fdb_dump(skb, cb, br_dev, dev, + &fidx); + if (err == -EMSGSIZE) + break; } + } - if (dev->netdev_ops->ndo_fdb_dump) - err = dev->netdev_ops->ndo_fdb_dump(skb, cb, - dev, NULL, - &fidx); - else - err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, - &fidx); - if (err == -EMSGSIZE) - goto out; + if (dev->netdev_ops->ndo_fdb_dump) + err = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, + &fidx); + else + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, &fidx); + if (err == -EMSGSIZE) + break; - cops = NULL; + cops = NULL; - /* reset fdb offset to 0 for rest of the interfaces */ - ctx->fdb_idx = 0; - fidx = 0; -cont: - idx++; - } + /* reset fdb offset to 0 for rest of the interfaces */ + ctx->fdb_idx = 0; + fidx = 0; } -out: - ctx->s_h = h; - ctx->s_idx = idx; ctx->fdb_idx = fidx; return skb->len; -- cgit v1.2.3 From 53a6d8912372fc23ea82cc7a49eb59047aa0a650 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Dec 2024 10:07:47 +0000 Subject: rtnetlink: remove pad field in ndo_fdb_dump_context I chose to remove this field in a separate patch to ease potential bisection, in case one ndo_fdb_dump() is still using the old way (cb->args[2] instead of ctx->fdb_idx) Signed-off-by: Eric Dumazet Reviewed-by: Ido Schimmel Reviewed-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20241209100747.2269613-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/rtnetlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5546571c2553..3b9d132cbc9e 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -181,7 +181,6 @@ void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); /* Shared by rtnl_fdb_dump() and various ndo_fdb_dump() helpers. */ struct ndo_fdb_dump_context { unsigned long ifindex; - unsigned long pad; unsigned long fdb_idx; }; -- cgit v1.2.3 From 5aec7c065fba0c56d6c1ea5d629395210f174be8 Mon Sep 17 00:00:00 2001 From: James Clark Date: Thu, 28 Nov 2024 12:14:14 +0000 Subject: coresight: Drop atomics in connection refcounts These belong to the device being enabled or disabled and are only ever used inside the device's spinlock. Remove the atomics to not imply that there are any other concurrent accesses. If atomics were necessary I don't think they would have been enough anyway. There would be nothing to prevent an enable or disable running concurrently if not for the spinlock. Signed-off-by: James Clark Reviewed-by: Yeoreum Yun Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20241128121414.2425119-1-james.clark@linaro.org --- drivers/hwtracing/coresight/coresight-funnel.c | 6 +++--- drivers/hwtracing/coresight/coresight-replicator.c | 6 +++--- drivers/hwtracing/coresight/coresight-tpda.c | 6 +++--- include/linux/coresight.h | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/hwtracing/coresight/coresight-funnel.c b/drivers/hwtracing/coresight/coresight-funnel.c index 33efe1acbef7..8faf51469bb8 100644 --- a/drivers/hwtracing/coresight/coresight-funnel.c +++ b/drivers/hwtracing/coresight/coresight-funnel.c @@ -86,14 +86,14 @@ static int funnel_enable(struct coresight_device *csdev, bool first_enable = false; spin_lock_irqsave(&drvdata->spinlock, flags); - if (atomic_read(&in->dest_refcnt) == 0) { + if (in->dest_refcnt == 0) { if (drvdata->base) rc = dynamic_funnel_enable_hw(drvdata, in->dest_port); if (!rc) first_enable = true; } if (!rc) - atomic_inc(&in->dest_refcnt); + in->dest_refcnt++; spin_unlock_irqrestore(&drvdata->spinlock, flags); if (first_enable) @@ -130,7 +130,7 @@ static void funnel_disable(struct coresight_device *csdev, bool last_disable = false; spin_lock_irqsave(&drvdata->spinlock, flags); - if (atomic_dec_return(&in->dest_refcnt) == 0) { + if (--in->dest_refcnt == 0) { if (drvdata->base) dynamic_funnel_disable_hw(drvdata, in->dest_port); last_disable = true; diff --git a/drivers/hwtracing/coresight/coresight-replicator.c b/drivers/hwtracing/coresight/coresight-replicator.c index 0fba87de6d1a..a1181c9048c0 100644 --- a/drivers/hwtracing/coresight/coresight-replicator.c +++ b/drivers/hwtracing/coresight/coresight-replicator.c @@ -126,7 +126,7 @@ static int replicator_enable(struct coresight_device *csdev, bool first_enable = false; spin_lock_irqsave(&drvdata->spinlock, flags); - if (atomic_read(&out->src_refcnt) == 0) { + if (out->src_refcnt == 0) { if (drvdata->base) rc = dynamic_replicator_enable(drvdata, in->dest_port, out->src_port); @@ -134,7 +134,7 @@ static int replicator_enable(struct coresight_device *csdev, first_enable = true; } if (!rc) - atomic_inc(&out->src_refcnt); + out->src_refcnt++; spin_unlock_irqrestore(&drvdata->spinlock, flags); if (first_enable) @@ -180,7 +180,7 @@ static void replicator_disable(struct coresight_device *csdev, bool last_disable = false; spin_lock_irqsave(&drvdata->spinlock, flags); - if (atomic_dec_return(&out->src_refcnt) == 0) { + if (--out->src_refcnt == 0) { if (drvdata->base) dynamic_replicator_disable(drvdata, in->dest_port, out->src_port); diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c index bfca103f9f84..4ec676bea1ce 100644 --- a/drivers/hwtracing/coresight/coresight-tpda.c +++ b/drivers/hwtracing/coresight/coresight-tpda.c @@ -190,10 +190,10 @@ static int tpda_enable(struct coresight_device *csdev, int ret = 0; spin_lock(&drvdata->spinlock); - if (atomic_read(&in->dest_refcnt) == 0) { + if (in->dest_refcnt == 0) { ret = __tpda_enable(drvdata, in->dest_port); if (!ret) { - atomic_inc(&in->dest_refcnt); + in->dest_refcnt++; csdev->refcnt++; dev_dbg(drvdata->dev, "TPDA inport %d enabled.\n", in->dest_port); } @@ -223,7 +223,7 @@ static void tpda_disable(struct coresight_device *csdev, struct tpda_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent); spin_lock(&drvdata->spinlock); - if (atomic_dec_return(&in->dest_refcnt) == 0) { + if (--in->dest_refcnt == 0) { __tpda_disable(drvdata, in->dest_port); csdev->refcnt--; } diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c13342594278..834029cb9ba2 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -200,8 +200,8 @@ struct coresight_connection { struct coresight_device *dest_dev; struct coresight_sysfs_link *link; struct coresight_device *src_dev; - atomic_t src_refcnt; - atomic_t dest_refcnt; + int src_refcnt; + int dest_refcnt; }; /** -- cgit v1.2.3 From fd9b7e8e9fbc23d69fa4accc881dea2cf13a2e2e Mon Sep 17 00:00:00 2001 From: Mao Jinlong Date: Thu, 21 Nov 2024 14:28:28 +0800 Subject: coresight: Add support to get static id for system trace sources Dynamic trace id was introduced in coresight subsystem, so trace id is allocated dynamically. However, some hardware ATB source has static trace id and it cannot be changed via software programming. For such source, it can call coresight_get_static_trace_id to get the fixed trace id from device node and pass id to coresight_trace_id_get_static_system_id to reserve the id. Signed-off-by: Mao Jinlong Reviewed-by: Mike Leach Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20241121062829.11571-3-quic_jinlmao@quicinc.com --- drivers/hwtracing/coresight/coresight-platform.c | 6 ++++ drivers/hwtracing/coresight/coresight-trace-id.c | 43 +++++++++++++++++------- drivers/hwtracing/coresight/coresight-trace-id.h | 9 +++++ include/linux/coresight.h | 1 + 4 files changed, 47 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c index 64e171eaad82..633d96b9577a 100644 --- a/drivers/hwtracing/coresight/coresight-platform.c +++ b/drivers/hwtracing/coresight/coresight-platform.c @@ -796,6 +796,12 @@ int coresight_get_cpu(struct device *dev) } EXPORT_SYMBOL_GPL(coresight_get_cpu); +int coresight_get_static_trace_id(struct device *dev, u32 *id) +{ + return fwnode_property_read_u32(dev_fwnode(dev), "arm,static-trace-id", id); +} +EXPORT_SYMBOL_GPL(coresight_get_static_trace_id); + struct coresight_platform_data * coresight_get_platform_data(struct device *dev) { diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c index d98e12cb30ec..378af743be45 100644 --- a/drivers/hwtracing/coresight/coresight-trace-id.c +++ b/drivers/hwtracing/coresight/coresight-trace-id.c @@ -12,6 +12,12 @@ #include "coresight-trace-id.h" +enum trace_id_flags { + TRACE_ID_ANY = 0x0, + TRACE_ID_PREFER_ODD = 0x1, + TRACE_ID_REQ_STATIC = 0x2, +}; + /* Default trace ID map. Used in sysfs mode and for system sources */ static DEFINE_PER_CPU(atomic_t, id_map_default_cpu_ids) = ATOMIC_INIT(0); static struct coresight_trace_id_map id_map_default = { @@ -74,21 +80,25 @@ static int coresight_trace_id_find_odd_id(struct coresight_trace_id_map *id_map) * Otherwise allocate next available ID. */ static int coresight_trace_id_alloc_new_id(struct coresight_trace_id_map *id_map, - int preferred_id, bool prefer_odd_id) + int preferred_id, unsigned int flags) { int id = 0; /* for backwards compatibility, cpu IDs may use preferred value */ - if (IS_VALID_CS_TRACE_ID(preferred_id) && - !test_bit(preferred_id, id_map->used_ids)) { - id = preferred_id; - goto trace_id_allocated; - } else if (prefer_odd_id) { + if (IS_VALID_CS_TRACE_ID(preferred_id)) { + if (!test_bit(preferred_id, id_map->used_ids)) { + id = preferred_id; + goto trace_id_allocated; + } else if (flags & TRACE_ID_REQ_STATIC) + return -EBUSY; + } else if (flags & TRACE_ID_PREFER_ODD) { /* may use odd ids to avoid preferred legacy cpu IDs */ id = coresight_trace_id_find_odd_id(id_map); if (id) goto trace_id_allocated; - } + } else if (!IS_VALID_CS_TRACE_ID(preferred_id) && + (flags & TRACE_ID_REQ_STATIC)) + return -EINVAL; /* * skip reserved bit 0, look at bitmap length of @@ -153,7 +163,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map */ id = coresight_trace_id_alloc_new_id(id_map, CORESIGHT_LEGACY_CPU_TRACE_ID(cpu), - false); + TRACE_ID_ANY); if (!IS_VALID_CS_TRACE_ID(id)) goto get_cpu_id_out_unlock; @@ -188,14 +198,14 @@ static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_ma DUMP_ID_MAP(id_map); } -static int coresight_trace_id_map_get_system_id(struct coresight_trace_id_map *id_map) +static int coresight_trace_id_map_get_system_id(struct coresight_trace_id_map *id_map, + int preferred_id, unsigned int traceid_flags) { unsigned long flags; int id; spin_lock_irqsave(&id_map->lock, flags); - /* prefer odd IDs for system components to avoid legacy CPU IDS */ - id = coresight_trace_id_alloc_new_id(id_map, 0, true); + id = coresight_trace_id_alloc_new_id(id_map, preferred_id, traceid_flags); spin_unlock_irqrestore(&id_map->lock, flags); DUMP_ID(id); @@ -255,10 +265,19 @@ EXPORT_SYMBOL_GPL(coresight_trace_id_read_cpu_id_map); int coresight_trace_id_get_system_id(void) { - return coresight_trace_id_map_get_system_id(&id_map_default); + /* prefer odd IDs for system components to avoid legacy CPU IDS */ + return coresight_trace_id_map_get_system_id(&id_map_default, 0, + TRACE_ID_PREFER_ODD); } EXPORT_SYMBOL_GPL(coresight_trace_id_get_system_id); +int coresight_trace_id_get_static_system_id(int trace_id) +{ + return coresight_trace_id_map_get_system_id(&id_map_default, + trace_id, TRACE_ID_REQ_STATIC); +} +EXPORT_SYMBOL_GPL(coresight_trace_id_get_static_system_id); + void coresight_trace_id_put_system_id(int id) { coresight_trace_id_map_put_system_id(&id_map_default, id); diff --git a/drivers/hwtracing/coresight/coresight-trace-id.h b/drivers/hwtracing/coresight/coresight-trace-id.h index 9aae50a553ca..db68e1ec56b6 100644 --- a/drivers/hwtracing/coresight/coresight-trace-id.h +++ b/drivers/hwtracing/coresight/coresight-trace-id.h @@ -116,6 +116,15 @@ int coresight_trace_id_read_cpu_id_map(int cpu, struct coresight_trace_id_map *i */ int coresight_trace_id_get_system_id(void); +/** + * Allocate a CoreSight static trace ID for a system component. + * + * Used to allocate static IDs for system trace sources such as dummy source. + * + * return: Trace ID or -EINVAL if allocation is impossible. + */ +int coresight_trace_id_get_static_system_id(int id); + /** * Release an allocated system trace ID. * diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 834029cb9ba2..055ce5cd5c44 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -662,6 +662,7 @@ void coresight_relaxed_write64(struct coresight_device *csdev, void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset); extern int coresight_get_cpu(struct device *dev); +extern int coresight_get_static_trace_id(struct device *dev, u32 *id); struct coresight_platform_data *coresight_get_platform_data(struct device *dev); struct coresight_connection * -- cgit v1.2.3 From eb708cd631a8dca17ff004ccc39bbeb096c1db22 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Wed, 11 Dec 2024 13:35:58 +0000 Subject: regmap: regmap_multi_reg_read(): make register list const Mark the list of registers passed into regmap_multi_reg_read() as a pointer to const. This allows the caller to define the register list as const data. This requires making the same change to _regmap_bulk_read(), which is called by regmap_multi_reg_read(). Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20241211133558.884669-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 4 ++-- include/linux/regmap.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index ff37701b6d6c..f2843f814675 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -3115,7 +3115,7 @@ int regmap_fields_read(struct regmap_field *field, unsigned int id, EXPORT_SYMBOL_GPL(regmap_fields_read); static int _regmap_bulk_read(struct regmap *map, unsigned int reg, - unsigned int *regs, void *val, size_t val_count) + const unsigned int *regs, void *val, size_t val_count) { u32 *u32 = val; u16 *u16 = val; @@ -3209,7 +3209,7 @@ EXPORT_SYMBOL_GPL(regmap_bulk_read); * A value of zero will be returned on success, a negative errno will * be returned in error cases. */ -int regmap_multi_reg_read(struct regmap *map, unsigned int *regs, void *val, +int regmap_multi_reg_read(struct regmap *map, const unsigned int *regs, void *val, size_t val_count) { if (val_count == 0) diff --git a/include/linux/regmap.h b/include/linux/regmap.h index fd41baccbf3e..3871c74f7677 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1244,7 +1244,7 @@ int regmap_noinc_read(struct regmap *map, unsigned int reg, void *val, size_t val_len); int regmap_bulk_read(struct regmap *map, unsigned int reg, void *val, size_t val_count); -int regmap_multi_reg_read(struct regmap *map, unsigned int *reg, void *val, +int regmap_multi_reg_read(struct regmap *map, const unsigned int *reg, void *val, size_t val_count); int regmap_update_bits_base(struct regmap *map, unsigned int reg, unsigned int mask, unsigned int val, -- cgit v1.2.3 From 8392bc2ff8c8bf7c4c5e6dfa71ccd893a3c046f6 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 15 Nov 2024 10:30:29 -0500 Subject: fsnotify: generate pre-content permission event on page fault FS_PRE_ACCESS will be generated on page fault depending on the faulting method. This pre-content event is meant to be used by hierarchical storage managers that want to fill in the file content on first read access. Export a simple helper that file systems that have their own ->fault() will use, and have a more complicated helper to be do fancy things in filemap_fault. Signed-off-by: Josef Bacik Signed-off-by: Jan Kara Link: https://patch.msgid.link/aa56c50ce81b1fd18d7f5d71dd2dfced5eba9687.1731684329.git.josef@toxicpanda.com --- include/linux/mm.h | 1 + mm/filemap.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ mm/nommu.c | 7 ++++++ 3 files changed, 82 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index c39c4945946c..e6c3c9cbcfe5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3420,6 +3420,7 @@ extern vm_fault_t filemap_fault(struct vm_fault *vmf); extern vm_fault_t filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff); extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf); +extern vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf); extern unsigned long stack_guard_gap; /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ diff --git a/mm/filemap.c b/mm/filemap.c index e9a0f330d33e..6fdd5dc093c0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include "internal.h" @@ -3288,6 +3289,48 @@ static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf) return ret; } +/** + * filemap_fsnotify_fault - maybe emit a pre-content event. + * @vmf: struct vm_fault containing details of the fault. + * + * If we have a pre-content watch on this file we will emit an event for this + * range. If we return anything the fault caller should return immediately, we + * will return VM_FAULT_RETRY if we had to emit an event, which will trigger the + * fault again and then the fault handler will run the second time through. + * + * Return: a bitwise-OR of %VM_FAULT_ codes, 0 if nothing happened. + */ +vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) +{ + struct file *fpin = NULL; + int mask = (vmf->flags & FAULT_FLAG_WRITE) ? MAY_WRITE : MAY_ACCESS; + loff_t pos = vmf->pgoff >> PAGE_SHIFT; + size_t count = PAGE_SIZE; + int err; + + /* + * We already did this and now we're retrying with everything locked, + * don't emit the event and continue. + */ + if (vmf->flags & FAULT_FLAG_TRIED) + return 0; + + /* No watches, we're done. */ + if (likely(!FMODE_FSNOTIFY_HSM(vmf->vma->vm_file->f_mode))) + return 0; + + fpin = maybe_unlock_mmap_for_io(vmf, fpin); + if (!fpin) + return VM_FAULT_SIGBUS; + + err = fsnotify_file_area_perm(fpin, mask, &pos, count); + fput(fpin); + if (err) + return VM_FAULT_SIGBUS; + return VM_FAULT_RETRY; +} +EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); + /** * filemap_fault - read in file data for page fault handling * @vmf: struct vm_fault containing details of the fault @@ -3391,6 +3434,37 @@ retry_find: * or because readahead was otherwise unable to retrieve it. */ if (unlikely(!folio_test_uptodate(folio))) { + /* + * If this is a precontent file we have can now emit an event to + * try and populate the folio. + */ + if (!(vmf->flags & FAULT_FLAG_TRIED) && + unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) { + loff_t pos = folio_pos(folio); + size_t count = folio_size(folio); + + /* We're NOWAIT, we have to retry. */ + if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT) { + folio_unlock(folio); + goto out_retry; + } + + if (mapping_locked) + filemap_invalidate_unlock_shared(mapping); + mapping_locked = false; + + folio_unlock(folio); + fpin = maybe_unlock_mmap_for_io(vmf, fpin); + if (!fpin) + goto out_retry; + + error = fsnotify_file_area_perm(fpin, MAY_ACCESS, &pos, + count); + if (error) + ret = VM_FAULT_SIGBUS; + goto out_retry; + } + /* * If the invalidate lock is not held, the folio was in cache * and uptodate and now it is not. Strange but possible since we diff --git a/mm/nommu.c b/mm/nommu.c index 9cb6e99215e2..baa79abdaf03 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1613,6 +1613,13 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, } EXPORT_SYMBOL(remap_vmalloc_range); +vm_fault_t filemap_fsnotify_fault(struct vm_fault *vmf) +{ + BUG(); + return 0; +} +EXPORT_SYMBOL_GPL(filemap_fsnotify_fault); + vm_fault_t filemap_fault(struct vm_fault *vmf) { BUG(); -- cgit v1.2.3 From 0357ef03c94ef835bd44a0658b8edb672a9dbf51 Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Thu, 28 Nov 2024 15:25:32 +0100 Subject: fs: don't block write during exec on pre-content watched files Commit 2a010c412853 ("fs: don't block i_writecount during exec") removed the legacy behavior of getting ETXTBSY on attempt to open and executable file for write while it is being executed. This commit was reverted because an application that depends on this legacy behavior was broken by the change. We need to allow HSM writing into executable files while executed to fill their content on-the-fly. To that end, disable the ETXTBSY legacy behavior for files that are watched by pre-content events. This change is not expected to cause regressions with existing systems which do not have any pre-content event listeners. Signed-off-by: Amir Goldstein Acked-by: Christian Brauner Signed-off-by: Jan Kara Link: https://patch.msgid.link/20241128142532.465176-1-amir73il@gmail.com --- fs/binfmt_elf.c | 4 ++-- fs/binfmt_elf_fdpic.c | 4 ++-- fs/exec.c | 8 ++++---- include/linux/fs.h | 22 ++++++++++++++++++++++ kernel/fork.c | 12 ++++++------ 5 files changed, 36 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 106f0e8af177..8054f44d39cf 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1257,7 +1257,7 @@ out_free_interp: } reloc_func_desc = interp_load_addr; - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); fput(interpreter); kfree(interp_elf_ex); @@ -1354,7 +1354,7 @@ out_free_dentry: kfree(interp_elf_ex); kfree(interp_elf_phdata); out_free_file: - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); if (interpreter) fput(interpreter); out_free_ph: diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index f1a7c4875c4a..c13ee8180b17 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -394,7 +394,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) goto error; } - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); fput(interpreter); interpreter = NULL; } @@ -467,7 +467,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) error: if (interpreter) { - allow_write_access(interpreter); + exe_file_allow_write_access(interpreter); fput(interpreter); } kfree(interpreter_name); diff --git a/fs/exec.c b/fs/exec.c index 98cb7ba9983c..c41cfd35c74c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -912,7 +912,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) path_noexec(&file->f_path)) return ERR_PTR(-EACCES); - err = deny_write_access(file); + err = exe_file_deny_write_access(file); if (err) return ERR_PTR(err); @@ -927,7 +927,7 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) * Returns ERR_PTR on failure or allocated struct file on success. * * As this is a wrapper for the internal do_open_execat(), callers - * must call allow_write_access() before fput() on release. Also see + * must call exe_file_allow_write_access() before fput() on release. Also see * do_close_execat(). */ struct file *open_exec(const char *name) @@ -1471,7 +1471,7 @@ static void do_close_execat(struct file *file) { if (!file) return; - allow_write_access(file); + exe_file_allow_write_access(file); fput(file); } @@ -1797,7 +1797,7 @@ static int exec_binprm(struct linux_binprm *bprm) bprm->file = bprm->interpreter; bprm->interpreter = NULL; - allow_write_access(exec); + exe_file_allow_write_access(exec); if (unlikely(bprm->have_execfd)) { if (bprm->executable) { fput(exec); diff --git a/include/linux/fs.h b/include/linux/fs.h index 3f4d59464965..a1230c40fef1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3095,6 +3095,28 @@ static inline void allow_write_access(struct file *file) if (file) atomic_inc(&file_inode(file)->i_writecount); } + +/* + * Do not prevent write to executable file when watched by pre-content events. + * + * Note that FMODE_FSNOTIFY_HSM mode is set depending on pre-content watches at + * the time of file open and remains constant for entire lifetime of the file, + * so if pre-content watches are added post execution or removed before the end + * of the execution, it will not cause i_writecount reference leak. + */ +static inline int exe_file_deny_write_access(struct file *exe_file) +{ + if (unlikely(FMODE_FSNOTIFY_HSM(exe_file->f_mode))) + return 0; + return deny_write_access(exe_file); +} +static inline void exe_file_allow_write_access(struct file *exe_file) +{ + if (unlikely(!exe_file || FMODE_FSNOTIFY_HSM(exe_file->f_mode))) + return; + allow_write_access(exe_file); +} + static inline bool inode_is_open_for_write(const struct inode *inode) { return atomic_read(&inode->i_writecount) > 0; diff --git a/kernel/fork.c b/kernel/fork.c index 1450b461d196..015c397f47ca 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -625,8 +625,8 @@ static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm) * We depend on the oldmm having properly denied write access to the * exe_file already. */ - if (exe_file && deny_write_access(exe_file)) - pr_warn_once("deny_write_access() failed in %s\n", __func__); + if (exe_file && exe_file_deny_write_access(exe_file)) + pr_warn_once("exe_file_deny_write_access() failed in %s\n", __func__); } #ifdef CONFIG_MMU @@ -1424,13 +1424,13 @@ int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) * We expect the caller (i.e., sys_execve) to already denied * write access, so this is unlikely to fail. */ - if (unlikely(deny_write_access(new_exe_file))) + if (unlikely(exe_file_deny_write_access(new_exe_file))) return -EACCES; get_file(new_exe_file); } rcu_assign_pointer(mm->exe_file, new_exe_file); if (old_exe_file) { - allow_write_access(old_exe_file); + exe_file_allow_write_access(old_exe_file); fput(old_exe_file); } return 0; @@ -1471,7 +1471,7 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) return ret; } - ret = deny_write_access(new_exe_file); + ret = exe_file_deny_write_access(new_exe_file); if (ret) return -EACCES; get_file(new_exe_file); @@ -1483,7 +1483,7 @@ int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file) mmap_write_unlock(mm); if (old_exe_file) { - allow_write_access(old_exe_file); + exe_file_allow_write_access(old_exe_file); fput(old_exe_file); } return 0; -- cgit v1.2.3 From a87ef09b1fdf75fdc2d6b386ff23a35589173055 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 6 Dec 2024 18:28:36 +0100 Subject: iio: adc: ad_sigma_delta: Add support for reading irq status using a GPIO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of the ADCs by Analog signal their irq condition on the MISO line. So typically that line is connected to an SPI controller and a GPIO. The GPIO is used as input and the respective interrupt is enabled when the last SPI transfer is completed. Depending on the GPIO controller the toggling MISO line might make the interrupt pending even while it's masked. In that case the irq handler is called immediately after irq_enable() and so before the device actually pulls that line low which results in non-sense values being reported to the upper layers. The only way to find out if the line was actually pulled low is to read the GPIO. (There is a flag in AD7124's status register that also signals if an interrupt was asserted, but reading that register toggles the MISO line and so might trigger another spurious interrupt.) Add the possibility to specify an interrupt GPIO in the machine description in addition to the plain interrupt. This GPIO is used then to check if the irq line is actually active in the irq handler. Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/5be9a4cc4dc600ec384c88db01dd661a21506b9c.1733504533.git.u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 38 +++++++++++++++++++++++++++++----- include/linux/iio/adc/ad_sigma_delta.h | 2 ++ 2 files changed, 35 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 3fd200b34161..8fe2ed8b30f9 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -539,12 +539,29 @@ static irqreturn_t ad_sd_data_rdy_trig_poll(int irq, void *private) { struct ad_sigma_delta *sigma_delta = private; - complete(&sigma_delta->completion); - disable_irq_nosync(irq); - sigma_delta->irq_dis = true; - iio_trigger_poll(sigma_delta->trig); + /* + * AD7124 and a few others use the same physical line for interrupt + * reporting (R̅D̅Y̅) and MISO. + * As MISO toggles when reading a register, this likely results in a + * pending interrupt. This has two consequences: a) The irq might + * trigger immediately after it's enabled even though the conversion + * isn't done yet; and b) checking the STATUS register's R̅D̅Y̅ flag is + * off-limits as reading that would trigger another irq event. + * + * So read the MOSI line as GPIO (if available) and only trigger the irq + * if the line is active. Without such a GPIO assume this is a valid + * interrupt. + */ + if (!sigma_delta->rdy_gpiod || gpiod_get_value(sigma_delta->rdy_gpiod)) { + complete(&sigma_delta->completion); + disable_irq_nosync(irq); + sigma_delta->irq_dis = true; + iio_trigger_poll(sigma_delta->trig); - return IRQ_HANDLED; + return IRQ_HANDLED; + } + + return IRQ_NONE; } /** @@ -679,6 +696,17 @@ int ad_sd_init(struct ad_sigma_delta *sigma_delta, struct iio_dev *indio_dev, else sigma_delta->irq_line = spi->irq; + sigma_delta->rdy_gpiod = devm_gpiod_get_optional(&spi->dev, "rdy", GPIOD_IN); + if (IS_ERR(sigma_delta->rdy_gpiod)) + return dev_err_probe(&spi->dev, PTR_ERR(sigma_delta->rdy_gpiod), + "Failed to find rdy gpio\n"); + + if (sigma_delta->rdy_gpiod && !sigma_delta->irq_line) { + sigma_delta->irq_line = gpiod_to_irq(sigma_delta->rdy_gpiod); + if (sigma_delta->irq_line < 0) + return sigma_delta->irq_line; + } + iio_device_set_drvdata(indio_dev, sigma_delta); return 0; diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 1851f8fed3a4..895b7ebf4be5 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -29,6 +29,7 @@ struct ad_sd_calib_data { struct ad_sigma_delta; struct device; +struct gpio_desc; struct iio_dev; /** @@ -96,6 +97,7 @@ struct ad_sigma_delta { unsigned int active_slots; unsigned int current_slot; unsigned int num_slots; + struct gpio_desc *rdy_gpiod; int irq_line; bool status_appended; /* map slots to channels in order to know what to expect from devices */ -- cgit v1.2.3 From f522589c139debb8af56dbead0c6e9dfca2d5ce4 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 6 Dec 2024 18:28:38 +0100 Subject: iio: adc: ad_sigma_delta: Fix a race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ad_sigma_delta driver helper uses irq_disable_nosync(). With that one it is possible that the irq handler still runs after the irq_disable_nosync() function call returns. Also to properly synchronize irq disabling in the different threads proper locking is needed and because it's unclear if the irq handler's irq_disable_nosync() call comes first or the one in the enabler's error path, all code locations that disable the irq must check for .irq_dis first to ensure there is exactly one disable call per enable call. So add a spinlock to the struct ad_sigma_delta and use it to synchronize irq enabling and disabling. Also only act in the irq handler if the irq is still enabled. Fixes: af3008485ea0 ("iio:adc: Add common code for ADI Sigma Delta devices") Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/9e6def47e2e773e0e15b7a2c29d22629b53d91b1.1733504533.git.u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad_sigma_delta.c | 56 +++++++++++++++++++++------------- include/linux/iio/adc/ad_sigma_delta.h | 1 + 2 files changed, 36 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 65608dc2bfec..950baf4160da 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -202,6 +202,27 @@ int ad_sd_reset(struct ad_sigma_delta *sigma_delta, } EXPORT_SYMBOL_NS_GPL(ad_sd_reset, "IIO_AD_SIGMA_DELTA"); +static bool ad_sd_disable_irq(struct ad_sigma_delta *sigma_delta) +{ + guard(spinlock_irqsave)(&sigma_delta->irq_lock); + + /* It's already off, return false to indicate nothing was changed */ + if (sigma_delta->irq_dis) + return false; + + sigma_delta->irq_dis = true; + disable_irq_nosync(sigma_delta->irq_line); + return true; +} + +static void ad_sd_enable_irq(struct ad_sigma_delta *sigma_delta) +{ + guard(spinlock_irqsave)(&sigma_delta->irq_lock); + + sigma_delta->irq_dis = false; + enable_irq(sigma_delta->irq_line); +} + int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, unsigned int mode, unsigned int channel) { @@ -221,12 +242,10 @@ int ad_sd_calibrate(struct ad_sigma_delta *sigma_delta, if (ret < 0) goto out; - sigma_delta->irq_dis = false; - enable_irq(sigma_delta->irq_line); + ad_sd_enable_irq(sigma_delta); time_left = wait_for_completion_timeout(&sigma_delta->completion, 2 * HZ); if (time_left == 0) { - sigma_delta->irq_dis = true; - disable_irq_nosync(sigma_delta->irq_line); + ad_sd_disable_irq(sigma_delta); ret = -EIO; } else { ret = 0; @@ -294,8 +313,7 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_SINGLE); - sigma_delta->irq_dis = false; - enable_irq(sigma_delta->irq_line); + ad_sd_enable_irq(sigma_delta); ret = wait_for_completion_interruptible_timeout( &sigma_delta->completion, HZ); @@ -314,10 +332,7 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, &raw_sample); out: - if (!sigma_delta->irq_dis) { - disable_irq_nosync(sigma_delta->irq_line); - sigma_delta->irq_dis = true; - } + ad_sd_disable_irq(sigma_delta); sigma_delta->keep_cs_asserted = false; ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); @@ -396,8 +411,7 @@ static int ad_sd_buffer_postenable(struct iio_dev *indio_dev) if (ret) goto err_unlock; - sigma_delta->irq_dis = false; - enable_irq(sigma_delta->irq_line); + ad_sd_enable_irq(sigma_delta); return 0; @@ -414,10 +428,7 @@ static int ad_sd_buffer_postdisable(struct iio_dev *indio_dev) reinit_completion(&sigma_delta->completion); wait_for_completion_timeout(&sigma_delta->completion, HZ); - if (!sigma_delta->irq_dis) { - disable_irq_nosync(sigma_delta->irq_line); - sigma_delta->irq_dis = true; - } + ad_sd_disable_irq(sigma_delta); sigma_delta->keep_cs_asserted = false; ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE); @@ -516,8 +527,7 @@ static irqreturn_t ad_sd_trigger_handler(int irq, void *p) irq_handled: iio_trigger_notify_done(indio_dev->trig); - sigma_delta->irq_dis = false; - enable_irq(sigma_delta->irq_line); + ad_sd_enable_irq(sigma_delta); return IRQ_HANDLED; } @@ -551,11 +561,13 @@ static irqreturn_t ad_sd_data_rdy_trig_poll(int irq, void *private) * So read the MOSI line as GPIO (if available) and only trigger the irq * if the line is active. Without such a GPIO assume this is a valid * interrupt. + * + * Also as disable_irq_nosync() is used to disable the irq, only act if + * the irq wasn't disabled before. */ - if (!sigma_delta->rdy_gpiod || gpiod_get_value(sigma_delta->rdy_gpiod)) { + if ((!sigma_delta->rdy_gpiod || gpiod_get_value(sigma_delta->rdy_gpiod)) && + ad_sd_disable_irq(sigma_delta)) { complete(&sigma_delta->completion); - disable_irq_nosync(irq); - sigma_delta->irq_dis = true; iio_trigger_poll(sigma_delta->trig); return IRQ_HANDLED; @@ -691,6 +703,8 @@ int ad_sd_init(struct ad_sigma_delta *sigma_delta, struct iio_dev *indio_dev, } } + spin_lock_init(&sigma_delta->irq_lock); + if (info->irq_line) sigma_delta->irq_line = info->irq_line; else diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 895b7ebf4be5..200130e4244d 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -86,6 +86,7 @@ struct ad_sigma_delta { /* private: */ struct completion completion; + spinlock_t irq_lock; /* protects .irq_dis and irq en/disable state */ bool irq_dis; bool bus_locked; -- cgit v1.2.3 From 07a28874bb49700036a3ab435dd95ae31afd21ae Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Fri, 6 Dec 2024 18:28:39 +0100 Subject: iio: adc: ad_sigma_delta: Store information about reset sequence length MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The various chips can be reset using a sequence of SPI transfers with MOSI = 1. The length of such a sequence varies from chip to chip. Store that length in struct ad_sigma_delta_info and replace the respective parameter to ad_sd_reset() with it. Note the ad7192 used to pass 48 as length but the documentation specifies 40 as the required length. Assuming the latter is right. (Using a too long sequence doesn't hurt apart from using a longer spi transfer than necessary, so this is no relevant fix.) The motivation for storing this information is that this is useful to clear a pending R̅D̅Y̅ signal in the next change. Signed-off-by: Uwe Kleine-König Link: https://patch.msgid.link/9750db62fce638bf140ff48172c23bff7f785e5b.1733504533.git.u.kleine-koenig@baylibre.com Signed-off-by: Jonathan Cameron --- drivers/iio/adc/ad7124.c | 3 ++- drivers/iio/adc/ad7173.c | 1 + drivers/iio/adc/ad7192.c | 4 +++- drivers/iio/adc/ad7791.c | 1 + drivers/iio/adc/ad7793.c | 3 ++- drivers/iio/adc/ad_sigma_delta.c | 5 ++--- include/linux/iio/adc/ad_sigma_delta.h | 5 +++-- 7 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index b7491198e33c..af74bea18a69 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -571,6 +571,7 @@ static const struct ad_sigma_delta_info ad7124_sigma_delta_info = { .data_reg = AD7124_DATA, .num_slots = 8, .irq_flags = IRQF_TRIGGER_FALLING, + .num_resetclks = 64, }; static int ad7124_read_raw(struct iio_dev *indio_dev, @@ -756,7 +757,7 @@ static int ad7124_soft_reset(struct ad7124_state *st) unsigned int readval, timeout; int ret; - ret = ad_sd_reset(&st->sd, 64); + ret = ad_sd_reset(&st->sd); if (ret < 0) return ret; diff --git a/drivers/iio/adc/ad7173.c b/drivers/iio/adc/ad7173.c index c5ac4b7e7c2c..d48b5d98207e 100644 --- a/drivers/iio/adc/ad7173.c +++ b/drivers/iio/adc/ad7173.c @@ -763,6 +763,7 @@ static struct ad_sigma_delta_info ad7173_sigma_delta_info = { .read_mask = BIT(6), .status_ch_mask = GENMASK(3, 0), .data_reg = AD7173_REG_DATA, + .num_resetclks = 64, }; static int ad7173_setup(struct iio_dev *indio_dev) diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index 1c87db0e0460..e96a5ae92375 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -361,6 +361,7 @@ static const struct ad_sigma_delta_info ad7192_sigma_delta_info = { .status_ch_mask = GENMASK(3, 0), .num_slots = 4, .irq_flags = IRQF_TRIGGER_FALLING, + .num_resetclks = 40, }; static const struct ad_sigma_delta_info ad7194_sigma_delta_info = { @@ -373,6 +374,7 @@ static const struct ad_sigma_delta_info ad7194_sigma_delta_info = { .read_mask = BIT(6), .status_ch_mask = GENMASK(3, 0), .irq_flags = IRQF_TRIGGER_FALLING, + .num_resetclks = 40, }; static const struct ad_sd_calib_data ad7192_calib_arr[8] = { @@ -565,7 +567,7 @@ static int ad7192_setup(struct iio_dev *indio_dev, struct device *dev) int i, ret, id; /* reset the serial interface */ - ret = ad_sd_reset(&st->sd, 48); + ret = ad_sd_reset(&st->sd); if (ret < 0) return ret; usleep_range(500, 1000); /* Wait for at least 500us */ diff --git a/drivers/iio/adc/ad7791.c b/drivers/iio/adc/ad7791.c index e1bf13fe2cd7..76118fe22db8 100644 --- a/drivers/iio/adc/ad7791.c +++ b/drivers/iio/adc/ad7791.c @@ -254,6 +254,7 @@ static const struct ad_sigma_delta_info ad7791_sigma_delta_info = { .addr_shift = 4, .read_mask = BIT(3), .irq_flags = IRQF_TRIGGER_FALLING, + .num_resetclks = 32, }; static int ad7791_read_raw(struct iio_dev *indio_dev, diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index d55c71566707..1b50d9643a63 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -206,6 +206,7 @@ static const struct ad_sigma_delta_info ad7793_sigma_delta_info = { .addr_shift = 3, .read_mask = BIT(6), .irq_flags = IRQF_TRIGGER_FALLING, + .num_resetclks = 32, }; static const struct ad_sd_calib_data ad7793_calib_arr[6] = { @@ -265,7 +266,7 @@ static int ad7793_setup(struct iio_dev *indio_dev, return ret; /* reset the serial interface */ - ret = ad_sd_reset(&st->sd, 32); + ret = ad_sd_reset(&st->sd); if (ret < 0) goto out; usleep_range(500, 2000); /* Wait for at least 500us */ diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c index 950baf4160da..c290d07ab1c5 100644 --- a/drivers/iio/adc/ad_sigma_delta.c +++ b/drivers/iio/adc/ad_sigma_delta.c @@ -178,13 +178,12 @@ EXPORT_SYMBOL_NS_GPL(ad_sd_read_reg, "IIO_AD_SIGMA_DELTA"); * ad_sd_reset() - Reset the serial interface * * @sigma_delta: The sigma delta device - * @reset_length: Number of SCLKs with DIN = 1 * * Returns 0 on success, an error code otherwise. **/ -int ad_sd_reset(struct ad_sigma_delta *sigma_delta, - unsigned int reset_length) +int ad_sd_reset(struct ad_sigma_delta *sigma_delta) { + unsigned int reset_length = sigma_delta->info->num_resetclks; uint8_t *buf; unsigned int size; int ret; diff --git a/include/linux/iio/adc/ad_sigma_delta.h b/include/linux/iio/adc/ad_sigma_delta.h index 200130e4244d..417073c52380 100644 --- a/include/linux/iio/adc/ad_sigma_delta.h +++ b/include/linux/iio/adc/ad_sigma_delta.h @@ -54,6 +54,7 @@ struct iio_dev; * @irq_flags: flags for the interrupt used by the triggered buffer * @num_slots: Number of sequencer slots * @irq_line: IRQ for reading conversions. If 0, spi->irq will be used + * @num_resetclks: Number of SPI clk cycles with MOSI=1 to reset the chip. */ struct ad_sigma_delta_info { int (*set_channel)(struct ad_sigma_delta *, unsigned int channel); @@ -70,6 +71,7 @@ struct ad_sigma_delta_info { unsigned long irq_flags; unsigned int num_slots; int irq_line; + unsigned int num_resetclks; }; /** @@ -181,8 +183,7 @@ int ad_sd_write_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, int ad_sd_read_reg(struct ad_sigma_delta *sigma_delta, unsigned int reg, unsigned int size, unsigned int *val); -int ad_sd_reset(struct ad_sigma_delta *sigma_delta, - unsigned int reset_length); +int ad_sd_reset(struct ad_sigma_delta *sigma_delta); int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev, const struct iio_chan_spec *chan, int *val); -- cgit v1.2.3 From 22ccb0a1c57c436de899ccd3170d6d2ce7238836 Mon Sep 17 00:00:00 2001 From: Matteo Martelli Date: Mon, 2 Dec 2024 16:11:07 +0100 Subject: iio: consumers: ensure read buffers for labels and ext_info are page aligned Attributes of iio providers are exposed via sysfs. Typically, providers pass attribute values to the iio core, which handles formatting and printing to sysfs. However, some attributes, such as labels or extended info, are directly formatted and printed to sysfs by provider drivers using sysfs_emit() and sysfs_emit_at(). These helpers assume the read buffer, allocated by sysfs fop, is page-aligned. When these attributes are accessed by consumer drivers, the read buffer is allocated by the consumer and may not be page-aligned, leading to failures in the provider's callback that utilizes sysfs_emit*. Add a check to ensure that read buffers for labels and external info attributes are page-aligned. Update the prototype documentation as well. Signed-off-by: Matteo Martelli Link: https://patch.msgid.link/20241202-iio-kmalloc-align-v1-1-aa9568c03937@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/inkern.c | 11 +++++++++++ include/linux/iio/consumer.h | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/iio/inkern.c b/drivers/iio/inkern.c index 136b225b6bc8..520743fde103 100644 --- a/drivers/iio/inkern.c +++ b/drivers/iio/inkern.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -989,6 +990,11 @@ ssize_t iio_read_channel_ext_info(struct iio_channel *chan, { const struct iio_chan_spec_ext_info *ext_info; + if (!buf || offset_in_page(buf)) { + pr_err("iio: invalid ext_info read buffer\n"); + return -EINVAL; + } + ext_info = iio_lookup_ext_info(chan, attr); if (!ext_info) return -EINVAL; @@ -1014,6 +1020,11 @@ EXPORT_SYMBOL_GPL(iio_write_channel_ext_info); ssize_t iio_read_channel_label(struct iio_channel *chan, char *buf) { + if (!buf || offset_in_page(buf)) { + pr_err("iio: invalid label read buffer\n"); + return -EINVAL; + } + return do_iio_read_channel_label(chan->indio_dev, chan->channel, buf); } EXPORT_SYMBOL_GPL(iio_read_channel_label); diff --git a/include/linux/iio/consumer.h b/include/linux/iio/consumer.h index 333d1d8ccb37..6a4479616479 100644 --- a/include/linux/iio/consumer.h +++ b/include/linux/iio/consumer.h @@ -418,7 +418,7 @@ unsigned int iio_get_channel_ext_info_count(struct iio_channel *chan); * @chan: The channel being queried. * @attr: The ext_info attribute to read. * @buf: Where to store the attribute value. Assumed to hold - * at least PAGE_SIZE bytes. + * at least PAGE_SIZE bytes and to be aligned at PAGE_SIZE. * * Returns the number of bytes written to buf (perhaps w/o zero termination; * it need not even be a string), or an error code. @@ -445,7 +445,7 @@ ssize_t iio_write_channel_ext_info(struct iio_channel *chan, const char *attr, * iio_read_channel_label() - read label for a given channel * @chan: The channel being queried. * @buf: Where to store the attribute value. Assumed to hold - * at least PAGE_SIZE bytes. + * at least PAGE_SIZE bytes and to be aligned at PAGE_SIZE. * * Returns the number of bytes written to buf, or an error code. */ -- cgit v1.2.3 From bad6722e478f5b17a5ceb039dfb4c680cf2c0b48 Mon Sep 17 00:00:00 2001 From: Eliav Farber Date: Wed, 4 Dec 2024 14:20:02 +0000 Subject: kexec: Consolidate machine_kexec_mask_interrupts() implementation Consolidate the machine_kexec_mask_interrupts implementation into a common function located in a new file: kernel/irq/kexec.c. This removes duplicate implementations from architecture-specific files in arch/arm, arch/arm64, arch/powerpc, and arch/riscv, reducing code duplication and improving maintainability. The new implementation retains architecture-specific behavior for CONFIG_GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD, which was previously implemented for ARM64. When enabled (currently for ARM64), it clears the active state of interrupts forwarded to virtual machines (VMs) before handling other interrupt masking operations. Signed-off-by: Eliav Farber Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20241204142003.32859-2-farbere@amazon.com --- arch/arm/kernel/machine_kexec.c | 23 ---------------------- arch/arm64/Kconfig | 1 + arch/arm64/kernel/machine_kexec.c | 31 ------------------------------ arch/powerpc/include/asm/kexec.h | 1 - arch/powerpc/kexec/core.c | 22 --------------------- arch/powerpc/kexec/core_32.c | 1 + arch/riscv/kernel/machine_kexec.c | 23 ---------------------- include/linux/irq.h | 3 +++ kernel/irq/Kconfig | 6 ++++++ kernel/irq/Makefile | 2 +- kernel/irq/kexec.c | 40 +++++++++++++++++++++++++++++++++++++++ 11 files changed, 52 insertions(+), 101 deletions(-) create mode 100644 kernel/irq/kexec.c (limited to 'include') diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 80ceb5bd2680..dd430477e7c1 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -127,29 +127,6 @@ void crash_smp_send_stop(void) cpus_stopped = 1; } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - void machine_crash_shutdown(struct pt_regs *regs) { local_irq_disable(); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 100570a048c5..dcc3551cf6c2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -149,6 +149,7 @@ config ARM64 select GENERIC_IDLE_POLL_SETUP select GENERIC_IOREMAP select GENERIC_IRQ_IPI + select GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL diff --git a/arch/arm64/kernel/machine_kexec.c b/arch/arm64/kernel/machine_kexec.c index 82e2203d86a3..6f121a0164a4 100644 --- a/arch/arm64/kernel/machine_kexec.c +++ b/arch/arm64/kernel/machine_kexec.c @@ -207,37 +207,6 @@ void machine_kexec(struct kimage *kimage) BUG(); /* Should never get here. */ } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - int ret; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - /* - * First try to remove the active state. If this - * fails, try to EOI the interrupt. - */ - ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); - - if (ret && irqd_irq_inprogress(&desc->irq_data) && - chip->irq_eoi) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /** * machine_crash_shutdown - shutdown non-crashing cpus and save registers */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 270ee93a0f7d..601e569303e1 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -61,7 +61,6 @@ struct pt_regs; extern void kexec_smp_wait(void); /* get and clear naca physid, wait for master to copy new code to 0 */ extern void default_machine_kexec(struct kimage *image); -extern void machine_kexec_mask_interrupts(void); void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_code_buffer, unsigned long start_address) __noreturn; diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c index b8333a49ea5d..58a930a47422 100644 --- a/arch/powerpc/kexec/core.c +++ b/arch/powerpc/kexec/core.c @@ -22,28 +22,6 @@ #include #include -void machine_kexec_mask_interrupts(void) { - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - #ifdef CONFIG_CRASH_DUMP void machine_crash_shutdown(struct pt_regs *regs) { diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c index c95f96850c9e..deb28eb44f30 100644 --- a/arch/powerpc/kexec/core_32.c +++ b/arch/powerpc/kexec/core_32.c @@ -7,6 +7,7 @@ * Copyright (C) 2005 IBM Corporation. */ +#include #include #include #include diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index 3c830a6f7ef4..2306ce3e5f22 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -114,29 +114,6 @@ void machine_shutdown(void) #endif } -static void machine_kexec_mask_interrupts(void) -{ - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * diff --git a/include/linux/irq.h b/include/linux/irq.h index fa711f80957b..25f51bf3c351 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -694,6 +694,9 @@ extern int irq_chip_request_resources_parent(struct irq_data *data); extern void irq_chip_release_resources_parent(struct irq_data *data); #endif +/* Disable or mask interrupts during a kernel kexec */ +extern void machine_kexec_mask_interrupts(void); + /* Handling of unhandled and spurious interrupts: */ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 529adb1f5859..875f25ed6f71 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -141,6 +141,12 @@ config GENERIC_IRQ_DEBUGFS If you don't know what to do here, say N. +# Clear forwarded VM interrupts during kexec. +# This option ensures the kernel clears active states for interrupts +# forwarded to virtual machines (VMs) during a machine kexec. +config GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD + bool + endmenu config GENERIC_IRQ_MULTI_HANDLER diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index f19d3080bf11..c0f44c06d69d 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o +obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o kexec.o obj-$(CONFIG_IRQ_TIMINGS) += timings.o ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y) CFLAGS_timings.o += -DDEBUG diff --git a/kernel/irq/kexec.c b/kernel/irq/kexec.c new file mode 100644 index 000000000000..0f9548c1708d --- /dev/null +++ b/kernel/irq/kexec.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#include "internals.h" + +void machine_kexec_mask_interrupts(void) +{ + struct irq_desc *desc; + unsigned int i; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int check_eoi = 1; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + if (IS_ENABLED(CONFIG_GENERIC_IRQ_KEXEC_CLEAR_VM_FORWARD)) { + /* + * First try to remove the active state from an interrupt which is forwarded + * to a VM. If the interrupt is not forwarded, try to EOI the interrupt. + */ + check_eoi = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + } + + if (check_eoi && chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} -- cgit v1.2.3 From 41d7ea30494cc0dde3e124a75ce0add93f988ba9 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 10 Dec 2024 12:27:12 -0800 Subject: lib: packing: add pack_fields() and unpack_fields() This is new API which caters to the following requirements: - Pack or unpack a large number of fields to/from a buffer with a small code footprint. The current alternative is to open-code a large number of calls to pack() and unpack(), or to use packing() to reduce that number to half. But packing() is not const-correct. - Use unpacked numbers stored in variables smaller than u64. This reduces the rodata footprint of the stored field arrays. - Perform error checking at compile time, rather than runtime, and return void from the API functions. Because the C preprocessor can't generate variable length code (loops), this is a bit tricky to do with macros. To handle this, implement macros which sanity check the packed field definitions based on their size. Finally, a single macro with a chain of __builtin_choose_expr() is used to select the appropriate macros. We enforce the use of ascending or descending order to avoid O(N^2) scaling when checking for overlap. Note that the macros are written with care to ensure that the compilers can correctly evaluate the resulting code at compile time. In particular, care was taken with avoiding too many nested statement expressions. Nested statement expressions trip up some compilers, especially when passing down variables created in previous statement expressions. There are two key design choices intended to keep the overall macro code size small. First, the definition of each CHECK_PACKED_FIELDS_N macro is implemented recursively, by calling the N-1 macro. This avoids needing the code to repeat multiple times. Second, the CHECK_PACKED_FIELD macro enforces that the fields in the array are sorted in order. This allows checking for overlap only with neighboring fields, rather than the general overlap case where each field would need to be checked against other fields. The overlap checks use the first two fields to determine the order of the remaining fields, thus allowing either ascending or descending order. This enables drivers the flexibility to keep the fields ordered in which ever order most naturally fits their hardware design and its associated documentation. The CHECK_PACKED_FIELDS macro is directly called from within pack_fields and unpack_fields, ensuring that all drivers using the API receive the benefits of the compile-time checks. Users do not need to directly call any of the macros directly. The CHECK_PACKED_FIELDS and its helper macros CHECK_PACKED_FIELDS_(0..50) are generated using a simple C program in scripts/gen_packed_field_checks.c This program can be compiled on demand and executed to generate the macro code in include/linux/packing.h. This will aid in the event that a driver needs more than 50 fields. The generator can be updated with a new size, and used to update the packing.h header file. In practice, the ice driver will need to support 27 fields, and the sja1105 driver will need to support 0 fields. This on-demand generation avoids the need to modify Kbuild. We do not anticipate the maximum number of fields to grow very often. - Reduced rodata footprint for the storage of the packed field arrays. To that end, we have struct packed_field_u8 and packed_field_u16, which define the fields with the associated type. More can be added as needed (unlikely for now). On these types, the same generic pack_fields() and unpack_fields() API can be used, thanks to the new C11 _Generic() selection feature, which can call pack_fields_u8() or pack_fields_16(), depending on the type of the "fields" array - a simplistic form of polymorphism. It is evaluated at compile time which function will actually be called. Over time, packing() is expected to be completely replaced either with pack() or with pack_fields(). Signed-off-by: Vladimir Oltean Co-developed-by: Jacob Keller Signed-off-by: Jacob Keller Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/20241210-packing-pack-fields-and-ice-implementation-v10-3-ee56a47479ac@intel.com Signed-off-by: Jakub Kicinski --- MAINTAINERS | 1 + Makefile | 4 + include/linux/packing.h | 425 ++++++++++++++++++++++++++++++++++++++ lib/packing.c | 153 ++++++++++++++ lib/packing_test.c | 61 ++++++ scripts/.gitignore | 1 + scripts/Makefile | 2 +- scripts/gen_packed_field_checks.c | 37 ++++ 8 files changed, 683 insertions(+), 1 deletion(-) create mode 100644 scripts/gen_packed_field_checks.c (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index af35519be320..15cf366c0aec 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -17644,6 +17644,7 @@ F: Documentation/core-api/packing.rst F: include/linux/packing.h F: lib/packing.c F: lib/packing_test.c +F: scripts/gen_packed_field_checks.c PADATA PARALLEL EXECUTION MECHANISM M: Steffen Klassert diff --git a/Makefile b/Makefile index 93ab62cef244..9a9fd5504ae8 100644 --- a/Makefile +++ b/Makefile @@ -1367,6 +1367,10 @@ PHONY += scripts_unifdef scripts_unifdef: scripts_basic $(Q)$(MAKE) $(build)=scripts scripts/unifdef +PHONY += scripts_gen_packed_field_checks +scripts_gen_packed_field_checks: scripts_basic + $(Q)$(MAKE) $(build)=scripts scripts/gen_packed_field_checks + # --------------------------------------------------------------------------- # Install diff --git a/include/linux/packing.h b/include/linux/packing.h index 5d36dcd06f60..0589d70bbe04 100644 --- a/include/linux/packing.h +++ b/include/linux/packing.h @@ -8,6 +8,83 @@ #include #include +#define GEN_PACKED_FIELD_STRUCT(__type) \ + struct packed_field_ ## __type { \ + __type startbit; \ + __type endbit; \ + __type offset; \ + __type size; \ + } + +/* struct packed_field_u8. Use with bit offsets < 256, buffers < 32B and + * unpacked structures < 256B. + */ +GEN_PACKED_FIELD_STRUCT(u8); + +/* struct packed_field_u16. Use with bit offsets < 65536, buffers < 8KB and + * unpacked structures < 64KB. + */ +GEN_PACKED_FIELD_STRUCT(u16); + +#define PACKED_FIELD(start, end, struct_name, struct_field) \ +{ \ + (start), \ + (end), \ + offsetof(struct_name, struct_field), \ + sizeof_field(struct_name, struct_field), \ +} + +#define CHECK_PACKED_FIELD_OVERLAP(fields, index1, index2) ({ \ + typeof(&(fields)[0]) __f = (fields); \ + typeof(__f[0]) _f1 = __f[index1]; typeof(__f[0]) _f2 = __f[index2]; \ + const bool _ascending = __f[0].startbit < __f[1].startbit; \ + BUILD_BUG_ON_MSG(_ascending && _f1.startbit >= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks ascending order"); \ + BUILD_BUG_ON_MSG(!_ascending && _f1.startbit <= _f2.startbit, \ + __stringify(fields) " field " __stringify(index2) \ + " breaks descending order"); \ + BUILD_BUG_ON_MSG(max(_f1.endbit, _f2.endbit) <= \ + min(_f1.startbit, _f2.startbit), \ + __stringify(fields) " field " __stringify(index2) \ + " overlaps with previous field"); \ +}) + +#define CHECK_PACKED_FIELD(fields, index) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(_f[0]) __f = _f[index]; \ + BUILD_BUG_ON_MSG(__f.startbit < __f.endbit, \ + __stringify(fields) " field " __stringify(index) \ + " start bit must not be smaller than end bit"); \ + BUILD_BUG_ON_MSG(__f.size != 1 && __f.size != 2 && \ + __f.size != 4 && __f.size != 8, \ + __stringify(fields) " field " __stringify(index) \ + " has unsupported unpacked storage size"); \ + BUILD_BUG_ON_MSG(__f.startbit - __f.endbit >= BITS_PER_BYTE * __f.size, \ + __stringify(fields) " field " __stringify(index) \ + " exceeds unpacked storage size"); \ + __builtin_choose_expr(index != 0, \ + CHECK_PACKED_FIELD_OVERLAP(fields, index - 1, index), \ + 1); \ +}) + +/* Note that the packed fields may be either in ascending or descending order. + * Thus, we must check that both the first and last field wit within the + * packed buffer size. + */ +#define CHECK_PACKED_FIELDS_SIZE(fields, pbuflen) ({ \ + typeof(&(fields)[0]) _f = (fields); \ + typeof(pbuflen) _len = (pbuflen); \ + const size_t num_fields = ARRAY_SIZE(fields); \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_len), \ + __stringify(fields) " pbuflen " __stringify(pbuflen) \ + " must be a compile time constant"); \ + BUILD_BUG_ON_MSG(_f[0].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " first field exceeds packed buffer size"); \ + BUILD_BUG_ON_MSG(_f[num_fields - 1].startbit >= BITS_PER_BYTE * _len, \ + __stringify(fields) " last field exceeds packed buffer size"); \ +}) + #define QUIRK_MSB_ON_THE_RIGHT BIT(0) #define QUIRK_LITTLE_ENDIAN BIT(1) #define QUIRK_LSW32_IS_FIRST BIT(2) @@ -26,4 +103,352 @@ int pack(void *pbuf, u64 uval, size_t startbit, size_t endbit, size_t pbuflen, int unpack(const void *pbuf, u64 *uval, size_t startbit, size_t endbit, size_t pbuflen, u8 quirks); +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks); + +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks); + +/* Do not hand-edit the following packed field check macros! + * + * They are generated using scripts/gen_packed_field_checks.c, which may be + * built via "make scripts_gen_packed_field_checks". If larger macro sizes are + * needed in the future, please use this program to re-generate the macros and + * insert them here. + */ + +#define CHECK_PACKED_FIELDS_1(fields) \ + CHECK_PACKED_FIELD(fields, 0) + +#define CHECK_PACKED_FIELDS_2(fields) do { \ + CHECK_PACKED_FIELDS_1(fields); \ + CHECK_PACKED_FIELD(fields, 1); \ +} while (0) + +#define CHECK_PACKED_FIELDS_3(fields) do { \ + CHECK_PACKED_FIELDS_2(fields); \ + CHECK_PACKED_FIELD(fields, 2); \ +} while (0) + +#define CHECK_PACKED_FIELDS_4(fields) do { \ + CHECK_PACKED_FIELDS_3(fields); \ + CHECK_PACKED_FIELD(fields, 3); \ +} while (0) + +#define CHECK_PACKED_FIELDS_5(fields) do { \ + CHECK_PACKED_FIELDS_4(fields); \ + CHECK_PACKED_FIELD(fields, 4); \ +} while (0) + +#define CHECK_PACKED_FIELDS_6(fields) do { \ + CHECK_PACKED_FIELDS_5(fields); \ + CHECK_PACKED_FIELD(fields, 5); \ +} while (0) + +#define CHECK_PACKED_FIELDS_7(fields) do { \ + CHECK_PACKED_FIELDS_6(fields); \ + CHECK_PACKED_FIELD(fields, 6); \ +} while (0) + +#define CHECK_PACKED_FIELDS_8(fields) do { \ + CHECK_PACKED_FIELDS_7(fields); \ + CHECK_PACKED_FIELD(fields, 7); \ +} while (0) + +#define CHECK_PACKED_FIELDS_9(fields) do { \ + CHECK_PACKED_FIELDS_8(fields); \ + CHECK_PACKED_FIELD(fields, 8); \ +} while (0) + +#define CHECK_PACKED_FIELDS_10(fields) do { \ + CHECK_PACKED_FIELDS_9(fields); \ + CHECK_PACKED_FIELD(fields, 9); \ +} while (0) + +#define CHECK_PACKED_FIELDS_11(fields) do { \ + CHECK_PACKED_FIELDS_10(fields); \ + CHECK_PACKED_FIELD(fields, 10); \ +} while (0) + +#define CHECK_PACKED_FIELDS_12(fields) do { \ + CHECK_PACKED_FIELDS_11(fields); \ + CHECK_PACKED_FIELD(fields, 11); \ +} while (0) + +#define CHECK_PACKED_FIELDS_13(fields) do { \ + CHECK_PACKED_FIELDS_12(fields); \ + CHECK_PACKED_FIELD(fields, 12); \ +} while (0) + +#define CHECK_PACKED_FIELDS_14(fields) do { \ + CHECK_PACKED_FIELDS_13(fields); \ + CHECK_PACKED_FIELD(fields, 13); \ +} while (0) + +#define CHECK_PACKED_FIELDS_15(fields) do { \ + CHECK_PACKED_FIELDS_14(fields); \ + CHECK_PACKED_FIELD(fields, 14); \ +} while (0) + +#define CHECK_PACKED_FIELDS_16(fields) do { \ + CHECK_PACKED_FIELDS_15(fields); \ + CHECK_PACKED_FIELD(fields, 15); \ +} while (0) + +#define CHECK_PACKED_FIELDS_17(fields) do { \ + CHECK_PACKED_FIELDS_16(fields); \ + CHECK_PACKED_FIELD(fields, 16); \ +} while (0) + +#define CHECK_PACKED_FIELDS_18(fields) do { \ + CHECK_PACKED_FIELDS_17(fields); \ + CHECK_PACKED_FIELD(fields, 17); \ +} while (0) + +#define CHECK_PACKED_FIELDS_19(fields) do { \ + CHECK_PACKED_FIELDS_18(fields); \ + CHECK_PACKED_FIELD(fields, 18); \ +} while (0) + +#define CHECK_PACKED_FIELDS_20(fields) do { \ + CHECK_PACKED_FIELDS_19(fields); \ + CHECK_PACKED_FIELD(fields, 19); \ +} while (0) + +#define CHECK_PACKED_FIELDS_21(fields) do { \ + CHECK_PACKED_FIELDS_20(fields); \ + CHECK_PACKED_FIELD(fields, 20); \ +} while (0) + +#define CHECK_PACKED_FIELDS_22(fields) do { \ + CHECK_PACKED_FIELDS_21(fields); \ + CHECK_PACKED_FIELD(fields, 21); \ +} while (0) + +#define CHECK_PACKED_FIELDS_23(fields) do { \ + CHECK_PACKED_FIELDS_22(fields); \ + CHECK_PACKED_FIELD(fields, 22); \ +} while (0) + +#define CHECK_PACKED_FIELDS_24(fields) do { \ + CHECK_PACKED_FIELDS_23(fields); \ + CHECK_PACKED_FIELD(fields, 23); \ +} while (0) + +#define CHECK_PACKED_FIELDS_25(fields) do { \ + CHECK_PACKED_FIELDS_24(fields); \ + CHECK_PACKED_FIELD(fields, 24); \ +} while (0) + +#define CHECK_PACKED_FIELDS_26(fields) do { \ + CHECK_PACKED_FIELDS_25(fields); \ + CHECK_PACKED_FIELD(fields, 25); \ +} while (0) + +#define CHECK_PACKED_FIELDS_27(fields) do { \ + CHECK_PACKED_FIELDS_26(fields); \ + CHECK_PACKED_FIELD(fields, 26); \ +} while (0) + +#define CHECK_PACKED_FIELDS_28(fields) do { \ + CHECK_PACKED_FIELDS_27(fields); \ + CHECK_PACKED_FIELD(fields, 27); \ +} while (0) + +#define CHECK_PACKED_FIELDS_29(fields) do { \ + CHECK_PACKED_FIELDS_28(fields); \ + CHECK_PACKED_FIELD(fields, 28); \ +} while (0) + +#define CHECK_PACKED_FIELDS_30(fields) do { \ + CHECK_PACKED_FIELDS_29(fields); \ + CHECK_PACKED_FIELD(fields, 29); \ +} while (0) + +#define CHECK_PACKED_FIELDS_31(fields) do { \ + CHECK_PACKED_FIELDS_30(fields); \ + CHECK_PACKED_FIELD(fields, 30); \ +} while (0) + +#define CHECK_PACKED_FIELDS_32(fields) do { \ + CHECK_PACKED_FIELDS_31(fields); \ + CHECK_PACKED_FIELD(fields, 31); \ +} while (0) + +#define CHECK_PACKED_FIELDS_33(fields) do { \ + CHECK_PACKED_FIELDS_32(fields); \ + CHECK_PACKED_FIELD(fields, 32); \ +} while (0) + +#define CHECK_PACKED_FIELDS_34(fields) do { \ + CHECK_PACKED_FIELDS_33(fields); \ + CHECK_PACKED_FIELD(fields, 33); \ +} while (0) + +#define CHECK_PACKED_FIELDS_35(fields) do { \ + CHECK_PACKED_FIELDS_34(fields); \ + CHECK_PACKED_FIELD(fields, 34); \ +} while (0) + +#define CHECK_PACKED_FIELDS_36(fields) do { \ + CHECK_PACKED_FIELDS_35(fields); \ + CHECK_PACKED_FIELD(fields, 35); \ +} while (0) + +#define CHECK_PACKED_FIELDS_37(fields) do { \ + CHECK_PACKED_FIELDS_36(fields); \ + CHECK_PACKED_FIELD(fields, 36); \ +} while (0) + +#define CHECK_PACKED_FIELDS_38(fields) do { \ + CHECK_PACKED_FIELDS_37(fields); \ + CHECK_PACKED_FIELD(fields, 37); \ +} while (0) + +#define CHECK_PACKED_FIELDS_39(fields) do { \ + CHECK_PACKED_FIELDS_38(fields); \ + CHECK_PACKED_FIELD(fields, 38); \ +} while (0) + +#define CHECK_PACKED_FIELDS_40(fields) do { \ + CHECK_PACKED_FIELDS_39(fields); \ + CHECK_PACKED_FIELD(fields, 39); \ +} while (0) + +#define CHECK_PACKED_FIELDS_41(fields) do { \ + CHECK_PACKED_FIELDS_40(fields); \ + CHECK_PACKED_FIELD(fields, 40); \ +} while (0) + +#define CHECK_PACKED_FIELDS_42(fields) do { \ + CHECK_PACKED_FIELDS_41(fields); \ + CHECK_PACKED_FIELD(fields, 41); \ +} while (0) + +#define CHECK_PACKED_FIELDS_43(fields) do { \ + CHECK_PACKED_FIELDS_42(fields); \ + CHECK_PACKED_FIELD(fields, 42); \ +} while (0) + +#define CHECK_PACKED_FIELDS_44(fields) do { \ + CHECK_PACKED_FIELDS_43(fields); \ + CHECK_PACKED_FIELD(fields, 43); \ +} while (0) + +#define CHECK_PACKED_FIELDS_45(fields) do { \ + CHECK_PACKED_FIELDS_44(fields); \ + CHECK_PACKED_FIELD(fields, 44); \ +} while (0) + +#define CHECK_PACKED_FIELDS_46(fields) do { \ + CHECK_PACKED_FIELDS_45(fields); \ + CHECK_PACKED_FIELD(fields, 45); \ +} while (0) + +#define CHECK_PACKED_FIELDS_47(fields) do { \ + CHECK_PACKED_FIELDS_46(fields); \ + CHECK_PACKED_FIELD(fields, 46); \ +} while (0) + +#define CHECK_PACKED_FIELDS_48(fields) do { \ + CHECK_PACKED_FIELDS_47(fields); \ + CHECK_PACKED_FIELD(fields, 47); \ +} while (0) + +#define CHECK_PACKED_FIELDS_49(fields) do { \ + CHECK_PACKED_FIELDS_48(fields); \ + CHECK_PACKED_FIELD(fields, 48); \ +} while (0) + +#define CHECK_PACKED_FIELDS_50(fields) do { \ + CHECK_PACKED_FIELDS_49(fields); \ + CHECK_PACKED_FIELD(fields, 49); \ +} while (0) + +#define CHECK_PACKED_FIELDS(fields) \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 1, ({ CHECK_PACKED_FIELDS_1(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 2, ({ CHECK_PACKED_FIELDS_2(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 3, ({ CHECK_PACKED_FIELDS_3(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 4, ({ CHECK_PACKED_FIELDS_4(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 5, ({ CHECK_PACKED_FIELDS_5(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 6, ({ CHECK_PACKED_FIELDS_6(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 7, ({ CHECK_PACKED_FIELDS_7(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 8, ({ CHECK_PACKED_FIELDS_8(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 9, ({ CHECK_PACKED_FIELDS_9(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 10, ({ CHECK_PACKED_FIELDS_10(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 11, ({ CHECK_PACKED_FIELDS_11(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 12, ({ CHECK_PACKED_FIELDS_12(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 13, ({ CHECK_PACKED_FIELDS_13(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 14, ({ CHECK_PACKED_FIELDS_14(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 15, ({ CHECK_PACKED_FIELDS_15(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 16, ({ CHECK_PACKED_FIELDS_16(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 17, ({ CHECK_PACKED_FIELDS_17(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 18, ({ CHECK_PACKED_FIELDS_18(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 19, ({ CHECK_PACKED_FIELDS_19(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 20, ({ CHECK_PACKED_FIELDS_20(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 21, ({ CHECK_PACKED_FIELDS_21(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 22, ({ CHECK_PACKED_FIELDS_22(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 23, ({ CHECK_PACKED_FIELDS_23(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 24, ({ CHECK_PACKED_FIELDS_24(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 25, ({ CHECK_PACKED_FIELDS_25(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 26, ({ CHECK_PACKED_FIELDS_26(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 27, ({ CHECK_PACKED_FIELDS_27(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 28, ({ CHECK_PACKED_FIELDS_28(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 29, ({ CHECK_PACKED_FIELDS_29(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 30, ({ CHECK_PACKED_FIELDS_30(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 31, ({ CHECK_PACKED_FIELDS_31(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 32, ({ CHECK_PACKED_FIELDS_32(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 33, ({ CHECK_PACKED_FIELDS_33(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 34, ({ CHECK_PACKED_FIELDS_34(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 35, ({ CHECK_PACKED_FIELDS_35(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 36, ({ CHECK_PACKED_FIELDS_36(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 37, ({ CHECK_PACKED_FIELDS_37(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 38, ({ CHECK_PACKED_FIELDS_38(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 39, ({ CHECK_PACKED_FIELDS_39(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 40, ({ CHECK_PACKED_FIELDS_40(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 41, ({ CHECK_PACKED_FIELDS_41(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 42, ({ CHECK_PACKED_FIELDS_42(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 43, ({ CHECK_PACKED_FIELDS_43(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 44, ({ CHECK_PACKED_FIELDS_44(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 45, ({ CHECK_PACKED_FIELDS_45(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 46, ({ CHECK_PACKED_FIELDS_46(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 47, ({ CHECK_PACKED_FIELDS_47(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 48, ({ CHECK_PACKED_FIELDS_48(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 49, ({ CHECK_PACKED_FIELDS_49(fields); }), \ + __builtin_choose_expr(ARRAY_SIZE(fields) == 50, ({ CHECK_PACKED_FIELDS_50(fields); }), \ + ({ BUILD_BUG_ON_MSG(1, "CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than 50."); }) \ +)))))))))))))))))))))))))))))))))))))))))))))))))) + +/* End of generated content */ + +#define pack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : pack_fields_u8, \ + const struct packed_field_u16 * : pack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + +#define unpack_fields(pbuf, pbuflen, ustruct, fields, quirks) \ + ({ \ + CHECK_PACKED_FIELDS(fields); \ + CHECK_PACKED_FIELDS_SIZE((fields), (pbuflen)); \ + _Generic((fields), \ + const struct packed_field_u8 * : unpack_fields_u8, \ + const struct packed_field_u16 * : unpack_fields_u16 \ + )((pbuf), (pbuflen), (ustruct), (fields), ARRAY_SIZE(fields), (quirks)); \ + }) + #endif diff --git a/lib/packing.c b/lib/packing.c index 09a2d195b943..bb1643d9e64d 100644 --- a/lib/packing.c +++ b/lib/packing.c @@ -5,10 +5,37 @@ #include #include #include +#include #include #include #include +#define __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \ + ({ \ + for (size_t i = 0; i < (num_fields); i++) { \ + typeof(&(fields)[0]) field = &(fields)[i]; \ + u64 uval; \ + \ + uval = ustruct_field_to_u64(ustruct, field->offset, field->size); \ + \ + __pack(pbuf, uval, field->startbit, field->endbit, \ + pbuflen, quirks); \ + } \ + }) + +#define __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks) \ + ({ \ + for (size_t i = 0; i < (num_fields); i++) { \ + typeof(&(fields)[0]) field = &fields[i]; \ + u64 uval; \ + \ + __unpack(pbuf, &uval, field->startbit, field->endbit, \ + pbuflen, quirks); \ + \ + u64_to_ustruct_field(ustruct, field->offset, field->size, uval); \ + } \ + }) + /** * calculate_box_addr - Determine physical location of byte in buffer * @box: Index of byte within buffer seen as a logical big-endian big number @@ -322,4 +349,130 @@ int packing(void *pbuf, u64 *uval, int startbit, int endbit, size_t pbuflen, } EXPORT_SYMBOL(packing); +static u64 ustruct_field_to_u64(const void *ustruct, size_t field_offset, + size_t field_size) +{ + switch (field_size) { + case 1: + return *((u8 *)(ustruct + field_offset)); + case 2: + return *((u16 *)(ustruct + field_offset)); + case 4: + return *((u32 *)(ustruct + field_offset)); + default: + return *((u64 *)(ustruct + field_offset)); + } +} + +static void u64_to_ustruct_field(void *ustruct, size_t field_offset, + size_t field_size, u64 uval) +{ + switch (field_size) { + case 1: + *((u8 *)(ustruct + field_offset)) = uval; + break; + case 2: + *((u16 *)(ustruct + field_offset)) = uval; + break; + case 4: + *((u32 *)(ustruct + field_offset)) = uval; + break; + default: + *((u64 *)(ustruct + field_offset)) = uval; + break; + } +} + +/** + * pack_fields_u8 - Pack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u8 definitions. + * @fields: Array of packed_field_u8 field definition. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the pack_fields() macro instead of calling this directly. + */ +void pack_fields_u8(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks) +{ + __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(pack_fields_u8); + +/** + * pack_fields_u16 - Pack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u16 definitions. + * @fields: Array of packed_field_u16 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the pack_fields() macro instead of calling this directly. + */ +void pack_fields_u16(void *pbuf, size_t pbuflen, const void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks) +{ + __pack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(pack_fields_u16); + +/** + * unpack_fields_u8 - Unpack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u8 definitions. + * @fields: Array of packed_field_u8 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the unpack_fields() macro instead of calling this directly. + */ +void unpack_fields_u8(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u8 *fields, size_t num_fields, + u8 quirks) +{ + __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(unpack_fields_u8); + +/** + * unpack_fields_u16 - Unpack array of fields + * + * @pbuf: Pointer to a buffer holding the packed value. + * @pbuflen: The length in bytes of the packed buffer pointed to by @pbuf. + * @ustruct: Pointer to CPU-readable structure holding the unpacked value. + * It is expected (but not checked) that this has the same data type + * as all struct packed_field_u16 definitions. + * @fields: Array of packed_field_u16 field definitions. They must not overlap. + * @num_fields: Length of @fields array. + * @quirks: A bit mask of QUIRK_LITTLE_ENDIAN, QUIRK_LSW32_IS_FIRST and + * QUIRK_MSB_ON_THE_RIGHT. + * + * Use the unpack_fields() macro instead of calling this directly. + */ +void unpack_fields_u16(const void *pbuf, size_t pbuflen, void *ustruct, + const struct packed_field_u16 *fields, size_t num_fields, + u8 quirks) +{ + __unpack_fields(pbuf, pbuflen, ustruct, fields, num_fields, quirks); +} +EXPORT_SYMBOL(unpack_fields_u16); + MODULE_DESCRIPTION("Generic bitfield packing and unpacking"); diff --git a/lib/packing_test.c b/lib/packing_test.c index b38ea43c03fd..ce3b83d33b04 100644 --- a/lib/packing_test.c +++ b/lib/packing_test.c @@ -396,9 +396,70 @@ static void packing_test_unpack(struct kunit *test) KUNIT_EXPECT_EQ(test, uval, params->uval); } +#define PACKED_BUF_SIZE 8 + +typedef struct __packed { u8 buf[PACKED_BUF_SIZE]; } packed_buf_t; + +struct test_data { + u32 field3; + u16 field2; + u16 field4; + u16 field6; + u8 field1; + u8 field5; +}; + +static const struct packed_field_u8 test_fields[] = { + PACKED_FIELD(63, 61, struct test_data, field1), + PACKED_FIELD(60, 52, struct test_data, field2), + PACKED_FIELD(51, 28, struct test_data, field3), + PACKED_FIELD(27, 14, struct test_data, field4), + PACKED_FIELD(13, 9, struct test_data, field5), + PACKED_FIELD(8, 0, struct test_data, field6), +}; + +static void packing_test_pack_fields(struct kunit *test) +{ + const struct test_data data = { + .field1 = 0x2, + .field2 = 0x100, + .field3 = 0xF00050, + .field4 = 0x7D3, + .field5 = 0x9, + .field6 = 0x10B, + }; + packed_buf_t expect = { + .buf = { 0x50, 0x0F, 0x00, 0x05, 0x01, 0xF4, 0xD3, 0x0B }, + }; + packed_buf_t buf = {}; + + pack_fields(&buf, sizeof(buf), &data, test_fields, 0); + + KUNIT_EXPECT_MEMEQ(test, &expect, &buf, sizeof(buf)); +} + +static void packing_test_unpack_fields(struct kunit *test) +{ + const packed_buf_t buf = { + .buf = { 0x17, 0x28, 0x10, 0x19, 0x3D, 0xA9, 0x07, 0x9C }, + }; + struct test_data data = {}; + + unpack_fields(&buf, sizeof(buf), &data, test_fields, 0); + + KUNIT_EXPECT_EQ(test, 0, data.field1); + KUNIT_EXPECT_EQ(test, 0x172, data.field2); + KUNIT_EXPECT_EQ(test, 0x810193, data.field3); + KUNIT_EXPECT_EQ(test, 0x36A4, data.field4); + KUNIT_EXPECT_EQ(test, 0x3, data.field5); + KUNIT_EXPECT_EQ(test, 0x19C, data.field6); +} + static struct kunit_case packing_test_cases[] = { KUNIT_CASE_PARAM(packing_test_pack, packing_gen_params), KUNIT_CASE_PARAM(packing_test_unpack, packing_gen_params), + KUNIT_CASE(packing_test_pack_fields), + KUNIT_CASE(packing_test_unpack_fields), {}, }; diff --git a/scripts/.gitignore b/scripts/.gitignore index 3dbb8bb2457b..c2ef68848da5 100644 --- a/scripts/.gitignore +++ b/scripts/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only /asn1_compiler +/gen_packed_field_checks /generate_rust_target /insert-sys-cert /kallsyms diff --git a/scripts/Makefile b/scripts/Makefile index 6bcda4b9d054..546e8175e1c4 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -47,7 +47,7 @@ HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif # The following programs are only built on demand -hostprogs += unifdef +hostprogs += unifdef gen_packed_field_checks # The module linker script is preprocessed on demand targets += module.lds diff --git a/scripts/gen_packed_field_checks.c b/scripts/gen_packed_field_checks.c new file mode 100644 index 000000000000..60042b7616ee --- /dev/null +++ b/scripts/gen_packed_field_checks.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2024, Intel Corporation +#include +#include + +#define MAX_PACKED_FIELD_SIZE 50 + +int main(int argc, char **argv) +{ + /* The first macro doesn't need a 'do {} while(0)' loop */ + printf("#define CHECK_PACKED_FIELDS_1(fields) \\\n"); + printf("\tCHECK_PACKED_FIELD(fields, 0)\n\n"); + + /* Remaining macros require a do/while loop, and are implemented + * recursively by calling the previous iteration's macro. + */ + for (int i = 2; i <= MAX_PACKED_FIELD_SIZE; i++) { + printf("#define CHECK_PACKED_FIELDS_%d(fields) do { \\\n", i); + printf("\tCHECK_PACKED_FIELDS_%d(fields); \\\n", i - 1); + printf("\tCHECK_PACKED_FIELD(fields, %d); \\\n", i - 1); + printf("} while (0)\n\n"); + } + + printf("#define CHECK_PACKED_FIELDS(fields) \\\n"); + + for (int i = 1; i <= MAX_PACKED_FIELD_SIZE; i++) + printf("\t__builtin_choose_expr(ARRAY_SIZE(fields) == %d, ({ CHECK_PACKED_FIELDS_%d(fields); }), \\\n", + i, i); + + printf("\t({ BUILD_BUG_ON_MSG(1, \"CHECK_PACKED_FIELDS() must be regenerated to support array sizes larger than %d.\"); }) \\\n", + MAX_PACKED_FIELD_SIZE); + + for (int i = 1; i <= MAX_PACKED_FIELD_SIZE; i++) + printf(")"); + + printf("\n"); +} -- cgit v1.2.3 From 19ce8cd3046587efbd2c6253947be7c22dfccc18 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 9 Dec 2024 20:38:03 +0100 Subject: tcp: Measure TIME-WAIT reuse delay with millisecond precision Prepare ground for TIME-WAIT socket reuse with subsecond delay. Today the last TS.Recent update timestamp, recorded in seconds and stored tp->ts_recent_stamp and tw->tw_ts_recent_stamp fields, has two purposes. Firstly, it is used to track the age of the last recorded TS.Recent value to detect when that value becomes outdated due to potential wrap-around of the other TCP timestamp clock (RFC 7323, section 5.5). For this purpose a second-based timestamp is completely sufficient as even in the worst case scenario of a peer using a high resolution microsecond timestamp, the wrap-around interval is ~36 minutes long. Secondly, it serves as a threshold value for allowing TIME-WAIT socket reuse. A TIME-WAIT socket can be reused only once the virtual 1 Hz clock, ktime_get_seconds, is past the TS.Recent update timestamp. The purpose behind delaying the TIME-WAIT socket reuse is to wait for the other TCP timestamp clock to tick at least once before reusing the connection. It is only then that the PAWS mechanism for the reopened connection can detect old duplicate segments from the previous connection incarnation (RFC 7323, appendix B.2). In this case using a timestamp with second resolution not only blocks the way toward allowing faster TIME-WAIT reuse after shorter subsecond delay, but also makes it impossible to reliably delay TW reuse by one second. As Eric Dumazet has pointed out [1], due to timestamp rounding, the TW reuse delay will actually be between (0, 1] seconds, and 0.5 seconds on average. We delay TW reuse for one full second only when last TS.Recent update coincides with our virtual 1 Hz clock tick. Considering the above, introduce a dedicated field to store a millisecond timestamp of transition into the TIME-WAIT state. Place it in an existing 4-byte hole inside inet_timewait_sock structure to avoid an additional memory cost. Use the new timestamp to (i) reliably delay TIME-WAIT reuse by one second, and (ii) prepare for configurable subsecond reuse delay in the subsequent change. We assume here that a full one second delay was the original intention in [2] because it accounts for the worst case scenario of the other TCP using the slowest recommended 1 Hz timestamp clock. A more involved alternative would be to change the resolution of the last TS.Recent update timestamp, tw->tw_ts_recent_stamp, to milliseconds. [1] https://lore.kernel.org/netdev/CANn89iKB4GFd8sVzCbRttqw_96o3i2wDhX-3DraQtsceNGYwug@mail.gmail.com/ [2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b8439924316d5bcb266d165b93d632a4b4b859af Signed-off-by: Jakub Sitnicki Reviewed-by: Eric Dumazet Reviewed-by: Jason Xing Link: https://patch.msgid.link/20241209-jakub-krn-909-poc-msec-tw-tstamp-v2-1-66aca0eed03e@cloudflare.com Signed-off-by: Jakub Kicinski --- include/net/inet_timewait_sock.h | 4 ++++ net/ipv4/tcp_ipv4.c | 5 +++-- net/ipv4/tcp_minisocks.c | 7 ++++++- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 62c0a7e65d6b..67a313575780 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -74,6 +74,10 @@ struct inet_timewait_sock { tw_tos : 8; u32 tw_txhash; u32 tw_priority; + /** + * @tw_reuse_stamp: Time of entry into %TCP_TIME_WAIT state in msec. + */ + u32 tw_entry_stamp; struct timer_list tw_timer; struct inet_bind_bucket *tw_tb; struct inet_bind2_bucket *tw_tb2; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a38c8b1f44db..3b6ba1d16921 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -120,6 +120,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw); struct tcp_sock *tp = tcp_sk(sk); int ts_recent_stamp; + u32 reuse_thresh; if (READ_ONCE(tw->tw_substate) == TCP_FIN_WAIT2) reuse = 0; @@ -162,9 +163,9 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp); + reuse_thresh = READ_ONCE(tw->tw_entry_stamp) + MSEC_PER_SEC; if (ts_recent_stamp && - (!twp || (reuse && time_after32(ktime_get_seconds(), - ts_recent_stamp)))) { + (!twp || (reuse && time_after32(tcp_clock_ms(), reuse_thresh)))) { /* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk * and releasing the bucket lock. */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7121d8573928..b089b08e9617 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -157,8 +157,11 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, rcv_nxt); if (tmp_opt.saw_tstamp) { + u64 ts = tcp_clock_ms(); + + WRITE_ONCE(tw->tw_entry_stamp, ts); WRITE_ONCE(tcptw->tw_ts_recent_stamp, - ktime_get_seconds()); + div_u64(ts, MSEC_PER_SEC)); WRITE_ONCE(tcptw->tw_ts_recent, tmp_opt.rcv_tsval); } @@ -316,6 +319,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tw->tw_mark = sk->sk_mark; tw->tw_priority = READ_ONCE(sk->sk_priority); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; + /* refreshed when we enter true TIME-WAIT state */ + tw->tw_entry_stamp = tcp_time_stamp_ms(tp); tcptw->tw_rcv_nxt = tp->rcv_nxt; tcptw->tw_snd_nxt = tp->snd_nxt; tcptw->tw_rcv_wnd = tcp_receive_window(tp); -- cgit v1.2.3 From ca6a6f93867a9763bdf8685c788e2e558d10975f Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Mon, 9 Dec 2024 20:38:04 +0100 Subject: tcp: Add sysctl to configure TIME-WAIT reuse delay Today we have a hardcoded delay of 1 sec before a TIME-WAIT socket can be reused by reopening a connection. This is a safe choice based on an assumption that the other TCP timestamp clock frequency, which is unknown to us, may be as low as 1 Hz (RFC 7323, section 5.4). However, this means that in the presence of short lived connections with an RTT of couple of milliseconds, the time during which a 4-tuple is blocked from reuse can be orders of magnitude longer that the connection lifetime. Combined with a reduced pool of ephemeral ports, when using IP_LOCAL_PORT_RANGE to share an egress IP address between hosts [1], the long TIME-WAIT reuse delay can lead to port exhaustion, where all available 4-tuples are tied up in TIME-WAIT state. Turn the reuse delay into a per-netns setting so that sysadmins can make more aggressive assumptions about remote TCP timestamp clock frequency and shorten the delay in order to allow connections to reincarnate faster. Note that applications can completely bypass the TIME-WAIT delay protection already today by locking the local port with bind() before connecting. Such immediate connection reuse may result in PAWS failing to detect old duplicate segments, leaving us with just the sequence number check as a safety net. This new configurable offers a trade off where the sysadmin can balance between the risk of PAWS detection failing to act versus exhausting ports by having sockets tied up in TIME-WAIT state for too long. [1] https://lpc.events/event/16/contributions/1349/ Signed-off-by: Jakub Sitnicki Reviewed-by: Eric Dumazet Reviewed-by: Jason Xing Link: https://patch.msgid.link/20241209-jakub-krn-909-poc-msec-tw-tstamp-v2-2-66aca0eed03e@cloudflare.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ip-sysctl.rst | 14 ++++++++++++++ .../networking/net_cachelines/netns_ipv4_sysctl.rst | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ net/ipv4/tcp_ipv4.c | 4 +++- 5 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index eacf8983e230..2f2b00295836 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1000,6 +1000,20 @@ tcp_tw_reuse - INTEGER Default: 2 +tcp_tw_reuse_delay - UNSIGNED INTEGER + The delay in milliseconds before a TIME-WAIT socket can be reused by a + new connection, if TIME-WAIT socket reuse is enabled. The actual reuse + threshold is within [N, N+1] range, where N is the requested delay in + milliseconds, to ensure the delay interval is never shorter than the + configured value. + + This setting contains an assumption about the other TCP timestamp clock + tick interval. It should not be set to a value lower than the peer's + clock tick for PAWS (Protection Against Wrapped Sequence numbers) + mechanism work correctly for the reused connection. + + Default: 1000 (milliseconds) + tcp_window_scaling - BOOLEAN Enable window scaling as defined in RFC1323. diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst index 629da6dc6d74..de0263302f16 100644 --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -79,6 +79,7 @@ u8 sysctl_tcp_retries1 u8 sysctl_tcp_retries2 u8 sysctl_tcp_orphan_retries u8 sysctl_tcp_tw_reuse timewait_sock_ops +unsigned_int sysctl_tcp_tw_reuse_delay timewait_sock_ops int sysctl_tcp_fin_timeout TCP_LAST_ACK/tcp_rcv_state_process unsigned_int sysctl_tcp_notsent_lowat read_mostly tcp_notsent_lowat/tcp_stream_memory_free u8 sysctl_tcp_sack tcp_syn_options diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 3c014170e001..46452da35206 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -175,6 +175,7 @@ struct netns_ipv4 { u8 sysctl_tcp_retries2; u8 sysctl_tcp_orphan_retries; u8 sysctl_tcp_tw_reuse; + unsigned int sysctl_tcp_tw_reuse_delay; int sysctl_tcp_fin_timeout; u8 sysctl_tcp_sack; u8 sysctl_tcp_window_scaling; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a79b2a52ce01..42cb5dc9cb24 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -45,6 +45,7 @@ static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024; static unsigned int udp_child_hash_entries_max = UDP_HTABLE_SIZE_MAX; static int tcp_plb_max_rounds = 31; static int tcp_plb_max_cong_thresh = 256; +static unsigned int tcp_tw_reuse_delay_max = TCP_PAWS_MSL * MSEC_PER_SEC; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1065,6 +1066,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, + { + .procname = "tcp_tw_reuse_delay", + .data = &init_net.ipv4.sysctl_tcp_tw_reuse_delay, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = &tcp_tw_reuse_delay_max, + }, { .procname = "tcp_max_syn_backlog", .data = &init_net.ipv4.sysctl_max_syn_backlog, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3b6ba1d16921..e45222d5fc2e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -163,7 +163,8 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) and use initial timestamp retrieved from peer table. */ ts_recent_stamp = READ_ONCE(tcptw->tw_ts_recent_stamp); - reuse_thresh = READ_ONCE(tw->tw_entry_stamp) + MSEC_PER_SEC; + reuse_thresh = READ_ONCE(tw->tw_entry_stamp) + + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse_delay); if (ts_recent_stamp && (!twp || (reuse && time_after32(tcp_clock_ms(), reuse_thresh)))) { /* inet_twsk_hashdance_schedule() sets sk_refcnt after putting twsk @@ -3458,6 +3459,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; net->ipv4.sysctl_tcp_tw_reuse = 2; + net->ipv4.sysctl_tcp_tw_reuse_delay = 1 * MSEC_PER_SEC; net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1); -- cgit v1.2.3 From 9723a77318b7c0cfd06ea207e52a042f8c815318 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:16 +0000 Subject: net: dsa: add hook to determine whether EEE is supported Add a hook to determine whether the switch supports EEE. This will return false if the switch does not, or true if it does. If the method is not implemented, we assume (currently) that the switch supports EEE. Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL144-006cZD-El@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 1 + net/dsa/user.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index 72ae65e7246a..aaa75bbaa0ea 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -988,6 +988,7 @@ struct dsa_switch_ops { /* * Port's MAC EEE settings */ + bool (*support_eee)(struct dsa_switch *ds, int port); int (*set_mac_eee)(struct dsa_switch *ds, int port, struct ethtool_keee *e); int (*get_mac_eee)(struct dsa_switch *ds, int port, diff --git a/net/dsa/user.c b/net/dsa/user.c index e1a0b153c353..a74339680010 100644 --- a/net/dsa/user.c +++ b/net/dsa/user.c @@ -1229,6 +1229,10 @@ static int dsa_user_set_eee(struct net_device *dev, struct ethtool_keee *e) struct dsa_switch *ds = dp->ds; int ret; + /* Check whether the switch supports EEE */ + if (ds->ops->support_eee && !ds->ops->support_eee(ds, dp->index)) + return -EOPNOTSUPP; + /* Port's PHY and MAC both need to be EEE capable */ if (!dev->phydev) return -ENODEV; @@ -1249,6 +1253,10 @@ static int dsa_user_get_eee(struct net_device *dev, struct ethtool_keee *e) struct dsa_switch *ds = dp->ds; int ret; + /* Check whether the switch supports EEE */ + if (ds->ops->support_eee && !ds->ops->support_eee(ds, dp->index)) + return -EOPNOTSUPP; + /* Port's PHY and MAC both need to be EEE capable */ if (!dev->phydev) return -ENODEV; -- cgit v1.2.3 From 99379f587278c818777cb4778e2c79c6c1440c65 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 10 Dec 2024 14:18:21 +0000 Subject: net: dsa: provide implementation of .support_eee() Provide a trivial implementation for the .support_eee() method which switch drivers can use to simply indicate that they support EEE on all their user ports. Signed-off-by: Russell King (Oracle) Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Link: https://patch.msgid.link/E1tL149-006cZJ-JJ@rmk-PC.armlinux.org.uk Signed-off-by: Jakub Kicinski --- include/net/dsa.h | 1 + net/dsa/port.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/dsa.h b/include/net/dsa.h index aaa75bbaa0ea..4aeedb296d67 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -1384,5 +1384,6 @@ static inline bool dsa_user_dev_check(const struct net_device *dev) netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); +bool dsa_supports_eee(struct dsa_switch *ds, int port); #endif diff --git a/net/dsa/port.c b/net/dsa/port.c index ee0aaec4c8e0..5c9d1798e830 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -1575,6 +1575,22 @@ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, cpu_dp->tag_ops = tag_ops; } +/* dsa_supports_eee - indicate that EEE is supported + * @ds: pointer to &struct dsa_switch + * @port: port index + * + * A default implementation for the .support_eee() DSA operations member, + * which drivers can use to indicate that they support EEE on all of their + * user ports. + * + * Returns: true + */ +bool dsa_supports_eee(struct dsa_switch *ds, int port) +{ + return true; +} +EXPORT_SYMBOL_GPL(dsa_supports_eee); + static void dsa_port_phylink_mac_config(struct phylink_config *config, unsigned int mode, const struct phylink_link_state *state) -- cgit v1.2.3 From 082e8f6db9092d19ae84549874daaef240c2207b Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Mon, 11 Nov 2024 11:03:45 +0100 Subject: turris-omnia-mcu-interface.h: Move command execution function to global header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the command execution functions from the turris-omnia-mcu platform driver private header to the global turris-omnia-mcu-interface.h header, so that they can be used by the LED driver. Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20241111100355.6978-2-kabel@kernel.org Signed-off-by: Lee Jones --- drivers/platform/cznic/turris-omnia-mcu-base.c | 1 + drivers/platform/cznic/turris-omnia-mcu.h | 130 ----------------------- include/linux/turris-omnia-mcu-interface.h | 136 ++++++++++++++++++++++++- 3 files changed, 136 insertions(+), 131 deletions(-) (limited to 'include') diff --git a/drivers/platform/cznic/turris-omnia-mcu-base.c b/drivers/platform/cznic/turris-omnia-mcu-base.c index 58f9afae2867..bb871226e357 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-base.c +++ b/drivers/platform/cznic/turris-omnia-mcu-base.c @@ -52,6 +52,7 @@ int omnia_cmd_write_read(const struct i2c_client *client, return 0; } +EXPORT_SYMBOL_GPL(omnia_cmd_write_read); static int omnia_get_version_hash(struct omnia_mcu *mcu, bool bootloader, char version[static OMNIA_FW_VERSION_HEX_LEN]) diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index 2b13e28ee323..088541be3f4c 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -8,7 +8,6 @@ #ifndef __TURRIS_OMNIA_MCU_H #define __TURRIS_OMNIA_MCU_H -#include #include #include #include @@ -17,8 +16,6 @@ #include #include #include -#include -#include struct i2c_client; struct rtc_device; @@ -93,133 +90,6 @@ struct omnia_mcu { #endif }; -int omnia_cmd_write_read(const struct i2c_client *client, - void *cmd, unsigned int cmd_len, - void *reply, unsigned int reply_len); - -static inline int omnia_cmd_write(const struct i2c_client *client, void *cmd, - unsigned int len) -{ - return omnia_cmd_write_read(client, cmd, len, NULL, 0); -} - -static inline int omnia_cmd_write_u8(const struct i2c_client *client, u8 cmd, - u8 val) -{ - u8 buf[2] = { cmd, val }; - - return omnia_cmd_write(client, buf, sizeof(buf)); -} - -static inline int omnia_cmd_write_u16(const struct i2c_client *client, u8 cmd, - u16 val) -{ - u8 buf[3]; - - buf[0] = cmd; - put_unaligned_le16(val, &buf[1]); - - return omnia_cmd_write(client, buf, sizeof(buf)); -} - -static inline int omnia_cmd_write_u32(const struct i2c_client *client, u8 cmd, - u32 val) -{ - u8 buf[5]; - - buf[0] = cmd; - put_unaligned_le32(val, &buf[1]); - - return omnia_cmd_write(client, buf, sizeof(buf)); -} - -static inline int omnia_cmd_read(const struct i2c_client *client, u8 cmd, - void *reply, unsigned int len) -{ - return omnia_cmd_write_read(client, &cmd, 1, reply, len); -} - -static inline unsigned int -omnia_compute_reply_length(unsigned long mask, bool interleaved, - unsigned int offset) -{ - if (!mask) - return 0; - - return ((__fls(mask) >> 3) << interleaved) + 1 + offset; -} - -/* Returns 0 on success */ -static inline int omnia_cmd_read_bits(const struct i2c_client *client, u8 cmd, - unsigned long bits, unsigned long *dst) -{ - __le32 reply; - int err; - - if (!bits) { - *dst = 0; - return 0; - } - - err = omnia_cmd_read(client, cmd, &reply, - omnia_compute_reply_length(bits, false, 0)); - if (err) - return err; - - *dst = le32_to_cpu(reply) & bits; - - return 0; -} - -static inline int omnia_cmd_read_bit(const struct i2c_client *client, u8 cmd, - unsigned long bit) -{ - unsigned long reply; - int err; - - err = omnia_cmd_read_bits(client, cmd, bit, &reply); - if (err) - return err; - - return !!reply; -} - -static inline int omnia_cmd_read_u32(const struct i2c_client *client, u8 cmd, - u32 *dst) -{ - __le32 reply; - int err; - - err = omnia_cmd_read(client, cmd, &reply, sizeof(reply)); - if (err) - return err; - - *dst = le32_to_cpu(reply); - - return 0; -} - -static inline int omnia_cmd_read_u16(const struct i2c_client *client, u8 cmd, - u16 *dst) -{ - __le16 reply; - int err; - - err = omnia_cmd_read(client, cmd, &reply, sizeof(reply)); - if (err) - return err; - - *dst = le16_to_cpu(reply); - - return 0; -} - -static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, - u8 *reply) -{ - return omnia_cmd_read(client, cmd, reply, sizeof(*reply)); -} - #ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO extern const u8 omnia_int_to_gpio_idx[32]; extern const struct attribute_group omnia_mcu_gpio_group; diff --git a/include/linux/turris-omnia-mcu-interface.h b/include/linux/turris-omnia-mcu-interface.h index 2da8cbeb158a..7f24cc682780 100644 --- a/include/linux/turris-omnia-mcu-interface.h +++ b/include/linux/turris-omnia-mcu-interface.h @@ -9,7 +9,10 @@ #define __TURRIS_OMNIA_MCU_INTERFACE_H #include -#include +#include +#include +#include +#include enum omnia_commands_e { OMNIA_CMD_GET_STATUS_WORD = 0x01, /* slave sends status word back */ @@ -246,4 +249,135 @@ enum omnia_cmd_usb_ovc_prot_e { OMNIA_CMD_xET_USB_OVC_PROT_ENABLE = BIT(4), }; +/* Command execution functions */ + +struct i2c_client; + +int omnia_cmd_write_read(const struct i2c_client *client, + void *cmd, unsigned int cmd_len, + void *reply, unsigned int reply_len); + +static inline int omnia_cmd_write(const struct i2c_client *client, void *cmd, + unsigned int len) +{ + return omnia_cmd_write_read(client, cmd, len, NULL, 0); +} + +static inline int omnia_cmd_write_u8(const struct i2c_client *client, u8 cmd, + u8 val) +{ + u8 buf[2] = { cmd, val }; + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_write_u16(const struct i2c_client *client, u8 cmd, + u16 val) +{ + u8 buf[3]; + + buf[0] = cmd; + put_unaligned_le16(val, &buf[1]); + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_write_u32(const struct i2c_client *client, u8 cmd, + u32 val) +{ + u8 buf[5]; + + buf[0] = cmd; + put_unaligned_le32(val, &buf[1]); + + return omnia_cmd_write(client, buf, sizeof(buf)); +} + +static inline int omnia_cmd_read(const struct i2c_client *client, u8 cmd, + void *reply, unsigned int len) +{ + return omnia_cmd_write_read(client, &cmd, 1, reply, len); +} + +static inline unsigned int +omnia_compute_reply_length(unsigned long mask, bool interleaved, + unsigned int offset) +{ + if (!mask) + return 0; + + return ((__fls(mask) >> 3) << interleaved) + 1 + offset; +} + +/* Returns 0 on success */ +static inline int omnia_cmd_read_bits(const struct i2c_client *client, u8 cmd, + unsigned long bits, unsigned long *dst) +{ + __le32 reply; + int err; + + if (!bits) { + *dst = 0; + return 0; + } + + err = omnia_cmd_read(client, cmd, &reply, + omnia_compute_reply_length(bits, false, 0)); + if (err) + return err; + + *dst = le32_to_cpu(reply) & bits; + + return 0; +} + +static inline int omnia_cmd_read_bit(const struct i2c_client *client, u8 cmd, + unsigned long bit) +{ + unsigned long reply; + int err; + + err = omnia_cmd_read_bits(client, cmd, bit, &reply); + if (err) + return err; + + return !!reply; +} + +static inline int omnia_cmd_read_u32(const struct i2c_client *client, u8 cmd, + u32 *dst) +{ + __le32 reply; + int err; + + err = omnia_cmd_read(client, cmd, &reply, sizeof(reply)); + if (err) + return err; + + *dst = le32_to_cpu(reply); + + return 0; +} + +static inline int omnia_cmd_read_u16(const struct i2c_client *client, u8 cmd, + u16 *dst) +{ + __le16 reply; + int err; + + err = omnia_cmd_read(client, cmd, &reply, sizeof(reply)); + if (err) + return err; + + *dst = le16_to_cpu(reply); + + return 0; +} + +static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, + u8 *reply) +{ + return omnia_cmd_read(client, cmd, reply, sizeof(*reply)); +} + #endif /* __TURRIS_OMNIA_MCU_INTERFACE_H */ -- cgit v1.2.3 From d665d7f2800fff5da9311e4c8c236966ba57d440 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Mon, 11 Nov 2024 11:03:47 +0100 Subject: turris-omnia-mcu-interface.h: Add LED commands related definitions to global header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add definitions for contents of the OMNIA_CMD_LED_MODE and OMNIA_CMD_LED_STATE commands to the global turris-omnia-mcu-interface.h header. Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20241111100355.6978-4-kabel@kernel.org Signed-off-by: Lee Jones --- include/linux/turris-omnia-mcu-interface.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/turris-omnia-mcu-interface.h b/include/linux/turris-omnia-mcu-interface.h index 7f24cc682780..06c94e032c6f 100644 --- a/include/linux/turris-omnia-mcu-interface.h +++ b/include/linux/turris-omnia-mcu-interface.h @@ -239,6 +239,18 @@ enum omnia_int_e { OMNIA_INT_LAN5_LED1 = BIT(31), }; +enum omnia_cmd_led_mode_e { + OMNIA_CMD_LED_MODE_LED_MASK = GENMASK(3, 0), +#define OMNIA_CMD_LED_MODE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_MODE_LED_MASK, _l) + OMNIA_CMD_LED_MODE_USER = BIT(4), +}; + +enum omnia_cmd_led_state_e { + OMNIA_CMD_LED_STATE_LED_MASK = GENMASK(3, 0), +#define OMNIA_CMD_LED_STATE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_STATE_LED_MASK, _l) + OMNIA_CMD_LED_STATE_ON = BIT(4), +}; + enum omnia_cmd_poweroff_e { OMNIA_CMD_POWER_OFF_POWERON_BUTTON = BIT(0), OMNIA_CMD_POWER_OFF_MAGIC = 0xdead, -- cgit v1.2.3 From 76850b54943ffd5037c97cc27794449ce05c31e9 Mon Sep 17 00:00:00 2001 From: Rick Wertenbroek Date: Thu, 12 Dec 2024 17:25:47 +0100 Subject: PCI: endpoint: Replace magic number '6' by PCI_STD_NUM_BARS Replace the constant "6" by PCI_STD_NUM_BARS, as defined in include/uapi/linux/pci_regs.h: #define PCI_STD_NUM_BARS 6 /* Number of standard BARs */ Link: https://lore.kernel.org/r/20241212162547.225880-1-rick.wertenbroek@gmail.com Signed-off-by: Rick Wertenbroek Signed-off-by: Bjorn Helgaas --- include/linux/pci-epf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 18a3aeb62ae4..ee6156bcbbd0 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -157,7 +157,7 @@ struct pci_epf { struct device dev; const char *name; struct pci_epf_header *header; - struct pci_epf_bar bar[6]; + struct pci_epf_bar bar[PCI_STD_NUM_BARS]; u8 msi_interrupts; u16 msix_interrupts; u8 func_no; @@ -174,7 +174,7 @@ struct pci_epf { /* Below members are to attach secondary EPC to an endpoint function */ struct pci_epc *sec_epc; struct list_head sec_epc_list; - struct pci_epf_bar sec_epc_bar[6]; + struct pci_epf_bar sec_epc_bar[PCI_STD_NUM_BARS]; u8 sec_epc_func_no; struct config_group *group; unsigned int is_bound; -- cgit v1.2.3 From d24bf99214b199c25f9c2cb04b3a4993d1c7ab60 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 11 Dec 2024 18:44:49 +0100 Subject: power: supply: core: Add new "charge_types" property MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new "charge_types" property, this is identical to "charge_type" but reading returns a list of supported charge-types with the currently active type surrounded by square brackets, e.g.: Fast [Standard] "Long_Life" This has the advantage over the existing "charge_type" property that this allows userspace to find out which charge-types are supported for writable charge_type properties. Drivers which already support "charge_type" can easily add support for this by setting power_supply_desc.charge_types to a bitmask representing valid charge_type values. The existing "charge_type" get_property() and set_property() code paths can be re-used for "charge_types". Signed-off-by: Hans de Goede Reviewed-by: Thomas Weißschuh Link: https://lore.kernel.org/r/20241211174451.355421-3-hdegoede@redhat.com Signed-off-by: Sebastian Reichel --- Documentation/ABI/testing/sysfs-class-power | 20 ++++++++++++++++ drivers/power/supply/power_supply_sysfs.c | 36 +++++++++++++++++++++++++++++ include/linux/power_supply.h | 23 +++++++++++++++++- 3 files changed, 78 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index 45180b62d426..74050dfb5fc0 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -407,10 +407,30 @@ Description: Access: Read, Write + Reading this returns the current active value, e.g. 'Standard'. + Check charge_types to get the values supported by the battery. + Valid values: "Unknown", "N/A", "Trickle", "Fast", "Standard", "Adaptive", "Custom", "Long Life", "Bypass" +What: /sys/class/power_supply//charge_types +Date: December 2024 +Contact: linux-pm@vger.kernel.org +Description: + Identical to charge_type but reading returns a list of supported + charge-types with the currently active type surrounded by square + brackets, e.g.: "Fast [Standard] Long_Life". + + power_supply class devices may support both charge_type and + charge_types for backward compatibility. In this case both will + always have the same active value and the active value can be + changed by writing either property. + + Note charge-types which contain a space such as "Long Life" will + have the space replaced by a '_' resulting in e.g. "Long_Life". + When writing charge-types both variants are accepted. + What: /sys/class/power_supply//charge_term_current Date: July 2014 Contact: linux-pm@vger.kernel.org diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 9b604a1bcd17..e18f1ee53f21 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -182,6 +182,8 @@ static struct power_supply_attr power_supply_attrs[] __ro_after_init = { POWER_SUPPLY_ATTR(CHARGE_CONTROL_START_THRESHOLD), POWER_SUPPLY_ATTR(CHARGE_CONTROL_END_THRESHOLD), POWER_SUPPLY_ENUM_ATTR(CHARGE_BEHAVIOUR), + /* Same enum value texts as "charge_type" without the 's' at the end */ + _POWER_SUPPLY_ENUM_ATTR(CHARGE_TYPES, POWER_SUPPLY_CHARGE_TYPE_TEXT), POWER_SUPPLY_ATTR(INPUT_CURRENT_LIMIT), POWER_SUPPLY_ATTR(INPUT_VOLTAGE_LIMIT), POWER_SUPPLY_ATTR(INPUT_POWER_LIMIT), @@ -339,6 +341,12 @@ static ssize_t power_supply_format_property(struct device *dev, ret = power_supply_charge_behaviour_show(dev, psy->desc->charge_behaviours, value.intval, buf); break; + case POWER_SUPPLY_PROP_CHARGE_TYPES: + if (uevent) /* no possible values in uevents */ + goto default_format; + ret = power_supply_charge_types_show(dev, psy->desc->charge_types, + value.intval, buf); + break; case POWER_SUPPLY_PROP_MODEL_NAME ... POWER_SUPPLY_PROP_SERIAL_NUMBER: ret = sysfs_emit(buf, "%s\n", value.strval); break; @@ -556,3 +564,31 @@ int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const return -EINVAL; } EXPORT_SYMBOL_GPL(power_supply_charge_behaviour_parse); + +ssize_t power_supply_charge_types_show(struct device *dev, + unsigned int available_types, + enum power_supply_charge_type current_type, + char *buf) +{ + return power_supply_show_enum_with_available( + dev, POWER_SUPPLY_CHARGE_TYPE_TEXT, + ARRAY_SIZE(POWER_SUPPLY_CHARGE_TYPE_TEXT), + available_types, current_type, buf); +} +EXPORT_SYMBOL_GPL(power_supply_charge_types_show); + +int power_supply_charge_types_parse(unsigned int available_types, const char *buf) +{ + int i = power_supply_match_string(POWER_SUPPLY_CHARGE_TYPE_TEXT, + ARRAY_SIZE(POWER_SUPPLY_CHARGE_TYPE_TEXT), + buf); + + if (i < 0) + return i; + + if (available_types & BIT(i)) + return i; + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(power_supply_charge_types_parse); diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 17fc383785bf..0d96657d1a2b 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -40,7 +40,7 @@ enum { }; /* What algorithm is the charger using? */ -enum { +enum power_supply_charge_type { POWER_SUPPLY_CHARGE_TYPE_UNKNOWN = 0, POWER_SUPPLY_CHARGE_TYPE_NONE, POWER_SUPPLY_CHARGE_TYPE_TRICKLE, /* slow speed */ @@ -99,6 +99,7 @@ enum power_supply_property { /* Properties of type `int' */ POWER_SUPPLY_PROP_STATUS = 0, POWER_SUPPLY_PROP_CHARGE_TYPE, + POWER_SUPPLY_PROP_CHARGE_TYPES, POWER_SUPPLY_PROP_HEALTH, POWER_SUPPLY_PROP_PRESENT, POWER_SUPPLY_PROP_ONLINE, @@ -245,6 +246,7 @@ struct power_supply_desc { const char *name; enum power_supply_type type; u8 charge_behaviours; + u32 charge_types; u32 usb_types; const enum power_supply_property *properties; size_t num_properties; @@ -946,6 +948,11 @@ ssize_t power_supply_charge_behaviour_show(struct device *dev, char *buf); int power_supply_charge_behaviour_parse(unsigned int available_behaviours, const char *buf); +ssize_t power_supply_charge_types_show(struct device *dev, + unsigned int available_types, + enum power_supply_charge_type current_type, + char *buf); +int power_supply_charge_types_parse(unsigned int available_types, const char *buf); #else static inline ssize_t power_supply_charge_behaviour_show(struct device *dev, @@ -961,6 +968,20 @@ static inline int power_supply_charge_behaviour_parse(unsigned int available_beh { return -EOPNOTSUPP; } + +static inline +ssize_t power_supply_charge_types_show(struct device *dev, + unsigned int available_types, + enum power_supply_charge_type current_type, + char *buf) +{ + return -EOPNOTSUPP; +} + +static inline int power_supply_charge_types_parse(unsigned int available_types, const char *buf) +{ + return -EOPNOTSUPP; +} #endif #endif /* __LINUX_POWER_SUPPLY_H__ */ -- cgit v1.2.3 From a42d71e322a8066dcfa228ce8529bb073c521ae9 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Wed, 11 Dec 2024 11:17:09 +0100 Subject: net_sched: sch_cake: Add drop reasons MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add three qdisc-specific drop reasons and use them in sch_cake: 1) SKB_DROP_REASON_QDISC_OVERLIMIT Whenever the total queue limit for a qdisc instance is exceeded and a packet is dropped to make room. 2) SKB_DROP_REASON_QDISC_CONGESTED Whenever a packet is dropped by the qdisc AQM algorithm because congestion is detected. 3) SKB_DROP_REASON_CAKE_FLOOD Whenever a packet is dropped by the flood protection part of the CAKE AQM algorithm (BLUE). Also use the existing SKB_DROP_REASON_QUEUE_PURGE in cake_clear_tin(). Reasons show up as: perf record -a -e skb:kfree_skb sleep 1; perf script iperf3 665 [005] 848.656964: skb:kfree_skb: skbaddr=0xffff98168a333500 rx_sk=(nil) protocol=34525 location=__dev_queue_xmit+0x10f0 reason: QDISC_OVERLIMIT swapper 0 [001] 909.166055: skb:kfree_skb: skbaddr=0xffff98168280cee0 rx_sk=(nil) protocol=34525 location=cake_dequeue+0x5ef reason: QDISC_CONGESTED Reviewed-by: Eric Dumazet Reviewed-by: Jamal Hadi Salim Acked-by: Dave Taht Signed-off-by: Toke Høiland-Jørgensen Link: https://patch.msgid.link/20241211-cake-drop-reason-v2-1-920afadf4d1b@redhat.com Signed-off-by: Jakub Kicinski --- include/net/dropreason-core.h | 18 ++++++++++++++++++ net/sched/sch_cake.c | 43 +++++++++++++++++++++++-------------------- 2 files changed, 41 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index c29282fabae6..ead4170a1d0a 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -58,6 +58,9 @@ FN(TC_EGRESS) \ FN(SECURITY_HOOK) \ FN(QDISC_DROP) \ + FN(QDISC_OVERLIMIT) \ + FN(QDISC_CONGESTED) \ + FN(CAKE_FLOOD) \ FN(FQ_BAND_LIMIT) \ FN(FQ_HORIZON_LIMIT) \ FN(FQ_FLOW_LIMIT) \ @@ -314,6 +317,21 @@ enum skb_drop_reason { * failed to enqueue to current qdisc) */ SKB_DROP_REASON_QDISC_DROP, + /** + * @SKB_DROP_REASON_QDISC_OVERLIMIT: dropped by qdisc when a qdisc + * instance exceeds its total buffer size limit. + */ + SKB_DROP_REASON_QDISC_OVERLIMIT, + /** + * @SKB_DROP_REASON_QDISC_CONGESTED: dropped by a qdisc AQM algorithm + * due to congestion. + */ + SKB_DROP_REASON_QDISC_CONGESTED, + /** + * @SKB_DROP_REASON_CAKE_FLOOD: dropped by the flood protection part of + * CAKE qdisc AQM algorithm (BLUE). + */ + SKB_DROP_REASON_CAKE_FLOOD, /** * @SKB_DROP_REASON_FQ_BAND_LIMIT: dropped by fq qdisc when per band * limit is reached. diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 8d8b2db4653c..deb0925f536d 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -484,13 +484,14 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, /* Call this with a freshly dequeued packet for possible congestion marking. * Returns true as an instruction to drop the packet, false for delivery. */ -static bool cobalt_should_drop(struct cobalt_vars *vars, - struct cobalt_params *p, - ktime_t now, - struct sk_buff *skb, - u32 bulk_flows) +static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, + struct cobalt_params *p, + ktime_t now, + struct sk_buff *skb, + u32 bulk_flows) { - bool next_due, over_target, drop = false; + enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; + bool next_due, over_target; ktime_t schedule; u64 sojourn; @@ -533,7 +534,8 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, if (next_due && vars->dropping) { /* Use ECN mark if possible, otherwise drop */ - drop = !(vars->ecn_marked = INET_ECN_set_ce(skb)); + if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) + reason = SKB_DROP_REASON_QDISC_CONGESTED; vars->count++; if (!vars->count) @@ -556,16 +558,17 @@ static bool cobalt_should_drop(struct cobalt_vars *vars, } /* Simple BLUE implementation. Lack of ECN is deliberate. */ - if (vars->p_drop) - drop |= (get_random_u32() < vars->p_drop); + if (vars->p_drop && reason == SKB_NOT_DROPPED_YET && + get_random_u32() < vars->p_drop) + reason = SKB_DROP_REASON_CAKE_FLOOD; /* Overload the drop_next field as an activity timeout */ if (!vars->count) vars->drop_next = ktime_add_ns(now, p->interval); - else if (ktime_to_ns(schedule) > 0 && !drop) + else if (ktime_to_ns(schedule) > 0 && reason == SKB_NOT_DROPPED_YET) vars->drop_next = now; - return drop; + return reason; } static bool cake_update_flowkeys(struct flow_keys *keys, @@ -1528,12 +1531,11 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) flow->dropped++; b->tin_dropped++; - sch->qstats.drops++; if (q->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, skb, now, true); - __qdisc_drop(skb, to_free); + qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); sch->q.qlen--; qdisc_tree_reduce_backlog(sch, 1, len); @@ -1926,7 +1928,7 @@ static void cake_clear_tin(struct Qdisc *sch, u16 tin) q->cur_tin = tin; for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++) while (!!(skb = cake_dequeue_one(sch))) - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_QUEUE_PURGE); } static struct sk_buff *cake_dequeue(struct Qdisc *sch) @@ -1934,6 +1936,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) struct cake_sched_data *q = qdisc_priv(sch); struct cake_tin_data *b = &q->tins[q->cur_tin]; struct cake_host *srchost, *dsthost; + enum skb_drop_reason reason; ktime_t now = ktime_get(); struct cake_flow *flow; struct list_head *head; @@ -2143,12 +2146,12 @@ retry: goto begin; } + reason = cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, + (b->bulk_flow_count * + !!(q->rate_flags & + CAKE_FLAG_INGRESS))); /* Last packet in queue may be marked, shouldn't be dropped */ - if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, - (b->bulk_flow_count * - !!(q->rate_flags & - CAKE_FLAG_INGRESS))) || - !flow->head) + if (reason == SKB_NOT_DROPPED_YET || !flow->head) break; /* drop this packet, get another one */ @@ -2162,7 +2165,7 @@ retry: b->tin_dropped++; qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); qdisc_qstats_drop(sch); - kfree_skb(skb); + kfree_skb_reason(skb, reason); if (q->rate_flags & CAKE_FLAG_INGRESS) goto retry; } -- cgit v1.2.3 From fcc680a647ba77370480fe753664cc10d572b240 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:38 +0100 Subject: page_pool: allow mixing PPs within one bulk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main reason for this change was to allow mixing pages from different &page_pools within one &xdp_buff/&xdp_frame. Why not? With stuff like devmem and io_uring zerocopy Rx, it's required to have separate PPs for header buffers and payload buffers. Adjust xdp_return_frame_bulk() and page_pool_put_netmem_bulk(), so that they won't be tied to a particular pool. Let the latter create a separate bulk of pages which's PP is different from the first netmem of the bulk and process it after the main loop. This greatly optimizes xdp_return_frame_bulk(): no more hashtable lookups and forced flushes on PP mismatch. Also make xdp_flush_frame_bulk() inline, as it's just one if + function call + one u32 read, not worth extending the call ladder. Co-developed-by: Toke Høiland-Jørgensen # iterative Signed-off-by: Toke Høiland-Jørgensen Suggested-by: Jakub Kicinski # while (count) Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-2-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 6 +-- include/net/xdp.h | 16 +++++-- net/core/page_pool.c | 109 ++++++++++++++++++++++++++++-------------- net/core/xdp.c | 29 +---------- 4 files changed, 87 insertions(+), 73 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 1ea16b0e9c79..05a864031271 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -259,8 +259,7 @@ void page_pool_disable_direct_recycling(struct page_pool *pool); void page_pool_destroy(struct page_pool *pool); void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem); -void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, - u32 count); +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count); #else static inline void page_pool_destroy(struct page_pool *pool) { @@ -272,8 +271,7 @@ static inline void page_pool_use_xdp_mem(struct page_pool *pool, { } -static inline void page_pool_put_netmem_bulk(struct page_pool *pool, - netmem_ref *data, u32 count) +static inline void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { } #endif diff --git a/include/net/xdp.h b/include/net/xdp.h index f4020b29122f..9e7eb8223513 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -11,6 +11,8 @@ #include #include /* skb_shared_info */ +#include + /** * DOC: XDP RX-queue information * @@ -193,14 +195,12 @@ xdp_frame_is_frag_pfmemalloc(const struct xdp_frame *frame) #define XDP_BULK_QUEUE_SIZE 16 struct xdp_frame_bulk { int count; - void *xa; netmem_ref q[XDP_BULK_QUEUE_SIZE]; }; static __always_inline void xdp_frame_bulk_init(struct xdp_frame_bulk *bq) { - /* bq->count will be zero'ed when bq->xa gets updated */ - bq->xa = NULL; + bq->count = 0; } static inline struct skb_shared_info * @@ -317,10 +317,18 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); -void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq); +static inline void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) +{ + if (unlikely(!bq->count)) + return; + + page_pool_put_netmem_bulk(bq->q, bq->count); + bq->count = 0; +} + static __always_inline unsigned int xdp_get_frame_len(const struct xdp_frame *xdpf) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 4c85b77cfdac..8292e3edbbfd 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -839,9 +839,41 @@ void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, } EXPORT_SYMBOL(page_pool_put_unrefed_page); +static void page_pool_recycle_ring_bulk(struct page_pool *pool, + netmem_ref *bulk, + u32 bulk_len) +{ + bool in_softirq; + u32 i; + + /* Bulk produce into ptr_ring page_pool cache */ + in_softirq = page_pool_producer_lock(pool); + + for (i = 0; i < bulk_len; i++) { + if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) { + /* ring full */ + recycle_stat_inc(pool, ring_full); + break; + } + } + + page_pool_producer_unlock(pool, in_softirq); + recycle_stat_add(pool, ring, i); + + /* Hopefully all pages were returned into ptr_ring */ + if (likely(i == bulk_len)) + return; + + /* + * ptr_ring cache is full, free remaining pages outside producer lock + * since put_page() with refcnt == 1 can be an expensive operation. + */ + for (; i < bulk_len; i++) + page_pool_return_page(pool, bulk[i]); +} + /** * page_pool_put_netmem_bulk() - release references on multiple netmems - * @pool: pool from which pages were allocated * @data: array holding netmem references * @count: number of entries in @data * @@ -854,52 +886,55 @@ EXPORT_SYMBOL(page_pool_put_unrefed_page); * Please note the caller must not use data area after running * page_pool_put_netmem_bulk(), as this function overwrites it. */ -void page_pool_put_netmem_bulk(struct page_pool *pool, netmem_ref *data, - u32 count) +void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { - int i, bulk_len = 0; - bool allow_direct; - bool in_softirq; - - allow_direct = page_pool_napi_local(pool); + u32 bulk_len = 0; - for (i = 0; i < count; i++) { + for (u32 i = 0; i < count; i++) { netmem_ref netmem = netmem_compound_head(data[i]); - /* It is not the last user for the page frag case */ - if (!page_pool_is_last_ref(netmem)) - continue; - - netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); - /* Approved for bulk recycling in ptr_ring cache */ - if (netmem) + if (page_pool_is_last_ref(netmem)) data[bulk_len++] = netmem; } - if (!bulk_len) - return; - - /* Bulk producer into ptr_ring page_pool cache */ - in_softirq = page_pool_producer_lock(pool); - for (i = 0; i < bulk_len; i++) { - if (__ptr_ring_produce(&pool->ring, (__force void *)data[i])) { - /* ring full */ - recycle_stat_inc(pool, ring_full); - break; + count = bulk_len; + while (count) { + netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; + struct page_pool *pool = NULL; + bool allow_direct; + u32 foreign = 0; + + bulk_len = 0; + + for (u32 i = 0; i < count; i++) { + struct page_pool *netmem_pp; + netmem_ref netmem = data[i]; + + netmem_pp = netmem_get_pp(netmem); + if (unlikely(!pool)) { + pool = netmem_pp; + allow_direct = page_pool_napi_local(pool); + } else if (netmem_pp != pool) { + /* + * If the netmem belongs to a different + * page_pool, save it for another round. + */ + data[foreign++] = netmem; + continue; + } + + netmem = __page_pool_put_page(pool, netmem, -1, + allow_direct); + /* Approved for bulk recycling in ptr_ring cache */ + if (netmem) + bulk[bulk_len++] = netmem; } - } - recycle_stat_add(pool, ring, i); - page_pool_producer_unlock(pool, in_softirq); - /* Hopefully all pages was return into ptr_ring */ - if (likely(i == bulk_len)) - return; + if (bulk_len) + page_pool_recycle_ring_bulk(pool, bulk, bulk_len); - /* ptr_ring cache full, free remaining pages outside producer lock - * since put_page() with refcnt == 1 can be an expensive operation - */ - for (; i < bulk_len; i++) - page_pool_return_page(pool, data[i]); + count = foreign; + } } EXPORT_SYMBOL(page_pool_put_netmem_bulk); diff --git a/net/core/xdp.c b/net/core/xdp.c index 938ad15c9857..56127e8ec85f 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -511,46 +511,19 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); * xdp_frame_bulk is usually stored/allocated on the function * call-stack to avoid locking penalties. */ -void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq) -{ - struct xdp_mem_allocator *xa = bq->xa; - - if (unlikely(!xa || !bq->count)) - return; - - page_pool_put_netmem_bulk(xa->page_pool, bq->q, bq->count); - /* bq->xa is not cleared to save lookup, if mem.id same in next bulk */ - bq->count = 0; -} -EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk); /* Must be called with rcu_read_lock held */ void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq) { - struct xdp_mem_info *mem = &xdpf->mem; - struct xdp_mem_allocator *xa; - - if (mem->type != MEM_TYPE_PAGE_POOL) { + if (xdpf->mem.type != MEM_TYPE_PAGE_POOL) { xdp_return_frame(xdpf); return; } - xa = bq->xa; - if (unlikely(!xa)) { - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - bq->count = 0; - bq->xa = xa; - } - if (bq->count == XDP_BULK_QUEUE_SIZE) xdp_flush_frame_bulk(bq); - if (unlikely(mem->id != xa->mem.id)) { - xdp_flush_frame_bulk(bq); - bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); - } - if (unlikely(xdp_frame_has_frags(xdpf))) { struct skb_shared_info *sinfo; int i; -- cgit v1.2.3 From 56d95b0adfa224bb1c67733dbcad30dd8debd39e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:39 +0100 Subject: xdp: get rid of xdp_frame::mem.id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially, xdp_frame::mem.id was used to search for the corresponding &page_pool to return the page correctly. However, after that struct page was extended to have a direct pointer to its PP (netmem has it as well), further keeping of this field makes no sense. xdp_return_frame_bulk() still used it to do a lookup, and this leftover is now removed. Remove xdp_frame::mem and replace it with ::mem_type, as only memory type still matters and we need to know it to be able to free the frame correctly. As a cute side effect, we can now make every scalar field in &xdp_frame of 4 byte width, speeding up accesses to them. Reviewed-by: Toke Høiland-Jørgensen Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-3-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +- drivers/net/veth.c | 4 ++-- include/net/xdp.h | 14 ++++++------- kernel/bpf/cpumap.c | 2 +- net/bpf/test_run.c | 4 ++-- net/core/filter.c | 12 +++++------ net/core/xdp.c | 28 +++++++++++++------------- 7 files changed, 33 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index bf5baef5c3e0..4948b4906584 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2281,7 +2281,7 @@ static int dpaa_a050385_wa_xdpf(struct dpaa_priv *priv, new_xdpf->len = xdpf->len; new_xdpf->headroom = priv->tx_headroom; new_xdpf->frame_sz = DPAA_BP_RAW_SIZE; - new_xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; + new_xdpf->mem_type = MEM_TYPE_PAGE_ORDER0; /* Release the initial buffer */ xdp_return_frame_rx_napi(xdpf); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 07ebb800edf1..01251868a9c2 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -634,7 +634,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, break; case XDP_TX: orig_frame = *frame; - xdp->rxq->mem = frame->mem; + xdp->rxq->mem.type = frame->mem_type; if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) { trace_xdp_exception(rq->dev, xdp_prog, act); frame = &orig_frame; @@ -646,7 +646,7 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq, goto xdp_xmit; case XDP_REDIRECT: orig_frame = *frame; - xdp->rxq->mem = frame->mem; + xdp->rxq->mem.type = frame->mem_type; if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) { frame = &orig_frame; stats->rx_drops++; diff --git a/include/net/xdp.h b/include/net/xdp.h index 9e7eb8223513..1c260869a353 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -169,13 +169,13 @@ out: struct xdp_frame { void *data; - u16 len; - u16 headroom; + u32 len; + u32 headroom; u32 metasize; /* uses lower 8-bits */ /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, - * while mem info is valid on remote CPU. + * while mem_type is valid on remote CPU. */ - struct xdp_mem_info mem; + enum xdp_mem_type mem_type:32; struct net_device *dev_rx; /* used by cpumap */ u32 frame_sz; u32 flags; /* supported values defined in xdp_buff_flags */ @@ -306,13 +306,13 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) if (unlikely(xdp_update_frame_from_buff(xdp, xdp_frame) < 0)) return NULL; - /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ - xdp_frame->mem = xdp->rxq->mem; + /* rxq only valid until napi_schedule ends, convert to xdp_mem_type */ + xdp_frame->mem_type = xdp->rxq->mem.type; return xdp_frame; } -void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, +void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index a2f46785ac3b..774accbd4a22 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -190,7 +190,7 @@ static int cpu_map_bpf_prog_run_xdp(struct bpf_cpu_map_entry *rcpu, int err; rxq.dev = xdpf->dev_rx; - rxq.mem = xdpf->mem; + rxq.mem.type = xdpf->mem_type; /* TODO: report queue_index to xdp_rxq_info */ xdp_convert_frame_to_buff(xdpf, &xdp); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 501ec4249fed..9ae2a7f1738b 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -153,7 +153,7 @@ static void xdp_test_run_init_page(netmem_ref netmem, void *arg) new_ctx->data = new_ctx->data_meta + meta_len; xdp_update_frame_from_buff(new_ctx, frm); - frm->mem = new_ctx->rxq->mem; + frm->mem_type = new_ctx->rxq->mem.type; memcpy(&head->orig_ctx, new_ctx, sizeof(head->orig_ctx)); } @@ -246,7 +246,7 @@ static void reset_ctx(struct xdp_page_head *head) head->ctx.data_meta = head->orig_ctx.data_meta; head->ctx.data_end = head->orig_ctx.data_end; xdp_update_frame_from_buff(&head->ctx, head->frame); - head->frame->mem = head->orig_ctx.rxq->mem; + head->frame->mem_type = head->orig_ctx.rxq->mem.type; } static int xdp_recv_frames(struct xdp_frame **frames, int nframes, diff --git a/net/core/filter.c b/net/core/filter.c index fac245065b0a..6c036708634b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4119,13 +4119,13 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset) } static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, - struct xdp_mem_info *mem_info, bool release) + enum xdp_mem_type mem_type, bool release) { struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp); if (release) { xsk_buff_del_tail(zc_frag); - __xdp_return(NULL, mem_info, false, zc_frag); + __xdp_return(NULL, mem_type, false, zc_frag); } else { zc_frag->data_end -= shrink; } @@ -4134,18 +4134,18 @@ static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag, int shrink) { - struct xdp_mem_info *mem_info = &xdp->rxq->mem; + enum xdp_mem_type mem_type = xdp->rxq->mem.type; bool release = skb_frag_size(frag) == shrink; - if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) { - bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release); + if (mem_type == MEM_TYPE_XSK_BUFF_POOL) { + bpf_xdp_shrink_data_zc(xdp, shrink, mem_type, release); goto out; } if (release) { struct page *page = skb_frag_page(frag); - __xdp_return(page_address(page), mem_info, false, NULL); + __xdp_return(page_address(page), mem_type, false, NULL); } out: diff --git a/net/core/xdp.c b/net/core/xdp.c index 56127e8ec85f..d367571c5838 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -430,12 +430,12 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool); * is used for those calls sites. Thus, allowing for faster recycling * of xdp_frames/pages in those cases. */ -void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, +void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, struct xdp_buff *xdp) { struct page *page; - switch (mem->type) { + switch (mem_type) { case MEM_TYPE_PAGE_POOL: page = virt_to_head_page(data); if (napi_direct && xdp_return_frame_no_direct()) @@ -458,7 +458,7 @@ void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, break; default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ - WARN(1, "Incorrect XDP memory type (%d) usage", mem->type); + WARN(1, "Incorrect XDP memory type (%d) usage", mem_type); break; } } @@ -475,10 +475,10 @@ void xdp_return_frame(struct xdp_frame *xdpf) for (i = 0; i < sinfo->nr_frags; i++) { struct page *page = skb_frag_page(&sinfo->frags[i]); - __xdp_return(page_address(page), &xdpf->mem, false, NULL); + __xdp_return(page_address(page), xdpf->mem_type, false, NULL); } out: - __xdp_return(xdpf->data, &xdpf->mem, false, NULL); + __xdp_return(xdpf->data, xdpf->mem_type, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); @@ -494,10 +494,10 @@ void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) for (i = 0; i < sinfo->nr_frags; i++) { struct page *page = skb_frag_page(&sinfo->frags[i]); - __xdp_return(page_address(page), &xdpf->mem, true, NULL); + __xdp_return(page_address(page), xdpf->mem_type, true, NULL); } out: - __xdp_return(xdpf->data, &xdpf->mem, true, NULL); + __xdp_return(xdpf->data, xdpf->mem_type, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); @@ -516,7 +516,7 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); void xdp_return_frame_bulk(struct xdp_frame *xdpf, struct xdp_frame_bulk *bq) { - if (xdpf->mem.type != MEM_TYPE_PAGE_POOL) { + if (xdpf->mem_type != MEM_TYPE_PAGE_POOL) { xdp_return_frame(xdpf); return; } @@ -553,10 +553,11 @@ void xdp_return_buff(struct xdp_buff *xdp) for (i = 0; i < sinfo->nr_frags; i++) { struct page *page = skb_frag_page(&sinfo->frags[i]); - __xdp_return(page_address(page), &xdp->rxq->mem, true, xdp); + __xdp_return(page_address(page), xdp->rxq->mem.type, true, + xdp); } out: - __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); + __xdp_return(xdp->data, xdp->rxq->mem.type, true, xdp); } EXPORT_SYMBOL_GPL(xdp_return_buff); @@ -602,7 +603,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp) xdpf->headroom = 0; xdpf->metasize = metasize; xdpf->frame_sz = PAGE_SIZE; - xdpf->mem.type = MEM_TYPE_PAGE_ORDER0; + xdpf->mem_type = MEM_TYPE_PAGE_ORDER0; xsk_buff_free(xdp); return xdpf; @@ -672,7 +673,7 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf, * - RX ring dev queue index (skb_record_rx_queue) */ - if (xdpf->mem.type == MEM_TYPE_PAGE_POOL) + if (xdpf->mem_type == MEM_TYPE_PAGE_POOL) skb_mark_for_recycle(skb); /* Allow SKB to reuse area used by xdp_frame */ @@ -719,8 +720,7 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf) nxdpf = addr; nxdpf->data = addr + headroom; nxdpf->frame_sz = PAGE_SIZE; - nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0; - nxdpf->mem.id = 0; + nxdpf->mem_type = MEM_TYPE_PAGE_ORDER0; return nxdpf; } -- cgit v1.2.3 From 207ff83cecaeaacf0d47c8ccbe927c8354ac1280 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:40 +0100 Subject: xdp: make __xdp_return() MP-agnostic Currently, __xdp_return() takes pointer to the virtual memory to free a buffer. Apart from that this sometimes provokes redundant data <--> page conversions, taking data pointer effectively prevents lots of XDP code to support non-page-backed buffers, as there's no mapping for the non-host memory (data is always NULL). Just convert it to always take netmem reference. For xdp_return_{buff,frame*}(), this chops off one page_address() per each frag and adds one virt_to_netmem() (same as virt_to_page()) per header buffer. For __xdp_return() itself, it removes one virt_to_page() for MEM_TYPE_PAGE_POOL and another one for MEM_TYPE_PAGE_ORDER0, adding one page_address() for [not really common nowadays] MEM_TYPE_PAGE_SHARED, but the main effect is that the abovementioned functions won't die or memleak anymore if the frame has non-host memory attached and will correctly free those. Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-4-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/net/xdp.h | 4 ++-- net/core/filter.c | 9 +++------ net/core/xdp.c | 47 +++++++++++++++++++---------------------------- 3 files changed, 24 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 1c260869a353..d2089cfecefd 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -312,8 +312,8 @@ struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp) return xdp_frame; } -void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, - struct xdp_buff *xdp); +void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type, + bool napi_direct, struct xdp_buff *xdp); void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); diff --git a/net/core/filter.c b/net/core/filter.c index 6c036708634b..5fea874025d3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4125,7 +4125,7 @@ static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink, if (release) { xsk_buff_del_tail(zc_frag); - __xdp_return(NULL, mem_type, false, zc_frag); + __xdp_return(0, mem_type, false, zc_frag); } else { zc_frag->data_end -= shrink; } @@ -4142,11 +4142,8 @@ static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag, goto out; } - if (release) { - struct page *page = skb_frag_page(frag); - - __xdp_return(page_address(page), mem_type, false, NULL); - } + if (release) + __xdp_return(skb_frag_netmem(frag), mem_type, false, NULL); out: return release; diff --git a/net/core/xdp.c b/net/core/xdp.c index d367571c5838..f1165a35411b 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -430,27 +430,25 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_attach_page_pool); * is used for those calls sites. Thus, allowing for faster recycling * of xdp_frames/pages in those cases. */ -void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, - struct xdp_buff *xdp) +void __xdp_return(netmem_ref netmem, enum xdp_mem_type mem_type, + bool napi_direct, struct xdp_buff *xdp) { - struct page *page; - switch (mem_type) { case MEM_TYPE_PAGE_POOL: - page = virt_to_head_page(data); + netmem = netmem_compound_head(netmem); if (napi_direct && xdp_return_frame_no_direct()) napi_direct = false; /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) * as mem->type knows this a page_pool page */ - page_pool_put_full_page(page->pp, page, napi_direct); + page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, + napi_direct); break; case MEM_TYPE_PAGE_SHARED: - page_frag_free(data); + page_frag_free(__netmem_address(netmem)); break; case MEM_TYPE_PAGE_ORDER0: - page = virt_to_page(data); /* Assumes order0 page*/ - put_page(page); + put_page(__netmem_to_page(netmem)); break; case MEM_TYPE_XSK_BUFF_POOL: /* NB! Only valid from an xdp_buff! */ @@ -466,38 +464,34 @@ void __xdp_return(void *data, enum xdp_mem_type mem_type, bool napi_direct, void xdp_return_frame(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; - int i; if (likely(!xdp_frame_has_frags(xdpf))) goto out; sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); + for (u32 i = 0; i < sinfo->nr_frags; i++) + __xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type, + false, NULL); - __xdp_return(page_address(page), xdpf->mem_type, false, NULL); - } out: - __xdp_return(xdpf->data, xdpf->mem_type, false, NULL); + __xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { struct skb_shared_info *sinfo; - int i; if (likely(!xdp_frame_has_frags(xdpf))) goto out; sinfo = xdp_get_shared_info_from_frame(xdpf); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); + for (u32 i = 0; i < sinfo->nr_frags; i++) + __xdp_return(skb_frag_netmem(&sinfo->frags[i]), xdpf->mem_type, + true, NULL); - __xdp_return(page_address(page), xdpf->mem_type, true, NULL); - } out: - __xdp_return(xdpf->data, xdpf->mem_type, true, NULL); + __xdp_return(virt_to_netmem(xdpf->data), xdpf->mem_type, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); @@ -544,20 +538,17 @@ EXPORT_SYMBOL_GPL(xdp_return_frame_bulk); void xdp_return_buff(struct xdp_buff *xdp) { struct skb_shared_info *sinfo; - int i; if (likely(!xdp_buff_has_frags(xdp))) goto out; sinfo = xdp_get_shared_info_from_buff(xdp); - for (i = 0; i < sinfo->nr_frags; i++) { - struct page *page = skb_frag_page(&sinfo->frags[i]); + for (u32 i = 0; i < sinfo->nr_frags; i++) + __xdp_return(skb_frag_netmem(&sinfo->frags[i]), + xdp->rxq->mem.type, true, xdp); - __xdp_return(page_address(page), xdp->rxq->mem.type, true, - xdp); - } out: - __xdp_return(xdp->data, xdp->rxq->mem.type, true, xdp); + __xdp_return(virt_to_netmem(xdp->data), xdp->rxq->mem.type, true, xdp); } EXPORT_SYMBOL_GPL(xdp_return_buff); -- cgit v1.2.3 From 0dffdb3b3366c932fb7d210f5032476c552f7000 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Wed, 11 Dec 2024 18:26:47 +0100 Subject: skbuff: allow 2-4-argument skb_frag_dma_map() skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE) is repeated across dozens of drivers and really wants a shorthand. Add a macro which will count args and handle all possible number from 2 to 5. Semantics: skb_frag_dma_map(dev, frag) -> __skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE) skb_frag_dma_map(dev, frag, offset) -> __skb_frag_dma_map(dev, frag, offset, skb_frag_size(frag) - offset, DMA_TO_DEVICE) skb_frag_dma_map(dev, frag, offset, size) -> __skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE) skb_frag_dma_map(dev, frag, offset, size, dir) -> __skb_frag_dma_map(dev, frag, offset, size, dir) No object code size changes for the existing callers. Users passing less arguments also won't have bigger size comparing to the full equivalent call. Signed-off-by: Alexander Lobakin Link: https://patch.msgid.link/20241211172649.761483-11-aleksander.lobakin@intel.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 69624b394cd9..b2509cd0b930 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3674,7 +3674,7 @@ static inline void skb_frag_page_copy(skb_frag_t *fragto, bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); /** - * skb_frag_dma_map - maps a paged fragment via the DMA API + * __skb_frag_dma_map - maps a paged fragment via the DMA API * @dev: the device to map the fragment to * @frag: the paged fragment to map * @offset: the offset within the fragment (starting at the @@ -3684,15 +3684,36 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); * * Maps the page associated with @frag to @device. */ -static inline dma_addr_t skb_frag_dma_map(struct device *dev, - const skb_frag_t *frag, - size_t offset, size_t size, - enum dma_data_direction dir) +static inline dma_addr_t __skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) { return dma_map_page(dev, skb_frag_page(frag), skb_frag_off(frag) + offset, size, dir); } +#define skb_frag_dma_map(dev, frag, ...) \ + CONCATENATE(_skb_frag_dma_map, \ + COUNT_ARGS(__VA_ARGS__))(dev, frag, ##__VA_ARGS__) + +#define __skb_frag_dma_map1(dev, frag, offset, uf, uo) ({ \ + const skb_frag_t *uf = (frag); \ + size_t uo = (offset); \ + \ + __skb_frag_dma_map(dev, uf, uo, skb_frag_size(uf) - uo, \ + DMA_TO_DEVICE); \ +}) +#define _skb_frag_dma_map1(dev, frag, offset) \ + __skb_frag_dma_map1(dev, frag, offset, __UNIQUE_ID(frag_), \ + __UNIQUE_ID(offset_)) +#define _skb_frag_dma_map0(dev, frag) \ + _skb_frag_dma_map1(dev, frag, 0) +#define _skb_frag_dma_map2(dev, frag, offset, size) \ + __skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE) +#define _skb_frag_dma_map3(dev, frag, offset, size, dir) \ + __skb_frag_dma_map(dev, frag, offset, size, dir) + static inline struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) { -- cgit v1.2.3 From 91a152cbb49c26609d217cf2f116d46143b9b8be Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:28 +0000 Subject: net: page_pool: rename page_pool_alloc_netmem to *_netmems page_pool_alloc_netmem (without an s) was the mirror of page_pool_alloc_pages (with an s), which was confusing. Rename to page_pool_alloc_netmems so it's the mirror of page_pool_alloc_pages. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241211212033.1684197-2-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/types.h | 2 +- net/core/page_pool.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 05a864031271..3270c92841b4 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -242,7 +242,7 @@ struct page_pool { }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp); +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp); netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 8292e3edbbfd..7a17af286a9e 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -574,7 +574,7 @@ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. */ -netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) +netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) { netmem_ref netmem; @@ -590,11 +590,11 @@ netmem_ref page_pool_alloc_netmem(struct page_pool *pool, gfp_t gfp) netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; } -EXPORT_SYMBOL(page_pool_alloc_netmem); +EXPORT_SYMBOL(page_pool_alloc_netmems); struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) { - return netmem_to_page(page_pool_alloc_netmem(pool, gfp)); + return netmem_to_page(page_pool_alloc_netmems(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); @@ -992,7 +992,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, } if (!netmem) { - netmem = page_pool_alloc_netmem(pool, gfp); + netmem = page_pool_alloc_netmems(pool, gfp); if (unlikely(!netmem)) { pool->frag_page = 0; return 0; -- cgit v1.2.3 From 8156c310499a34c8f42b2e2b7360abb805683bbe Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:29 +0000 Subject: net: page_pool: create page_pool_alloc_netmem Create page_pool_alloc_netmem to be the mirror of page_pool_alloc. This enables drivers that want currently use page_pool_alloc to transition to netmem by converting the call sites to page_pool_alloc_netmem. Signed-off-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20241211212033.1684197-3-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 26caa2c20912..95af7f0b029e 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -115,22 +115,22 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool, return page_pool_alloc_frag(pool, offset, size, gfp); } -static inline struct page *page_pool_alloc(struct page_pool *pool, - unsigned int *offset, - unsigned int *size, gfp_t gfp) +static inline netmem_ref page_pool_alloc_netmem(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) { unsigned int max_size = PAGE_SIZE << pool->p.order; - struct page *page; + netmem_ref netmem; if ((*size << 1) > max_size) { *size = max_size; *offset = 0; - return page_pool_alloc_pages(pool, gfp); + return page_pool_alloc_netmems(pool, gfp); } - page = page_pool_alloc_frag(pool, offset, *size, gfp); - if (unlikely(!page)) - return NULL; + netmem = page_pool_alloc_frag_netmem(pool, offset, *size, gfp); + if (unlikely(!netmem)) + return 0; /* There is very likely not enough space for another fragment, so append * the remaining size to the current fragment to avoid truesize @@ -141,7 +141,14 @@ static inline struct page *page_pool_alloc(struct page_pool *pool, pool->frag_offset = max_size; } - return page; + return netmem; +} + +static inline struct page *page_pool_alloc(struct page_pool *pool, + unsigned int *offset, + unsigned int *size, gfp_t gfp) +{ + return netmem_to_page(page_pool_alloc_netmem(pool, offset, size, gfp)); } /** -- cgit v1.2.3 From 7dba339faae991a23c54f7b93a58798c58f8c16f Mon Sep 17 00:00:00 2001 From: Mina Almasry Date: Wed, 11 Dec 2024 21:20:31 +0000 Subject: page_pool: disable sync for cpu for dmabuf memory provider dmabuf dma-addresses should not be dma_sync'd for CPU/device. Typically its the driver responsibility to dma_sync for CPU, but the driver should not dma_sync for CPU if the netmem is actually coming from a dmabuf memory provider. The page_pool already exposes a helper for dma_sync_for_cpu: page_pool_dma_sync_for_cpu. Upgrade this existing helper to handle netmem, and have it skip dma_sync if the memory is from a dmabuf memory provider. Drivers should migrate to using this helper when adding support for netmem. Also minimize the impact on the dma syncing performance for pages. Special case the dma-sync path for pages to not go through the overhead checks for dma-syncing and conversion to netmem. Cc: Alexander Lobakin Cc: Jason Gunthorpe Signed-off-by: Mina Almasry Link: https://patch.msgid.link/20241211212033.1684197-5-almasrymina@google.com Signed-off-by: Jakub Kicinski --- include/net/page_pool/helpers.h | 35 ++++++++++++++++++++++++++++++----- include/net/page_pool/types.h | 3 ++- net/core/devmem.c | 1 + net/core/page_pool.c | 1 + 4 files changed, 34 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 95af7f0b029e..e555921e5233 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -422,7 +422,21 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem) */ static inline dma_addr_t page_pool_get_dma_addr(const struct page *page) { - return page_pool_get_dma_addr_netmem(page_to_netmem((struct page *)page)); + dma_addr_t ret = page->dma_addr; + + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) + ret <<= PAGE_SHIFT; + + return ret; +} + +static inline void __page_pool_dma_sync_for_cpu(const struct page_pool *pool, + const dma_addr_t dma_addr, + u32 offset, u32 dma_sync_size) +{ + dma_sync_single_range_for_cpu(pool->p.dev, dma_addr, + offset + pool->p.offset, dma_sync_size, + page_pool_get_dma_dir(pool)); } /** @@ -441,10 +455,21 @@ static inline void page_pool_dma_sync_for_cpu(const struct page_pool *pool, const struct page *page, u32 offset, u32 dma_sync_size) { - dma_sync_single_range_for_cpu(pool->p.dev, - page_pool_get_dma_addr(page), - offset + pool->p.offset, dma_sync_size, - page_pool_get_dma_dir(pool)); + __page_pool_dma_sync_for_cpu(pool, page_pool_get_dma_addr(page), offset, + dma_sync_size); +} + +static inline void +page_pool_dma_sync_netmem_for_cpu(const struct page_pool *pool, + const netmem_ref netmem, u32 offset, + u32 dma_sync_size) +{ + if (!pool->dma_sync_for_cpu) + return; + + __page_pool_dma_sync_for_cpu(pool, + page_pool_get_dma_addr_netmem(netmem), + offset, dma_sync_size); } static inline bool page_pool_put(struct page_pool *pool) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 3270c92841b4..ed4cd114180a 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -164,7 +164,8 @@ struct page_pool { bool has_init_callback:1; /* slow::init_callback is set */ bool dma_map:1; /* Perform DMA mapping */ - bool dma_sync:1; /* Perform DMA sync */ + bool dma_sync:1; /* Perform DMA sync for device */ + bool dma_sync_for_cpu:1; /* Perform DMA sync for cpu */ #ifdef CONFIG_PAGE_POOL_STATS bool system:1; /* This is a global percpu pool */ #endif diff --git a/net/core/devmem.c b/net/core/devmem.c index 3ebdeed2bf18..0b6ed7525b22 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -335,6 +335,7 @@ int mp_dmabuf_devmem_init(struct page_pool *pool) * dma_sync_for_cpu/device. Force disable dma_sync. */ pool->dma_sync = false; + pool->dma_sync_for_cpu = false; if (pool->p.order != 0) return -E2BIG; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 275a7fd209d7..e07ad7315955 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -201,6 +201,7 @@ static int page_pool_init(struct page_pool *pool, memcpy(&pool->slow, ¶ms->slow, sizeof(pool->slow)); pool->cpuid = cpuid; + pool->dma_sync_for_cpu = true; /* Validate only known flags were used */ if (pool->slow.flags & ~PP_FLAG_ALL) -- cgit v1.2.3 From f4425e3ab2f796d442a44f31262eade9b6427ff7 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Thu, 12 Dec 2024 15:45:04 +0800 Subject: ALSA: compress: Add output rate and output format support Add 'pcm_format' for struct snd_codec, add 'pcm_formats' for struct snd_codec_desc, these are used for accelerator usage. Current accelerator example is sample rate converter (SRC). Define struct snd_codec_desc_src for descript minmum and maxmum sample rates. And add 'src_d' in union snd_codec_options structure. These are mainly used for capbility query. Signed-off-by: Jaroslav Kysela Signed-off-by: Shengjiu Wang Acked-by: Jaroslav Kysela Acked-by: Vinod Koul Link: https://patch.msgid.link/20241212074509.3445859-2-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- include/uapi/sound/compress_params.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/sound/compress_params.h b/include/uapi/sound/compress_params.h index ddc77322d571..bc7648a30746 100644 --- a/include/uapi/sound/compress_params.h +++ b/include/uapi/sound/compress_params.h @@ -334,6 +334,14 @@ union snd_codec_options { struct snd_dec_wma wma_d; struct snd_dec_alac alac_d; struct snd_dec_ape ape_d; + struct { + __u32 out_sample_rate; + } src_d; +} __attribute__((packed, aligned(4))); + +struct snd_codec_desc_src { + __u32 out_sample_rate_min; + __u32 out_sample_rate_max; } __attribute__((packed, aligned(4))); /** struct snd_codec_desc - description of codec capabilities @@ -347,6 +355,9 @@ union snd_codec_options { * @modes: Supported modes. See SND_AUDIOMODE defines * @formats: Supported formats. See SND_AUDIOSTREAMFORMAT defines * @min_buffer: Minimum buffer size handled by codec implementation + * @pcm_formats: Output (for decoders) or input (for encoders) + * PCM formats (required to accel mode, 0 for other modes) + * @u_space: union space (for codec dependent data) * @reserved: reserved for future use * * This structure provides a scalar value for profiles, modes and stream @@ -370,7 +381,12 @@ struct snd_codec_desc { __u32 modes; __u32 formats; __u32 min_buffer; - __u32 reserved[15]; + __u32 pcm_formats; + union { + __u32 u_space[6]; + struct snd_codec_desc_src src; + } __attribute__((packed, aligned(4))); + __u32 reserved[8]; } __attribute__((packed, aligned(4))); /** struct snd_codec @@ -395,6 +411,8 @@ struct snd_codec_desc { * @align: Block alignment in bytes of an audio sample. * Only required for PCM or IEC formats. * @options: encoder-specific settings + * @pcm_format: Output (for decoders) or input (for encoders) + * PCM formats (required to accel mode, 0 for other modes) * @reserved: reserved for future use */ @@ -411,7 +429,8 @@ struct snd_codec { __u32 format; __u32 align; union snd_codec_options options; - __u32 reserved[3]; + __u32 pcm_format; + __u32 reserved[2]; } __attribute__((packed, aligned(4))); #endif -- cgit v1.2.3 From 25458fdd39a18a5ce00c36f38992da54bb7453f3 Mon Sep 17 00:00:00 2001 From: Biju Das Date: Tue, 3 Dec 2024 10:49:31 +0000 Subject: dt-bindings: clock: renesas: Document RZ/G3E SoC CPG Document the device tree bindings for the Renesas RZ/G3E SoC Clock Pulse Generator (CPG). Also define constants for the core clocks of the RZ/G3E SoC. Acked-by: Conor Dooley Signed-off-by: Biju Das Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/20241203105005.103927-5-biju.das.jz@bp.renesas.com Signed-off-by: Geert Uytterhoeven --- .../bindings/clock/renesas,rzv2h-cpg.yaml | 15 +++++++++------ include/dt-bindings/clock/renesas,r9a09g047-cpg.h | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) create mode 100644 include/dt-bindings/clock/renesas,r9a09g047-cpg.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml b/Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml index 926c503bed1f..c3fe76abd549 100644 --- a/Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml +++ b/Documentation/devicetree/bindings/clock/renesas,rzv2h-cpg.yaml @@ -4,19 +4,22 @@ $id: http://devicetree.org/schemas/clock/renesas,rzv2h-cpg.yaml# $schema: http://devicetree.org/meta-schemas/core.yaml# -title: Renesas RZ/V2H(P) Clock Pulse Generator (CPG) +title: Renesas RZ/{G3E,V2H(P)} Clock Pulse Generator (CPG) maintainers: - Lad Prabhakar description: - On Renesas RZ/V2H(P) SoCs, the CPG (Clock Pulse Generator) handles generation - and control of clock signals for the IP modules, generation and control of resets, - and control over booting, low power consumption and power supply domains. + On Renesas RZ/{G3E,V2H(P)} SoCs, the CPG (Clock Pulse Generator) handles + generation and control of clock signals for the IP modules, generation and + control of resets, and control over booting, low power consumption and power + supply domains. properties: compatible: - const: renesas,r9a09g057-cpg + enum: + - renesas,r9a09g047-cpg # RZ/G3E + - renesas,r9a09g057-cpg # RZ/V2H reg: maxItems: 1 @@ -37,7 +40,7 @@ properties: description: | - For CPG core clocks, the two clock specifier cells must be "CPG_CORE" and a core clock reference, as defined in - , + , - For module clocks, the two clock specifier cells must be "CPG_MOD" and a module number. The module number is calculated as the CLKON register offset index multiplied by 16, plus the actual bit in the register diff --git a/include/dt-bindings/clock/renesas,r9a09g047-cpg.h b/include/dt-bindings/clock/renesas,r9a09g047-cpg.h new file mode 100644 index 000000000000..1d031bf6bf03 --- /dev/null +++ b/include/dt-bindings/clock/renesas,r9a09g047-cpg.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) + * + * Copyright (C) 2024 Renesas Electronics Corp. + */ +#ifndef __DT_BINDINGS_CLOCK_RENESAS_R9A09G047_CPG_H__ +#define __DT_BINDINGS_CLOCK_RENESAS_R9A09G047_CPG_H__ + +#include + +/* Core Clock list */ +#define R9A09G047_SYS_0_PCLK 0 +#define R9A09G047_CA55_0_CORECLK0 1 +#define R9A09G047_CA55_0_CORECLK1 2 +#define R9A09G047_CA55_0_CORECLK2 3 +#define R9A09G047_CA55_0_CORECLK3 4 +#define R9A09G047_CA55_0_PERIPHCLK 5 +#define R9A09G047_CM33_CLK0 6 +#define R9A09G047_CST_0_SWCLKTCK 7 +#define R9A09G047_IOTOP_0_SHCLK 8 + +#endif /* __DT_BINDINGS_CLOCK_RENESAS_R9A09G047_CPG_H__ */ -- cgit v1.2.3 From 62374ce1876be26b3f33575680e67ca69a59db54 Mon Sep 17 00:00:00 2001 From: Tao Zhang Date: Fri, 13 Dec 2024 18:07:29 +0800 Subject: coresight: Add a helper to check if a device is source Since there are a lot of places in the code to check whether the device is source, add a helper to check it. Signed-off-by: Tao Zhang Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20241213100731.25914-3-quic_taozha@quicinc.com --- drivers/hwtracing/coresight/coresight-tpda.c | 2 +- include/linux/coresight.h | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/hwtracing/coresight/coresight-tpda.c b/drivers/hwtracing/coresight/coresight-tpda.c index 4ec676bea1ce..ab94067c0ed3 100644 --- a/drivers/hwtracing/coresight/coresight-tpda.c +++ b/drivers/hwtracing/coresight/coresight-tpda.c @@ -24,7 +24,7 @@ DEFINE_CORESIGHT_DEVLIST(tpda_devs, "tpda"); static bool coresight_device_is_tpdm(struct coresight_device *csdev) { - return (csdev->type == CORESIGHT_DEV_TYPE_SOURCE) && + return (coresight_is_device_source(csdev)) && (csdev->subtype.source_subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_TPDM); } diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 055ce5cd5c44..c50d128e8d93 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -588,9 +588,14 @@ static inline void csdev_access_write64(struct csdev_access *csa, u64 val, u32 o } #endif /* CONFIG_64BIT */ +static inline bool coresight_is_device_source(struct coresight_device *csdev) +{ + return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE); +} + static inline bool coresight_is_percpu_source(struct coresight_device *csdev) { - return csdev && (csdev->type == CORESIGHT_DEV_TYPE_SOURCE) && + return csdev && coresight_is_device_source(csdev) && (csdev->subtype.source_subtype == CORESIGHT_DEV_SUBTYPE_SOURCE_PROC); } -- cgit v1.2.3 From ec9903d6cc34e61b77e609a0425e7a0a804fb95a Mon Sep 17 00:00:00 2001 From: Tao Zhang Date: Fri, 13 Dec 2024 18:07:30 +0800 Subject: coresight: Add support for trace filtering by source Some replicators have hard coded filtering of "trace" data, based on the source device. This is different from the trace filtering based on TraceID, available in the standard programmable replicators. e.g., Qualcomm replicators have filtering based on custom trace protocol format and is not programmable. The source device could be connected to the replicator via intermediate components (e.g., a funnel). Thus we need platform information from the firmware tables to decide the source device corresponding to a given output port from the replicator. Given this affects "trace path building" and traversing the path back from the sink to source, add the concept of "filtering by source" to the generic coresight connection. The specified source will be marked like below in the Devicetree. test-replicator { ... ... ... ... out-ports { ... ... ... ... port@0 { reg = <0>; xyz: endpoint { remote-endpoint = <&zyx>; filter-source = <&source_1>; <-- To specify the source to }; be filtered out here. }; port@1 { reg = <1>; abc: endpoint { remote-endpoint = <&cba>; filter-source = <&source_2>; <-- To specify the source to }; be filtered out here. }; }; }; Signed-off-by: Tao Zhang Signed-off-by: Suzuki K Poulose Link: https://lore.kernel.org/r/20241213100731.25914-4-quic_taozha@quicinc.com --- drivers/hwtracing/coresight/coresight-core.c | 113 +++++++++++++++++++---- drivers/hwtracing/coresight/coresight-platform.c | 21 +++++ include/linux/coresight.h | 5 + 3 files changed, 120 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index ea38ecf26fcb..0a9380350fb5 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -75,22 +75,54 @@ struct coresight_device *coresight_get_percpu_sink(int cpu) } EXPORT_SYMBOL_GPL(coresight_get_percpu_sink); +static struct coresight_device *coresight_get_source(struct list_head *path) +{ + struct coresight_device *csdev; + + if (!path) + return NULL; + + csdev = list_first_entry(path, struct coresight_node, link)->csdev; + if (!coresight_is_device_source(csdev)) + return NULL; + + return csdev; +} + +/** + * coresight_blocks_source - checks whether the connection matches the source + * of path if connection is bound to specific source. + * @src: The source device of the trace path + * @conn: The connection of one outport + * + * Return false if the connection doesn't have a source binded or source of the + * path matches the source binds to connection. + */ +static bool coresight_blocks_source(struct coresight_device *src, + struct coresight_connection *conn) +{ + return conn->filter_src_fwnode && (conn->filter_src_dev != src); +} + static struct coresight_connection * -coresight_find_out_connection(struct coresight_device *src_dev, - struct coresight_device *dest_dev) +coresight_find_out_connection(struct coresight_device *csdev, + struct coresight_device *out_dev, + struct coresight_device *trace_src) { int i; struct coresight_connection *conn; - for (i = 0; i < src_dev->pdata->nr_outconns; i++) { - conn = src_dev->pdata->out_conns[i]; - if (conn->dest_dev == dest_dev) + for (i = 0; i < csdev->pdata->nr_outconns; i++) { + conn = csdev->pdata->out_conns[i]; + if (coresight_blocks_source(trace_src, conn)) + continue; + if (conn->dest_dev == out_dev) return conn; } - dev_err(&src_dev->dev, - "couldn't find output connection, src_dev: %s, dest_dev: %s\n", - dev_name(&src_dev->dev), dev_name(&dest_dev->dev)); + dev_err(&csdev->dev, + "couldn't find output connection, csdev: %s, out_dev: %s\n", + dev_name(&csdev->dev), dev_name(&out_dev->dev)); return ERR_PTR(-ENODEV); } @@ -251,7 +283,8 @@ static void coresight_disable_sink(struct coresight_device *csdev) static int coresight_enable_link(struct coresight_device *csdev, struct coresight_device *parent, - struct coresight_device *child) + struct coresight_device *child, + struct coresight_device *source) { int link_subtype; struct coresight_connection *inconn, *outconn; @@ -259,8 +292,8 @@ static int coresight_enable_link(struct coresight_device *csdev, if (!parent || !child) return -EINVAL; - inconn = coresight_find_out_connection(parent, csdev); - outconn = coresight_find_out_connection(csdev, child); + inconn = coresight_find_out_connection(parent, csdev, source); + outconn = coresight_find_out_connection(csdev, child, source); link_subtype = csdev->subtype.link_subtype; if (link_subtype == CORESIGHT_DEV_SUBTYPE_LINK_MERG && IS_ERR(inconn)) @@ -273,15 +306,16 @@ static int coresight_enable_link(struct coresight_device *csdev, static void coresight_disable_link(struct coresight_device *csdev, struct coresight_device *parent, - struct coresight_device *child) + struct coresight_device *child, + struct coresight_device *source) { struct coresight_connection *inconn, *outconn; if (!parent || !child) return; - inconn = coresight_find_out_connection(parent, csdev); - outconn = coresight_find_out_connection(csdev, child); + inconn = coresight_find_out_connection(parent, csdev, source); + outconn = coresight_find_out_connection(csdev, child, source); link_ops(csdev)->disable(csdev, inconn, outconn); } @@ -375,7 +409,8 @@ static void coresight_disable_path_from(struct list_head *path, case CORESIGHT_DEV_TYPE_LINK: parent = list_prev_entry(nd, link)->csdev; child = list_next_entry(nd, link)->csdev; - coresight_disable_link(csdev, parent, child); + coresight_disable_link(csdev, parent, child, + coresight_get_source(path)); break; default: break; @@ -418,7 +453,9 @@ int coresight_enable_path(struct list_head *path, enum cs_mode mode, u32 type; struct coresight_node *nd; struct coresight_device *csdev, *parent, *child; + struct coresight_device *source; + source = coresight_get_source(path); list_for_each_entry_reverse(nd, path, link) { csdev = nd->csdev; type = csdev->type; @@ -456,7 +493,7 @@ int coresight_enable_path(struct list_head *path, enum cs_mode mode, case CORESIGHT_DEV_TYPE_LINK: parent = list_prev_entry(nd, link)->csdev; child = list_next_entry(nd, link)->csdev; - ret = coresight_enable_link(csdev, parent, child); + ret = coresight_enable_link(csdev, parent, child, source); if (ret) goto err; break; @@ -619,6 +656,7 @@ static void coresight_drop_device(struct coresight_device *csdev) /** * _coresight_build_path - recursively build a path from a @csdev to a sink. * @csdev: The device to start from. + * @source: The trace source device of the path. * @sink: The final sink we want in this path. * @path: The list to add devices to. * @@ -628,6 +666,7 @@ static void coresight_drop_device(struct coresight_device *csdev) * the source is the first device and the sink the last one. */ static int _coresight_build_path(struct coresight_device *csdev, + struct coresight_device *source, struct coresight_device *sink, struct list_head *path) { @@ -641,7 +680,7 @@ static int _coresight_build_path(struct coresight_device *csdev, if (coresight_is_percpu_source(csdev) && coresight_is_percpu_sink(sink) && sink == per_cpu(csdev_sink, source_ops(csdev)->cpu_id(csdev))) { - if (_coresight_build_path(sink, sink, path) == 0) { + if (_coresight_build_path(sink, source, sink, path) == 0) { found = true; goto out; } @@ -652,8 +691,12 @@ static int _coresight_build_path(struct coresight_device *csdev, struct coresight_device *child_dev; child_dev = csdev->pdata->out_conns[i]->dest_dev; + + if (coresight_blocks_source(source, csdev->pdata->out_conns[i])) + continue; + if (child_dev && - _coresight_build_path(child_dev, sink, path) == 0) { + _coresight_build_path(child_dev, source, sink, path) == 0) { found = true; break; } @@ -698,7 +741,7 @@ struct list_head *coresight_build_path(struct coresight_device *source, INIT_LIST_HEAD(path); - rc = _coresight_build_path(source, sink, path); + rc = _coresight_build_path(source, source, sink, path); if (rc) { kfree(path); return ERR_PTR(rc); @@ -927,6 +970,16 @@ static int coresight_orphan_match(struct device *dev, void *data) for (i = 0; i < src_csdev->pdata->nr_outconns; i++) { conn = src_csdev->pdata->out_conns[i]; + /* Fix filter source device before skip the port */ + if (conn->filter_src_fwnode && !conn->filter_src_dev) { + if (dst_csdev && + (conn->filter_src_fwnode == dst_csdev->dev.fwnode) && + !WARN_ON_ONCE(!coresight_is_device_source(dst_csdev))) + conn->filter_src_dev = dst_csdev; + else + still_orphan = true; + } + /* Skip the port if it's already connected. */ if (conn->dest_dev) continue; @@ -977,18 +1030,40 @@ static int coresight_fixup_orphan_conns(struct coresight_device *csdev) csdev, coresight_orphan_match); } +static int coresight_clear_filter_source(struct device *dev, void *data) +{ + int i; + struct coresight_device *source = data; + struct coresight_device *csdev = to_coresight_device(dev); + + for (i = 0; i < csdev->pdata->nr_outconns; ++i) { + if (csdev->pdata->out_conns[i]->filter_src_dev == source) + csdev->pdata->out_conns[i]->filter_src_dev = NULL; + } + return 0; +} + /* coresight_remove_conns - Remove other device's references to this device */ static void coresight_remove_conns(struct coresight_device *csdev) { int i, j; struct coresight_connection *conn; + if (coresight_is_device_source(csdev)) + bus_for_each_dev(&coresight_bustype, NULL, csdev, + coresight_clear_filter_source); + /* * Remove the input connection references from the destination device * for each output connection. */ for (i = 0; i < csdev->pdata->nr_outconns; i++) { conn = csdev->pdata->out_conns[i]; + if (conn->filter_src_fwnode) { + conn->filter_src_dev = NULL; + fwnode_handle_put(conn->filter_src_fwnode); + } + if (!conn->dest_dev) continue; diff --git a/drivers/hwtracing/coresight/coresight-platform.c b/drivers/hwtracing/coresight/coresight-platform.c index 633d96b9577a..8192ba3279f0 100644 --- a/drivers/hwtracing/coresight/coresight-platform.c +++ b/drivers/hwtracing/coresight/coresight-platform.c @@ -243,6 +243,27 @@ static int of_coresight_parse_endpoint(struct device *dev, conn.dest_fwnode = fwnode_handle_get(rdev_fwnode); conn.dest_port = rendpoint.port; + /* + * Get the firmware node of the filter source through the + * reference. This could be used to filter the source in + * building path. + */ + conn.filter_src_fwnode = + fwnode_find_reference(&ep->fwnode, "filter-source", 0); + if (IS_ERR(conn.filter_src_fwnode)) { + conn.filter_src_fwnode = NULL; + } else { + conn.filter_src_dev = + coresight_find_csdev_by_fwnode(conn.filter_src_fwnode); + if (conn.filter_src_dev && + !coresight_is_device_source(conn.filter_src_dev)) { + dev_warn(dev, "port %d: Filter handle is not a trace source : %s\n", + conn.src_port, dev_name(&conn.filter_src_dev->dev)); + conn.filter_src_dev = NULL; + conn.filter_src_fwnode = NULL; + } + } + new_conn = coresight_add_out_conn(dev, pdata, &conn); if (IS_ERR_VALUE(new_conn)) { fwnode_handle_put(conn.dest_fwnode); diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c50d128e8d93..17276965ff1d 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -172,6 +172,9 @@ struct coresight_desc { * @dest_dev: a @coresight_device representation of the component connected to @src_port. NULL until the device is created * @link: Representation of the connection as a sysfs link. + * @filter_src_fwnode: filter source component's fwnode handle. + * @filter_src_dev: a @coresight_device representation of the component that + needs to be filtered. * * The full connection structure looks like this, where in_conns store * references to same connection as the source device's out_conns. @@ -200,6 +203,8 @@ struct coresight_connection { struct coresight_device *dest_dev; struct coresight_sysfs_link *link; struct coresight_device *src_dev; + struct fwnode_handle *filter_src_fwnode; + struct coresight_device *filter_src_dev; int src_refcnt; int dest_refcnt; }; -- cgit v1.2.3 From 70a667d70cce338ab8552dd762ae114a5ab96500 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 13 Dec 2024 12:11:22 +0200 Subject: ASoC: SOF: Add support for pause supported tokens from topology New tokens are added to topology: 1202: SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED 1203: SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED The new tokens are used to advertise support for PAUSE/RESUME operation on a PCM device depending on firmware product, use case, pipeline topology. The snd_sof_pcm_stream.pause_supported is updated to reflect the advertised value for the PCM device. If the token does not exist then the pause_supported is set to false. Note: it is up to the platform code to use this flag to decide to advertise the PAUSE support for user space or not. Signed-off-by: Peter Ujfalusi Reviewed-by: Pierre-Louis Bossart Reviewed-by: Bard Liao Reviewed-by: Kai Vehmanen Reviewed-by: Liam Girdwood Link: https://patch.msgid.link/20241213101123.27318-2-peter.ujfalusi@linux.intel.com Signed-off-by: Mark Brown --- include/uapi/sound/sof/tokens.h | 2 ++ sound/soc/sof/sof-audio.h | 1 + sound/soc/sof/topology.c | 4 ++++ 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/include/uapi/sound/sof/tokens.h b/include/uapi/sound/sof/tokens.h index 0a246bc218d3..c28c766270de 100644 --- a/include/uapi/sound/sof/tokens.h +++ b/include/uapi/sound/sof/tokens.h @@ -153,6 +153,8 @@ /* Stream */ #define SOF_TKN_STREAM_PLAYBACK_COMPATIBLE_D0I3 1200 #define SOF_TKN_STREAM_CAPTURE_COMPATIBLE_D0I3 1201 +#define SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED 1202 +#define SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED 1203 /* Led control for mute switches */ #define SOF_TKN_MUTE_LED_USE 1300 diff --git a/sound/soc/sof/sof-audio.h b/sound/soc/sof/sof-audio.h index 01b819dd8498..62f3c11a9216 100644 --- a/sound/soc/sof/sof-audio.h +++ b/sound/soc/sof/sof-audio.h @@ -332,6 +332,7 @@ struct snd_sof_pcm_stream { struct work_struct period_elapsed_work; struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */ bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */ + bool pause_supported; /* PCM device supports PAUSE operation */ unsigned int dsp_max_burst_size_in_ms; /* The maximum size of the host DMA burst in ms */ /* * flag to indicate that the DSP pipelines should be kept diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index b3fca5fd87d6..688cc7ac1714 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -407,6 +407,10 @@ static const struct sof_topology_token stream_tokens[] = { offsetof(struct snd_sof_pcm, stream[0].d0i3_compatible)}, {SOF_TKN_STREAM_CAPTURE_COMPATIBLE_D0I3, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16, offsetof(struct snd_sof_pcm, stream[1].d0i3_compatible)}, + {SOF_TKN_STREAM_PLAYBACK_PAUSE_SUPPORTED, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16, + offsetof(struct snd_sof_pcm, stream[0].pause_supported)}, + {SOF_TKN_STREAM_CAPTURE_PAUSE_SUPPORTED, SND_SOC_TPLG_TUPLE_TYPE_BOOL, get_token_u16, + offsetof(struct snd_sof_pcm, stream[1].pause_supported)}, }; /* Leds */ -- cgit v1.2.3 From d54a3fc6bf3db0db0e16cfdf7f48a8bbb803f6b0 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 12 Dec 2024 14:37:14 +0000 Subject: firmware: cs_dsp: Add mock regmap for KUnit testing Add a mock regmap implementation to act as a simulated DSP for KUnit testing. This is built as a utility module so that it could be used by clients of cs_dsp to create a mock "DSP" for their own testing. cs_dsp interacts with the DSP only through registers. Most of the register space of the DSP is RAM. ADSP cores have a small set of control registers. HALO Core DSPs have a much larger set of control registers but only a small subset are used. Most writes are "blind" in the sense that cs_dsp does not expect to receive any sort of response from the DSP. So there isn't any need to emulate a "DSP", only a set of registers that can be written and read back. The idea of the mock regmap is to use the cache to accumulate writes which can then be tested against the values that are expected to be in the registers. Stray writes can be detected by dropping the cache entries for all addresses that should have been written and then issuing a regcache_sync(). If this causes bus writes it means there were writes to unexpected registers. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20241212143725.1381013-2-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- MAINTAINERS | 4 +- drivers/firmware/cirrus/Kconfig | 6 + drivers/firmware/cirrus/Makefile | 2 + drivers/firmware/cirrus/test/Makefile | 8 + drivers/firmware/cirrus/test/cs_dsp_mock_regmap.c | 367 ++++++++++++++++++++++ drivers/firmware/cirrus/test/cs_dsp_mock_utils.c | 13 + include/linux/firmware/cirrus/cs_dsp_test_utils.h | 46 +++ 7 files changed, 444 insertions(+), 2 deletions(-) create mode 100644 drivers/firmware/cirrus/test/Makefile create mode 100644 drivers/firmware/cirrus/test/cs_dsp_mock_regmap.c create mode 100644 drivers/firmware/cirrus/test/cs_dsp_mock_utils.c create mode 100644 include/linux/firmware/cirrus/cs_dsp_test_utils.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 17daa9ee9384..91bcdf21e7ba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5501,8 +5501,8 @@ L: patches@opensource.cirrus.com S: Supported W: https://github.com/CirrusLogic/linux-drivers/wiki T: git https://github.com/CirrusLogic/linux-drivers.git -F: drivers/firmware/cirrus/* -F: include/linux/firmware/cirrus/* +F: drivers/firmware/cirrus/ +F: include/linux/firmware/cirrus/ CIRRUS LOGIC EP93XX ETHERNET DRIVER M: Hartley Sweeten diff --git a/drivers/firmware/cirrus/Kconfig b/drivers/firmware/cirrus/Kconfig index 3ccbe14e4b0c..35de3e490d98 100644 --- a/drivers/firmware/cirrus/Kconfig +++ b/drivers/firmware/cirrus/Kconfig @@ -3,3 +3,9 @@ config FW_CS_DSP tristate default n + +config FW_CS_DSP_KUNIT_TEST_UTILS + tristate + depends on KUNIT + select REGMAP + select FW_CS_DSP diff --git a/drivers/firmware/cirrus/Makefile b/drivers/firmware/cirrus/Makefile index b91318ca0ff4..b32dfa869491 100644 --- a/drivers/firmware/cirrus/Makefile +++ b/drivers/firmware/cirrus/Makefile @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 # obj-$(CONFIG_FW_CS_DSP) += cs_dsp.o + +obj-y += test/ diff --git a/drivers/firmware/cirrus/test/Makefile b/drivers/firmware/cirrus/test/Makefile new file mode 100644 index 000000000000..373d8844c085 --- /dev/null +++ b/drivers/firmware/cirrus/test/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +# + +cs_dsp_test_utils-objs := \ + cs_dsp_mock_regmap.o \ + cs_dsp_mock_utils.o + +obj-$(CONFIG_FW_CS_DSP_KUNIT_TEST_UTILS) += cs_dsp_test_utils.o diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_regmap.c b/drivers/firmware/cirrus/test/cs_dsp_mock_regmap.c new file mode 100644 index 000000000000..fb8e4a5d189a --- /dev/null +++ b/drivers/firmware/cirrus/test/cs_dsp_mock_regmap.c @@ -0,0 +1,367 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Mock regmap for cs_dsp KUnit tests. +// +// Copyright (C) 2024 Cirrus Logic, Inc. and +// Cirrus Logic International Semiconductor Ltd. + +#include +#include +#include +#include +#include + +static int cs_dsp_mock_regmap_read(void *context, const void *reg_buf, + const size_t reg_size, void *val_buf, + size_t val_size) +{ + struct cs_dsp_test *priv = context; + + /* Should never get here because the regmap is cache-only */ + KUNIT_FAIL(priv->test, "Unexpected bus read @%#x", *(u32 *)reg_buf); + + return -EIO; +} + +static int cs_dsp_mock_regmap_gather_write(void *context, + const void *reg_buf, size_t reg_size, + const void *val_buf, size_t val_size) +{ + struct cs_dsp_test *priv = context; + + priv->saw_bus_write = true; + + /* Should never get here because the regmap is cache-only */ + KUNIT_FAIL(priv->test, "Unexpected bus gather_write @%#x", *(u32 *)reg_buf); + + return -EIO; +} + +static int cs_dsp_mock_regmap_write(void *context, const void *val_buf, size_t val_size) +{ + struct cs_dsp_test *priv = context; + + priv->saw_bus_write = true; + + /* Should never get here because the regmap is cache-only */ + KUNIT_FAIL(priv->test, "Unexpected bus write @%#x", *(u32 *)val_buf); + + return -EIO; +} + +static const struct regmap_bus cs_dsp_mock_regmap_bus = { + .read = cs_dsp_mock_regmap_read, + .write = cs_dsp_mock_regmap_write, + .gather_write = cs_dsp_mock_regmap_gather_write, + .reg_format_endian_default = REGMAP_ENDIAN_LITTLE, + .val_format_endian_default = REGMAP_ENDIAN_LITTLE, +}; + +static const struct reg_default adsp2_32bit_register_defaults[] = { + { 0xffe00, 0x0000 }, /* CONTROL */ + { 0xffe02, 0x0000 }, /* CLOCKING */ + { 0xffe04, 0x0001 }, /* STATUS1: RAM_RDY=1 */ + { 0xffe30, 0x0000 }, /* WDMW_CONFIG_1 */ + { 0xffe32, 0x0000 }, /* WDMA_CONFIG_2 */ + { 0xffe34, 0x0000 }, /* RDMA_CONFIG_1 */ + { 0xffe40, 0x0000 }, /* SCRATCH_0_1 */ + { 0xffe42, 0x0000 }, /* SCRATCH_2_3 */ +}; + +static const struct regmap_range adsp2_32bit_registers[] = { + regmap_reg_range(0x80000, 0x88ffe), /* PM */ + regmap_reg_range(0xa0000, 0xa9ffe), /* XM */ + regmap_reg_range(0xc0000, 0xc1ffe), /* YM */ + regmap_reg_range(0xe0000, 0xe1ffe), /* ZM */ + regmap_reg_range(0xffe00, 0xffe7c), /* CORE CTRL */ +}; + +const unsigned int cs_dsp_mock_adsp2_32bit_sysbase = 0xffe00; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_adsp2_32bit_sysbase, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +static const struct regmap_access_table adsp2_32bit_rw = { + .yes_ranges = adsp2_32bit_registers, + .n_yes_ranges = ARRAY_SIZE(adsp2_32bit_registers), +}; + +static const struct regmap_config cs_dsp_mock_regmap_adsp2_32bit = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 2, + .reg_format_endian = REGMAP_ENDIAN_LITTLE, + .val_format_endian = REGMAP_ENDIAN_BIG, + .wr_table = &adsp2_32bit_rw, + .rd_table = &adsp2_32bit_rw, + .max_register = 0xffe7c, + .reg_defaults = adsp2_32bit_register_defaults, + .num_reg_defaults = ARRAY_SIZE(adsp2_32bit_register_defaults), + .cache_type = REGCACHE_MAPLE, +}; + +static const struct reg_default adsp2_16bit_register_defaults[] = { + { 0x1100, 0x0000 }, /* CONTROL */ + { 0x1101, 0x0000 }, /* CLOCKING */ + { 0x1104, 0x0001 }, /* STATUS1: RAM_RDY=1 */ + { 0x1130, 0x0000 }, /* WDMW_CONFIG_1 */ + { 0x1131, 0x0000 }, /* WDMA_CONFIG_2 */ + { 0x1134, 0x0000 }, /* RDMA_CONFIG_1 */ + { 0x1140, 0x0000 }, /* SCRATCH_0 */ + { 0x1141, 0x0000 }, /* SCRATCH_1 */ + { 0x1142, 0x0000 }, /* SCRATCH_2 */ + { 0x1143, 0x0000 }, /* SCRATCH_3 */ +}; + +static const struct regmap_range adsp2_16bit_registers[] = { + regmap_reg_range(0x001100, 0x001143), /* CORE CTRL */ + regmap_reg_range(0x100000, 0x105fff), /* PM */ + regmap_reg_range(0x180000, 0x1807ff), /* ZM */ + regmap_reg_range(0x190000, 0x1947ff), /* XM */ + regmap_reg_range(0x1a8000, 0x1a97ff), /* YM */ +}; + +const unsigned int cs_dsp_mock_adsp2_16bit_sysbase = 0x001100; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_adsp2_16bit_sysbase, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +static const struct regmap_access_table adsp2_16bit_rw = { + .yes_ranges = adsp2_16bit_registers, + .n_yes_ranges = ARRAY_SIZE(adsp2_16bit_registers), +}; + +static const struct regmap_config cs_dsp_mock_regmap_adsp2_16bit = { + .reg_bits = 32, + .val_bits = 16, + .reg_stride = 1, + .reg_format_endian = REGMAP_ENDIAN_LITTLE, + .val_format_endian = REGMAP_ENDIAN_BIG, + .wr_table = &adsp2_16bit_rw, + .rd_table = &adsp2_16bit_rw, + .max_register = 0x1a97ff, + .reg_defaults = adsp2_16bit_register_defaults, + .num_reg_defaults = ARRAY_SIZE(adsp2_16bit_register_defaults), + .cache_type = REGCACHE_MAPLE, +}; + +static const struct reg_default halo_register_defaults[] = { + /* CORE */ + { 0x2b80010, 0 }, /* HALO_CORE_SOFT_RESET */ + { 0x2b805c0, 0 }, /* HALO_SCRATCH1 */ + { 0x2b805c8, 0 }, /* HALO_SCRATCH2 */ + { 0x2b805d0, 0 }, /* HALO_SCRATCH3 */ + { 0x2b805c8, 0 }, /* HALO_SCRATCH4 */ + { 0x2bc1000, 0 }, /* HALO_CCM_CORE_CONTROL */ + { 0x2bc7000, 0 }, /* HALO_WDT_CONTROL */ + + /* SYSINFO */ + { 0x25e2040, 0 }, /* HALO_AHBM_WINDOW_DEBUG_0 */ + { 0x25e2044, 0 }, /* HALO_AHBM_WINDOW_DEBUG_1 */ +}; + +static const struct regmap_range halo_readable_registers[] = { + regmap_reg_range(0x2000000, 0x2005fff), /* XM_PACKED */ + regmap_reg_range(0x25e0000, 0x25e004f), /* SYSINFO */ + regmap_reg_range(0x25e2000, 0x25e2047), /* SYSINFO */ + regmap_reg_range(0x2800000, 0x2807fff), /* XM */ + regmap_reg_range(0x2b80000, 0x2bc700b), /* CORE CTRL */ + regmap_reg_range(0x2c00000, 0x2c047f3), /* YM_PACKED */ + regmap_reg_range(0x3400000, 0x3405ff7), /* YM */ + regmap_reg_range(0x3800000, 0x3804fff), /* PM_PACKED */ +}; + +static const struct regmap_range halo_writeable_registers[] = { + regmap_reg_range(0x2000000, 0x2005fff), /* XM_PACKED */ + regmap_reg_range(0x2800000, 0x2807fff), /* XM */ + regmap_reg_range(0x2b80000, 0x2bc700b), /* CORE CTRL */ + regmap_reg_range(0x2c00000, 0x2c047f3), /* YM_PACKED */ + regmap_reg_range(0x3400000, 0x3405ff7), /* YM */ + regmap_reg_range(0x3800000, 0x3804fff), /* PM_PACKED */ +}; + +const unsigned int cs_dsp_mock_halo_core_base = 0x2b80000; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_halo_core_base, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +const unsigned int cs_dsp_mock_halo_sysinfo_base = 0x25e0000; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_halo_sysinfo_base, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +static const struct regmap_access_table halo_readable = { + .yes_ranges = halo_readable_registers, + .n_yes_ranges = ARRAY_SIZE(halo_readable_registers), +}; + +static const struct regmap_access_table halo_writeable = { + .yes_ranges = halo_writeable_registers, + .n_yes_ranges = ARRAY_SIZE(halo_writeable_registers), +}; + +static const struct regmap_config cs_dsp_mock_regmap_halo = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .reg_format_endian = REGMAP_ENDIAN_LITTLE, + .val_format_endian = REGMAP_ENDIAN_BIG, + .wr_table = &halo_writeable, + .rd_table = &halo_readable, + .max_register = 0x3804ffc, + .reg_defaults = halo_register_defaults, + .num_reg_defaults = ARRAY_SIZE(halo_register_defaults), + .cache_type = REGCACHE_MAPLE, +}; + +/** + * cs_dsp_mock_regmap_drop_range() - drop a range of registers from the cache. + * + * @priv: Pointer to struct cs_dsp_test object. + * @first_reg: Address of first register to drop. + * @last_reg: Address of last register to drop. + */ +void cs_dsp_mock_regmap_drop_range(struct cs_dsp_test *priv, + unsigned int first_reg, unsigned int last_reg) +{ + regcache_drop_region(priv->dsp->regmap, first_reg, last_reg); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_regmap_drop_range, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_regmap_drop_regs() - drop a number of registers from the cache. + * + * @priv: Pointer to struct cs_dsp_test object. + * @first_reg: Address of first register to drop. + * @num_regs: Number of registers to drop. + */ +void cs_dsp_mock_regmap_drop_regs(struct cs_dsp_test *priv, + unsigned int first_reg, size_t num_regs) +{ + int stride = regmap_get_reg_stride(priv->dsp->regmap); + unsigned int last = first_reg + (stride * (num_regs - 1)); + + cs_dsp_mock_regmap_drop_range(priv, first_reg, last); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_regmap_drop_regs, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_regmap_drop_bytes() - drop a number of bytes from the cache. + * + * @priv: Pointer to struct cs_dsp_test object. + * @first_reg: Address of first register to drop. + * @num_bytes: Number of bytes to drop from the cache. Will be rounded + * down to a whole number of registers. Trailing bytes that + * are not a multiple of the register size will not be dropped. + * (This is intended to help detect math errors in test code.) + */ +void cs_dsp_mock_regmap_drop_bytes(struct cs_dsp_test *priv, + unsigned int first_reg, size_t num_bytes) +{ + size_t num_regs = num_bytes / regmap_get_val_bytes(priv->dsp->regmap); + + cs_dsp_mock_regmap_drop_regs(priv, first_reg, num_regs); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_regmap_drop_bytes, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_regmap_drop_system_regs() - Drop DSP system registers from the cache. + * + * @priv: Pointer to struct cs_dsp_test object. + * + * Drops all DSP system registers from the regmap cache. + */ +void cs_dsp_mock_regmap_drop_system_regs(struct cs_dsp_test *priv) +{ + switch (priv->dsp->type) { + case WMFW_ADSP2: + if (priv->dsp->base) { + regcache_drop_region(priv->dsp->regmap, + priv->dsp->base, + priv->dsp->base + 0x7c); + } + return; + case WMFW_HALO: + if (priv->dsp->base) { + regcache_drop_region(priv->dsp->regmap, + priv->dsp->base, + priv->dsp->base + 0x47000); + } + + /* sysinfo registers are read-only so don't drop them */ + return; + default: + return; + } +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_regmap_drop_system_regs, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_regmap_is_dirty() - Test for dirty registers in the cache. + * + * @priv: Pointer to struct cs_dsp_test object. + * @drop_system_regs: If true the DSP system regs will be dropped from + * the cache before checking for dirty. + * + * All registers that are expected to be written must have been dropped + * from the cache (DSP system registers can be dropped by passing + * drop_system_regs == true). If any unexpected registers were written + * there will still be dirty entries in the cache and a cache sync will + * cause a write. + * + * Returns: true if there were dirty entries, false if not. + */ +bool cs_dsp_mock_regmap_is_dirty(struct cs_dsp_test *priv, bool drop_system_regs) +{ + if (drop_system_regs) + cs_dsp_mock_regmap_drop_system_regs(priv); + + priv->saw_bus_write = false; + regcache_cache_only(priv->dsp->regmap, false); + regcache_sync(priv->dsp->regmap); + regcache_cache_only(priv->dsp->regmap, true); + + return priv->saw_bus_write; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_regmap_is_dirty, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_regmap_init() - Initialize a mock regmap. + * + * @priv: Pointer to struct cs_dsp_test object. This must have a + * valid pointer to a struct cs_dsp in which the type and + * rev fields are set to the type of DSP to be simulated. + * + * On success the priv->dsp->regmap will point to the created + * regmap instance. + * + * Return: zero on success, else negative error code. + */ +int cs_dsp_mock_regmap_init(struct cs_dsp_test *priv) +{ + const struct regmap_config *config; + int ret; + + switch (priv->dsp->type) { + case WMFW_HALO: + config = &cs_dsp_mock_regmap_halo; + break; + case WMFW_ADSP2: + if (priv->dsp->rev == 0) + config = &cs_dsp_mock_regmap_adsp2_16bit; + else + config = &cs_dsp_mock_regmap_adsp2_32bit; + break; + default: + config = NULL; + break; + } + + priv->dsp->regmap = devm_regmap_init(priv->dsp->dev, + &cs_dsp_mock_regmap_bus, + priv, + config); + if (IS_ERR(priv->dsp->regmap)) { + ret = PTR_ERR(priv->dsp->regmap); + kunit_err(priv->test, "Failed to allocate register map: %d\n", ret); + return ret; + } + + /* Put regmap in cache-only so it accumulates the writes done by cs_dsp */ + regcache_cache_only(priv->dsp->regmap, true); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_regmap_init, "FW_CS_DSP_KUNIT_TEST_UTILS"); diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_utils.c b/drivers/firmware/cirrus/test/cs_dsp_mock_utils.c new file mode 100644 index 000000000000..cbd0bf72b7de --- /dev/null +++ b/drivers/firmware/cirrus/test/cs_dsp_mock_utils.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Utility module for cs_dsp KUnit testing. +// +// Copyright (C) 2024 Cirrus Logic, Inc. and +// Cirrus Logic International Semiconductor Ltd. + +#include + +MODULE_DESCRIPTION("Utilities for Cirrus Logic DSP driver testing"); +MODULE_AUTHOR("Richard Fitzgerald "); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS("FW_CS_DSP"); diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h new file mode 100644 index 000000000000..ac6b03f4c084 --- /dev/null +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Support utilities for cs_dsp testing. + * + * Copyright (C) 2024 Cirrus Logic, Inc. and + * Cirrus Logic International Semiconductor Ltd. + */ + +#include +#include + +struct kunit; +struct cs_dsp_test; +struct cs_dsp_test_local; + +/** + * struct cs_dsp_test - base class for test utilities + * + * @test: Pointer to struct kunit instance. + * @dsp: Pointer to struct cs_dsp instance. + * @local: Private data for each test suite. + */ +struct cs_dsp_test { + struct kunit *test; + struct cs_dsp *dsp; + + struct cs_dsp_test_local *local; + + /* Following members are private */ + bool saw_bus_write; +}; + +extern const unsigned int cs_dsp_mock_adsp2_32bit_sysbase; +extern const unsigned int cs_dsp_mock_adsp2_16bit_sysbase; +extern const unsigned int cs_dsp_mock_halo_core_base; +extern const unsigned int cs_dsp_mock_halo_sysinfo_base; + +int cs_dsp_mock_regmap_init(struct cs_dsp_test *priv); +void cs_dsp_mock_regmap_drop_range(struct cs_dsp_test *priv, + unsigned int first_reg, unsigned int last_reg); +void cs_dsp_mock_regmap_drop_regs(struct cs_dsp_test *priv, + unsigned int first_reg, size_t num_regs); +void cs_dsp_mock_regmap_drop_bytes(struct cs_dsp_test *priv, + unsigned int first_reg, size_t num_bytes); +void cs_dsp_mock_regmap_drop_system_regs(struct cs_dsp_test *priv); +bool cs_dsp_mock_regmap_is_dirty(struct cs_dsp_test *priv, bool drop_system_regs); -- cgit v1.2.3 From 41e78c0f44f97c958afcda3f82b23f4f4a05b968 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 12 Dec 2024 14:37:15 +0000 Subject: firmware: cs_dsp: Add mock DSP memory map for KUnit testing Add helper functions to implement an emulation of the DSP memory map. There are three main groups of functionality: 1. Define a mock cs_dsp_region table. 2. Calculate the addresses of memory and algorithms from the firmware header in XM. 3. Build a mock XM header in emulated XM. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20241212143725.1381013-3-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/firmware/cirrus/test/Makefile | 1 + .../firmware/cirrus/test/cs_dsp_mock_mem_maps.c | 751 +++++++++++++++++++++ include/linux/firmware/cirrus/cs_dsp_test_utils.h | 63 ++ 3 files changed, 815 insertions(+) create mode 100644 drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c (limited to 'include') diff --git a/drivers/firmware/cirrus/test/Makefile b/drivers/firmware/cirrus/test/Makefile index 373d8844c085..8d4f2eb7e5aa 100644 --- a/drivers/firmware/cirrus/test/Makefile +++ b/drivers/firmware/cirrus/test/Makefile @@ -2,6 +2,7 @@ # cs_dsp_test_utils-objs := \ + cs_dsp_mock_mem_maps.o \ cs_dsp_mock_regmap.o \ cs_dsp_mock_utils.o diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c b/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c new file mode 100644 index 000000000000..ae5d57bdcc2f --- /dev/null +++ b/drivers/firmware/cirrus/test/cs_dsp_mock_mem_maps.c @@ -0,0 +1,751 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Mock DSP memory maps for cs_dsp KUnit tests. +// +// Copyright (C) 2024 Cirrus Logic, Inc. and +// Cirrus Logic International Semiconductor Ltd. + +#include +#include +#include +#include +#include + +const struct cs_dsp_region cs_dsp_mock_halo_dsp1_regions[] = { + { .type = WMFW_HALO_PM_PACKED, .base = 0x3800000 }, + { .type = WMFW_HALO_XM_PACKED, .base = 0x2000000 }, + { .type = WMFW_HALO_YM_PACKED, .base = 0x2C00000 }, + { .type = WMFW_ADSP2_XM, .base = 0x2800000 }, + { .type = WMFW_ADSP2_YM, .base = 0x3400000 }, +}; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_halo_dsp1_regions, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/* List of sizes in bytes, for each entry above */ +const unsigned int cs_dsp_mock_halo_dsp1_region_sizes[] = { + 0x5000, /* PM_PACKED */ + 0x6000, /* XM_PACKED */ + 0x47F4, /* YM_PACKED */ + 0x8000, /* XM_UNPACKED_24 */ + 0x5FF8, /* YM_UNPACKED_24 */ + + 0 /* terminator */ +}; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_halo_dsp1_region_sizes, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +const struct cs_dsp_region cs_dsp_mock_adsp2_32bit_dsp1_regions[] = { + { .type = WMFW_ADSP2_PM, .base = 0x080000 }, + { .type = WMFW_ADSP2_XM, .base = 0x0a0000 }, + { .type = WMFW_ADSP2_YM, .base = 0x0c0000 }, + { .type = WMFW_ADSP2_ZM, .base = 0x0e0000 }, +}; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_adsp2_32bit_dsp1_regions, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/* List of sizes in bytes, for each entry above */ +const unsigned int cs_dsp_mock_adsp2_32bit_dsp1_region_sizes[] = { + 0x9000, /* PM */ + 0xa000, /* ZM */ + 0x2000, /* XM */ + 0x2000, /* YM */ + + 0 /* terminator */ +}; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_adsp2_32bit_dsp1_region_sizes, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +const struct cs_dsp_region cs_dsp_mock_adsp2_16bit_dsp1_regions[] = { + { .type = WMFW_ADSP2_PM, .base = 0x100000 }, + { .type = WMFW_ADSP2_ZM, .base = 0x180000 }, + { .type = WMFW_ADSP2_XM, .base = 0x190000 }, + { .type = WMFW_ADSP2_YM, .base = 0x1a8000 }, +}; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_adsp2_16bit_dsp1_regions, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/* List of sizes in bytes, for each entry above */ +const unsigned int cs_dsp_mock_adsp2_16bit_dsp1_region_sizes[] = { + 0x6000, /* PM */ + 0x800, /* ZM */ + 0x800, /* XM */ + 0x800, /* YM */ + + 0 /* terminator */ +}; +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_adsp2_16bit_dsp1_region_sizes, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +int cs_dsp_mock_count_regions(const unsigned int *region_sizes) +{ + int i; + + for (i = 0; region_sizes[i]; ++i) + ; + + return i; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_count_regions, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_size_of_region() - Return size of given memory region. + * + * @dsp: Pointer to struct cs_dsp. + * @mem_type: Memory region type. + * + * Return: Size of region in bytes. + */ +unsigned int cs_dsp_mock_size_of_region(const struct cs_dsp *dsp, int mem_type) +{ + const unsigned int *sizes; + int i; + + if (dsp->mem == cs_dsp_mock_halo_dsp1_regions) + sizes = cs_dsp_mock_halo_dsp1_region_sizes; + else if (dsp->mem == cs_dsp_mock_adsp2_32bit_dsp1_regions) + sizes = cs_dsp_mock_adsp2_32bit_dsp1_region_sizes; + else if (dsp->mem == cs_dsp_mock_adsp2_16bit_dsp1_regions) + sizes = cs_dsp_mock_adsp2_16bit_dsp1_region_sizes; + else + return 0; + + for (i = 0; i < dsp->num_mems; ++i) { + if (dsp->mem[i].type == mem_type) + return sizes[i]; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_size_of_region, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_base_addr_for_mem() - Base register address for memory region. + * + * @priv: Pointer to struct cs_dsp_test. + * @mem_type: Memory region type. + * + * Return: Base register address of region. + */ +unsigned int cs_dsp_mock_base_addr_for_mem(struct cs_dsp_test *priv, int mem_type) +{ + int num_mems = priv->dsp->num_mems; + const struct cs_dsp_region *region = priv->dsp->mem; + int i; + + for (i = 0; i < num_mems; ++i) { + if (region[i].type == mem_type) + return region[i].base; + } + + KUNIT_FAIL(priv->test, "Unexpected region %d\n", mem_type); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_base_addr_for_mem, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_reg_addr_inc_per_unpacked_word() - Unpacked register address increment per DSP word. + * + * @priv: Pointer to struct cs_dsp_test. + * + * Return: Amount by which register address increments to move to the next + * DSP word in unpacked XM/YM/ZM. + */ +unsigned int cs_dsp_mock_reg_addr_inc_per_unpacked_word(struct cs_dsp_test *priv) +{ + switch (priv->dsp->type) { + case WMFW_ADSP2: + return 2; /* two 16-bit register indexes per XM/YM/ZM word */ + case WMFW_HALO: + return 4; /* one byte-addressed 32-bit register per XM/YM/ZM word */ + default: + KUNIT_FAIL(priv->test, "Unexpected DSP type\n"); + return -1; + } +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_reg_addr_inc_per_unpacked_word, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_reg_block_length_bytes() - Number of bytes in an access block. + * + * @priv: Pointer to struct cs_dsp_test. + * @mem_type: Memory region type. + * + * Return: Total number of bytes in a group of registers forming the + * smallest bus access size (including any padding bits). For unpacked + * memory this is the number of registers containing one DSP word. + * For packed memory this is the number of registers in one packed + * access block. + */ +unsigned int cs_dsp_mock_reg_block_length_bytes(struct cs_dsp_test *priv, int mem_type) +{ + switch (priv->dsp->type) { + case WMFW_ADSP2: + switch (mem_type) { + case WMFW_ADSP2_PM: + return 3 * regmap_get_val_bytes(priv->dsp->regmap); + case WMFW_ADSP2_XM: + case WMFW_ADSP2_YM: + case WMFW_ADSP2_ZM: + return sizeof(u32); + default: + break; + } + break; + case WMFW_HALO: + switch (mem_type) { + case WMFW_ADSP2_XM: + case WMFW_ADSP2_YM: + return sizeof(u32); + case WMFW_HALO_PM_PACKED: + return 5 * sizeof(u32); + case WMFW_HALO_XM_PACKED: + case WMFW_HALO_YM_PACKED: + return 3 * sizeof(u32); + default: + break; + } + break; + default: + KUNIT_FAIL(priv->test, "Unexpected DSP type\n"); + return 0; + } + + KUNIT_FAIL(priv->test, "Unexpected mem type\n"); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_reg_block_length_bytes, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_reg_block_length_registers() - Number of registers in an access block. + * + * @priv: Pointer to struct cs_dsp_test. + * @mem_type: Memory region type. + * + * Return: Total number of register forming the smallest bus access size. + * For unpacked memory this is the number of registers containing one + * DSP word. For packed memory this is the number of registers in one + * packed access block. + */ +unsigned int cs_dsp_mock_reg_block_length_registers(struct cs_dsp_test *priv, int mem_type) +{ + return cs_dsp_mock_reg_block_length_bytes(priv, mem_type) / + regmap_get_val_bytes(priv->dsp->regmap); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_reg_block_length_registers, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_reg_block_length_dsp_words() - Number of dsp_words in an access block. + * + * @priv: Pointer to struct cs_dsp_test. + * @mem_type: Memory region type. + * + * Return: Total number of DSP words in a group of registers forming the + * smallest bus access size. + */ +unsigned int cs_dsp_mock_reg_block_length_dsp_words(struct cs_dsp_test *priv, int mem_type) +{ + switch (priv->dsp->type) { + case WMFW_ADSP2: + switch (mem_type) { + case WMFW_ADSP2_PM: + return regmap_get_val_bytes(priv->dsp->regmap) / 2; + case WMFW_ADSP2_XM: + case WMFW_ADSP2_YM: + case WMFW_ADSP2_ZM: + return 1; + default: + break; + } + break; + case WMFW_HALO: + switch (mem_type) { + case WMFW_ADSP2_XM: + case WMFW_ADSP2_YM: + return 1; + case WMFW_HALO_PM_PACKED: + case WMFW_HALO_XM_PACKED: + case WMFW_HALO_YM_PACKED: + return 4; + default: + break; + } + break; + default: + KUNIT_FAIL(priv->test, "Unexpected DSP type\n"); + return 0; + } + + KUNIT_FAIL(priv->test, "Unexpected mem type\n"); + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_reg_block_length_dsp_words, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_has_zm() - DSP has ZM + * + * @priv: Pointer to struct cs_dsp_test. + * + * Return: True if DSP has ZM. + */ +bool cs_dsp_mock_has_zm(struct cs_dsp_test *priv) +{ + switch (priv->dsp->type) { + case WMFW_ADSP2: + return true; + default: + return false; + } +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_has_zm, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_packed_to_unpacked_mem_type() - Unpacked region that is + * the same memory as a packed region. + * + * @packed_mem_type: Type of packed memory region. + * + * Return: unpacked type that is the same memory as packed_mem_type. + */ +int cs_dsp_mock_packed_to_unpacked_mem_type(int packed_mem_type) +{ + switch (packed_mem_type) { + case WMFW_HALO_XM_PACKED: + return WMFW_ADSP2_XM; + case WMFW_HALO_YM_PACKED: + return WMFW_ADSP2_YM; + default: + return -1; + } +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_packed_to_unpacked_mem_type, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_num_dsp_words_to_num_packed_regs() - Number of DSP words + * to number of packed registers. + * + * @num_dsp_words: Number of DSP words. + * + * Convert number of DSP words to number of packed registers rounded + * down to the nearest register. + * + * Return: Number of packed registers. + */ +unsigned int cs_dsp_mock_num_dsp_words_to_num_packed_regs(unsigned int num_dsp_words) +{ + /* There are 3 registers for every 4 packed words */ + return (num_dsp_words * 3) / 4; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_num_dsp_words_to_num_packed_regs, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +static const struct wmfw_halo_id_hdr cs_dsp_mock_halo_xm_hdr = { + .fw = { + .core_id = cpu_to_be32(WMFW_HALO << 16), + .block_rev = cpu_to_be32(3 << 16), + .vendor_id = cpu_to_be32(0x2), + .id = cpu_to_be32(0xabcdef), + .ver = cpu_to_be32(0x090101), + }, + + /* + * Leave enough space for this header and 40 algorithm descriptors. + * base and size are counted in DSP words. + */ + .xm_base = cpu_to_be32(((sizeof(struct wmfw_halo_id_hdr) + + (40 * sizeof(struct wmfw_halo_alg_hdr))) + / 4) * 3), + .xm_size = cpu_to_be32(0x20), + + /* Allocate a dummy word of YM */ + .ym_base = cpu_to_be32(0), + .ym_size = cpu_to_be32(1), + + .n_algs = 0, +}; + +static const struct wmfw_adsp2_id_hdr cs_dsp_mock_adsp2_xm_hdr = { + .fw = { + .core_id = cpu_to_be32(WMFW_ADSP2 << 16), + .core_rev = cpu_to_be32(2 << 16), + .id = cpu_to_be32(0xabcdef), + .ver = cpu_to_be32(0x090101), + }, + + /* + * Leave enough space for this header and 40 algorithm descriptors. + * base and size are counted in DSP words. + */ + .xm = cpu_to_be32(((sizeof(struct wmfw_adsp2_id_hdr) + + (40 * sizeof(struct wmfw_adsp2_alg_hdr))) + / 4) * 3), + + .ym = cpu_to_be32(0), + .zm = cpu_to_be32(0), + + .n_algs = 0, +}; + +/** + * cs_dsp_mock_xm_header_get_alg_base_in_words() - Algorithm base offset in DSP words. + * + * @priv: Pointer to struct cs_dsp_test. + * @alg_id: Algorithm ID. + * @mem_type: Memory region type. + * + * Lookup an algorithm in the XM header and return the base offset in + * DSP words of the algorithm data in the requested memory region. + * + * Return: Offset in DSP words. + */ +unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *priv, + unsigned int alg_id, + int mem_type) +{ + unsigned int xm = cs_dsp_mock_base_addr_for_mem(priv, WMFW_ADSP2_XM); + union { + struct wmfw_adsp2_alg_hdr adsp2; + struct wmfw_halo_alg_hdr halo; + } alg; + unsigned int alg_hdr_addr; + unsigned int val, xm_base = 0, ym_base = 0, zm_base = 0; + int ret; + + switch (priv->dsp->type) { + case WMFW_ADSP2: + alg_hdr_addr = xm + (sizeof(struct wmfw_adsp2_id_hdr) / 2); + for (;; alg_hdr_addr += sizeof(alg.adsp2) / 2) { + ret = regmap_read(priv->dsp->regmap, alg_hdr_addr, &val); + KUNIT_ASSERT_GE(priv->test, ret, 0); + KUNIT_ASSERT_NE(priv->test, val, 0xbedead); + ret = regmap_raw_read(priv->dsp->regmap, alg_hdr_addr, + &alg.adsp2, sizeof(alg.adsp2)); + KUNIT_ASSERT_GE(priv->test, ret, 0); + if (be32_to_cpu(alg.adsp2.alg.id) == alg_id) { + xm_base = be32_to_cpu(alg.adsp2.xm); + ym_base = be32_to_cpu(alg.adsp2.ym); + zm_base = be32_to_cpu(alg.adsp2.zm); + break; + } + } + break; + case WMFW_HALO: + alg_hdr_addr = xm + sizeof(struct wmfw_halo_id_hdr); + for (;; alg_hdr_addr += sizeof(alg.halo)) { + ret = regmap_read(priv->dsp->regmap, alg_hdr_addr, &val); + KUNIT_ASSERT_GE(priv->test, ret, 0); + KUNIT_ASSERT_NE(priv->test, val, 0xbedead); + ret = regmap_raw_read(priv->dsp->regmap, alg_hdr_addr, + &alg.halo, sizeof(alg.halo)); + KUNIT_ASSERT_GE(priv->test, ret, 0); + if (be32_to_cpu(alg.halo.alg.id) == alg_id) { + xm_base = be32_to_cpu(alg.halo.xm_base); + ym_base = be32_to_cpu(alg.halo.ym_base); + break; + } + } + break; + default: + KUNIT_FAIL(priv->test, "Unexpected DSP type %d\n", priv->dsp->type); + return 0; + } + + switch (mem_type) { + case WMFW_ADSP2_XM: + case WMFW_HALO_XM_PACKED: + return xm_base; + case WMFW_ADSP2_YM: + case WMFW_HALO_YM_PACKED: + return ym_base; + case WMFW_ADSP2_ZM: + return zm_base; + default: + KUNIT_FAIL(priv->test, "Bad mem_type\n"); + return 0; + } +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_get_alg_base_in_words, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_xm_header_get_fw_version_from_regmap() - Firmware version. + * + * @priv: Pointer to struct cs_dsp_test. + * + * Return: Firmware version word value. + */ +unsigned int cs_dsp_mock_xm_header_get_fw_version_from_regmap(struct cs_dsp_test *priv) +{ + unsigned int xm = cs_dsp_mock_base_addr_for_mem(priv, WMFW_ADSP2_XM); + union { + struct wmfw_id_hdr adsp2; + struct wmfw_v3_id_hdr halo; + } hdr; + + switch (priv->dsp->type) { + case WMFW_ADSP2: + regmap_raw_read(priv->dsp->regmap, xm, &hdr.adsp2, sizeof(hdr.adsp2)); + return be32_to_cpu(hdr.adsp2.ver); + case WMFW_HALO: + regmap_raw_read(priv->dsp->regmap, xm, &hdr.halo, sizeof(hdr.halo)); + return be32_to_cpu(hdr.halo.ver); + default: + KUNIT_FAIL(priv->test, NULL); + return 0; + } +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_get_fw_version_from_regmap, + "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_xm_header_get_fw_version() - Firmware version. + * + * @header: Pointer to struct cs_dsp_mock_xm_header. + * + * Return: Firmware version word value. + */ +unsigned int cs_dsp_mock_xm_header_get_fw_version(struct cs_dsp_mock_xm_header *header) +{ + const struct wmfw_id_hdr *adsp2_hdr; + const struct wmfw_v3_id_hdr *halo_hdr; + + switch (header->test_priv->dsp->type) { + case WMFW_ADSP2: + adsp2_hdr = header->blob_data; + return be32_to_cpu(adsp2_hdr->ver); + case WMFW_HALO: + halo_hdr = header->blob_data; + return be32_to_cpu(halo_hdr->ver); + default: + KUNIT_FAIL(header->test_priv->test, NULL); + return 0; + } +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_get_fw_version, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_xm_header_drop_from_regmap_cache() - Drop XM header from regmap cache. + * + * @priv: Pointer to struct cs_dsp_test. + */ +void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv) +{ + unsigned int xm = cs_dsp_mock_base_addr_for_mem(priv, WMFW_ADSP2_XM); + unsigned int bytes; + u32 num_algs; + + switch (priv->dsp->type) { + case WMFW_ADSP2: + /* + * Could be one 32-bit register or two 16-bit registers. + * A raw read will read the requested number of bytes. + */ + regmap_raw_read(priv->dsp->regmap, + xm + (offsetof(struct wmfw_adsp2_id_hdr, n_algs) / 2), + &num_algs, sizeof(num_algs)); + num_algs = be32_to_cpu(num_algs); + bytes = sizeof(struct wmfw_adsp2_id_hdr) + + (num_algs * sizeof(struct wmfw_adsp2_alg_hdr)) + + 4 /* terminator word */; + + regcache_drop_region(priv->dsp->regmap, xm, xm + (bytes / 2) - 1); + break; + case WMFW_HALO: + regmap_read(priv->dsp->regmap, + xm + offsetof(struct wmfw_halo_id_hdr, n_algs), + &num_algs); + bytes = sizeof(struct wmfw_halo_id_hdr) + + (num_algs * sizeof(struct wmfw_halo_alg_hdr)) + + 4 /* terminator word */; + + regcache_drop_region(priv->dsp->regmap, xm, xm + bytes - 4); + break; + default: + break; + } +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_drop_from_regmap_cache, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +static void cs_dsp_mock_xm_header_add_adsp2_algs(struct cs_dsp_mock_xm_header *builder, + const struct cs_dsp_mock_alg_def *algs, + size_t num_algs) +{ + struct wmfw_adsp2_id_hdr *hdr = builder->blob_data; + unsigned int next_free_xm_word, next_free_ym_word, next_free_zm_word; + + next_free_xm_word = be32_to_cpu(hdr->xm); + next_free_ym_word = be32_to_cpu(hdr->ym); + next_free_zm_word = be32_to_cpu(hdr->zm); + + /* Set num_algs in XM header. */ + hdr->n_algs = cpu_to_be32(num_algs); + + /* Create algorithm descriptor list */ + struct wmfw_adsp2_alg_hdr *alg_info = + (struct wmfw_adsp2_alg_hdr *)(&hdr[1]); + + for (; num_algs > 0; num_algs--, algs++, alg_info++) { + unsigned int alg_xm_last, alg_ym_last, alg_zm_last; + + alg_info->alg.id = cpu_to_be32(algs->id); + alg_info->alg.ver = cpu_to_be32(algs->ver); + alg_info->xm = cpu_to_be32(algs->xm_base_words); + alg_info->ym = cpu_to_be32(algs->ym_base_words); + alg_info->zm = cpu_to_be32(algs->zm_base_words); + + /* Check if we need to auto-allocate base addresses */ + if (!alg_info->xm && algs->xm_size_words) + alg_info->xm = cpu_to_be32(next_free_xm_word); + + if (!alg_info->ym && algs->ym_size_words) + alg_info->ym = cpu_to_be32(next_free_ym_word); + + if (!alg_info->zm && algs->zm_size_words) + alg_info->zm = cpu_to_be32(next_free_zm_word); + + alg_xm_last = be32_to_cpu(alg_info->xm) + algs->xm_size_words - 1; + if (alg_xm_last > next_free_xm_word) + next_free_xm_word = alg_xm_last; + + alg_ym_last = be32_to_cpu(alg_info->ym) + algs->ym_size_words - 1; + if (alg_ym_last > next_free_ym_word) + next_free_ym_word = alg_ym_last; + + alg_zm_last = be32_to_cpu(alg_info->zm) + algs->zm_size_words - 1; + if (alg_zm_last > next_free_zm_word) + next_free_zm_word = alg_zm_last; + } + + /* Write list terminator */ + *(__be32 *)(alg_info) = cpu_to_be32(0xbedead); +} + +static void cs_dsp_mock_xm_header_add_halo_algs(struct cs_dsp_mock_xm_header *builder, + const struct cs_dsp_mock_alg_def *algs, + size_t num_algs) +{ + struct wmfw_halo_id_hdr *hdr = builder->blob_data; + unsigned int next_free_xm_word, next_free_ym_word; + + /* Assume we're starting with bare header */ + next_free_xm_word = be32_to_cpu(hdr->xm_base) + be32_to_cpu(hdr->xm_size) - 1; + next_free_ym_word = be32_to_cpu(hdr->ym_base) + be32_to_cpu(hdr->ym_size) - 1; + + /* Set num_algs in XM header */ + hdr->n_algs = cpu_to_be32(num_algs); + + /* Create algorithm descriptor list */ + struct wmfw_halo_alg_hdr *alg_info = + (struct wmfw_halo_alg_hdr *)(&hdr[1]); + + for (; num_algs > 0; num_algs--, algs++, alg_info++) { + unsigned int alg_xm_last, alg_ym_last; + + alg_info->alg.id = cpu_to_be32(algs->id); + alg_info->alg.ver = cpu_to_be32(algs->ver); + alg_info->xm_base = cpu_to_be32(algs->xm_base_words); + alg_info->xm_size = cpu_to_be32(algs->xm_size_words); + alg_info->ym_base = cpu_to_be32(algs->ym_base_words); + alg_info->ym_size = cpu_to_be32(algs->ym_size_words); + + /* Check if we need to auto-allocate base addresses */ + if (!alg_info->xm_base && alg_info->xm_size) + alg_info->xm_base = cpu_to_be32(next_free_xm_word); + + if (!alg_info->ym_base && alg_info->ym_size) + alg_info->ym_base = cpu_to_be32(next_free_ym_word); + + alg_xm_last = be32_to_cpu(alg_info->xm_base) + be32_to_cpu(alg_info->xm_size) - 1; + if (alg_xm_last > next_free_xm_word) + next_free_xm_word = alg_xm_last; + + alg_ym_last = be32_to_cpu(alg_info->ym_base) + be32_to_cpu(alg_info->ym_size) - 1; + if (alg_ym_last > next_free_ym_word) + next_free_ym_word = alg_ym_last; + } + + /* Write list terminator */ + *(__be32 *)(alg_info) = cpu_to_be32(0xbedead); +} + +/** + * cs_dsp_mock_xm_header_write_to_regmap() - Write XM header to regmap. + * + * @header: Pointer to struct cs_dsp_mock_xm_header. + * + * The data in header is written to the XM addresses in the regmap. + * + * Return: 0 on success, else negative error code. + */ +int cs_dsp_mock_xm_header_write_to_regmap(struct cs_dsp_mock_xm_header *header) +{ + struct cs_dsp_test *priv = header->test_priv; + unsigned int reg_addr = cs_dsp_mock_base_addr_for_mem(priv, WMFW_ADSP2_XM); + + /* + * One 32-bit word corresponds to one 32-bit unpacked XM word so the + * blob can be written directly to the regmap. + */ + return regmap_raw_write(priv->dsp->regmap, reg_addr, + header->blob_data, header->blob_size_bytes); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_xm_header_write_to_regmap, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_create_mock_xm_header() - Create a dummy XM header. + * + * @priv: Pointer to struct cs_dsp_test. + * @algs: Pointer to array of struct cs_dsp_mock_alg_def listing the + * dummy algorithm entries to include in the XM header. + * @num_algs: Number of entries in the algs array. + * + * Return: Pointer to created struct cs_dsp_mock_xm_header. + */ +struct cs_dsp_mock_xm_header *cs_dsp_create_mock_xm_header(struct cs_dsp_test *priv, + const struct cs_dsp_mock_alg_def *algs, + size_t num_algs) +{ + struct cs_dsp_mock_xm_header *builder; + size_t total_bytes_required; + const void *header; + size_t header_size_bytes; + + builder = kunit_kzalloc(priv->test, sizeof(*builder), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(priv->test, builder); + builder->test_priv = priv; + + switch (priv->dsp->type) { + case WMFW_ADSP2: + header = &cs_dsp_mock_adsp2_xm_hdr; + header_size_bytes = sizeof(cs_dsp_mock_adsp2_xm_hdr); + total_bytes_required = header_size_bytes + + (num_algs * sizeof(struct wmfw_adsp2_alg_hdr)) + + 4; /* terminator word */ + break; + case WMFW_HALO: + header = &cs_dsp_mock_halo_xm_hdr, + header_size_bytes = sizeof(cs_dsp_mock_halo_xm_hdr); + total_bytes_required = header_size_bytes + + (num_algs * sizeof(struct wmfw_halo_alg_hdr)) + + 4; /* terminator word */ + break; + default: + KUNIT_FAIL(priv->test, "%s unexpected DSP type %d\n", + __func__, priv->dsp->type); + return NULL; + } + + builder->blob_data = kunit_kzalloc(priv->test, total_bytes_required, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(priv->test, builder->blob_data); + builder->blob_size_bytes = total_bytes_required; + + memcpy(builder->blob_data, header, header_size_bytes); + + switch (priv->dsp->type) { + case WMFW_ADSP2: + cs_dsp_mock_xm_header_add_adsp2_algs(builder, algs, num_algs); + break; + case WMFW_HALO: + cs_dsp_mock_xm_header_add_halo_algs(builder, algs, num_algs); + break; + default: + break; + } + + return builder; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_create_mock_xm_header, "FW_CS_DSP_KUNIT_TEST_UTILS"); diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h index ac6b03f4c084..899ae94198aa 100644 --- a/include/linux/firmware/cirrus/cs_dsp_test_utils.h +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -30,11 +30,74 @@ struct cs_dsp_test { bool saw_bus_write; }; +/** + * struct cs_dsp_mock_alg_def - Info for creating a mock algorithm entry. + * + * @id Algorithm ID. + * @ver; Algorithm version. + * @xm_base_words XM base address in DSP words. + * @xm_size_words XM size in DSP words. + * @ym_base_words YM base address in DSP words. + * @ym_size_words YM size in DSP words. + * @zm_base_words ZM base address in DSP words. + * @zm_size_words ZM size in DSP words. + */ +struct cs_dsp_mock_alg_def { + unsigned int id; + unsigned int ver; + unsigned int xm_base_words; + unsigned int xm_size_words; + unsigned int ym_base_words; + unsigned int ym_size_words; + unsigned int zm_base_words; + unsigned int zm_size_words; +}; + +/** + * struct cs_dsp_mock_xm_header - XM header builder + * + * @test_priv: Pointer to the struct cs_dsp_test. + * @blob_data: Pointer to the created blob data. + * @blob_size_bytes: Size of the data at blob_data. + */ +struct cs_dsp_mock_xm_header { + struct cs_dsp_test *test_priv; + void *blob_data; + size_t blob_size_bytes; +}; + extern const unsigned int cs_dsp_mock_adsp2_32bit_sysbase; extern const unsigned int cs_dsp_mock_adsp2_16bit_sysbase; extern const unsigned int cs_dsp_mock_halo_core_base; extern const unsigned int cs_dsp_mock_halo_sysinfo_base; +extern const struct cs_dsp_region cs_dsp_mock_halo_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_halo_dsp1_region_sizes[]; +extern const struct cs_dsp_region cs_dsp_mock_adsp2_32bit_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_adsp2_32bit_dsp1_region_sizes[]; +extern const struct cs_dsp_region cs_dsp_mock_adsp2_16bit_dsp1_regions[]; +extern const unsigned int cs_dsp_mock_adsp2_16bit_dsp1_region_sizes[]; +int cs_dsp_mock_count_regions(const unsigned int *region_sizes); +unsigned int cs_dsp_mock_size_of_region(const struct cs_dsp *dsp, int mem_type); +unsigned int cs_dsp_mock_base_addr_for_mem(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_addr_inc_per_unpacked_word(struct cs_dsp_test *priv); +unsigned int cs_dsp_mock_reg_block_length_bytes(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_block_length_registers(struct cs_dsp_test *priv, int mem_type); +unsigned int cs_dsp_mock_reg_block_length_dsp_words(struct cs_dsp_test *priv, int mem_type); +bool cs_dsp_mock_has_zm(struct cs_dsp_test *priv); +int cs_dsp_mock_packed_to_unpacked_mem_type(int packed_mem_type); +unsigned int cs_dsp_mock_num_dsp_words_to_num_packed_regs(unsigned int num_dsp_words); +unsigned int cs_dsp_mock_xm_header_get_alg_base_in_words(struct cs_dsp_test *priv, + unsigned int alg_id, + int mem_type); +unsigned int cs_dsp_mock_xm_header_get_fw_version_from_regmap(struct cs_dsp_test *priv); +unsigned int cs_dsp_mock_xm_header_get_fw_version(struct cs_dsp_mock_xm_header *header); +void cs_dsp_mock_xm_header_drop_from_regmap_cache(struct cs_dsp_test *priv); +int cs_dsp_mock_xm_header_write_to_regmap(struct cs_dsp_mock_xm_header *header); +struct cs_dsp_mock_xm_header *cs_dsp_create_mock_xm_header(struct cs_dsp_test *priv, + const struct cs_dsp_mock_alg_def *algs, + size_t num_algs); + int cs_dsp_mock_regmap_init(struct cs_dsp_test *priv); void cs_dsp_mock_regmap_drop_range(struct cs_dsp_test *priv, unsigned int first_reg, unsigned int last_reg); -- cgit v1.2.3 From 5cf1b7b471803f7cc654a29ee16cb085ad69c097 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 12 Dec 2024 14:37:16 +0000 Subject: firmware: cs_dsp: Add mock wmfw file generator for KUnit testing Add a mock firmware file that emulates what the firmware build tools would normally create. This will be used by KUnit tests to generate a test wmfw file. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20241212143725.1381013-4-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/firmware/cirrus/test/Makefile | 3 +- drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c | 473 ++++++++++++++++++++++ include/linux/firmware/cirrus/cs_dsp_test_utils.h | 33 ++ 3 files changed, 508 insertions(+), 1 deletion(-) create mode 100644 drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c (limited to 'include') diff --git a/drivers/firmware/cirrus/test/Makefile b/drivers/firmware/cirrus/test/Makefile index 8d4f2eb7e5aa..a67010cb6d66 100644 --- a/drivers/firmware/cirrus/test/Makefile +++ b/drivers/firmware/cirrus/test/Makefile @@ -4,6 +4,7 @@ cs_dsp_test_utils-objs := \ cs_dsp_mock_mem_maps.o \ cs_dsp_mock_regmap.o \ - cs_dsp_mock_utils.o + cs_dsp_mock_utils.o \ + cs_dsp_mock_wmfw.o obj-$(CONFIG_FW_CS_DSP_KUNIT_TEST_UTILS) += cs_dsp_test_utils.o diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c b/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c new file mode 100644 index 000000000000..d3d84da239a3 --- /dev/null +++ b/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c @@ -0,0 +1,473 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// wmfw file builder for cs_dsp KUnit tests. +// +// Copyright (C) 2024 Cirrus Logic, Inc. and +// Cirrus Logic International Semiconductor Ltd. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Buffer large enough for bin file content */ +#define CS_DSP_MOCK_WMFW_BUF_SIZE 131072 + +struct cs_dsp_mock_wmfw_builder { + struct cs_dsp_test *test_priv; + int format_version; + void *buf; + size_t buf_size_bytes; + void *write_p; + size_t bytes_used; + + void *alg_data_header; + unsigned int num_coeffs; +}; + +struct wmfw_adsp2_halo_header { + struct wmfw_header header; + struct wmfw_adsp2_sizes sizes; + struct wmfw_footer footer; +} __packed; + +struct wmfw_long_string { + __le16 len; + u8 data[] __nonstring __counted_by(len); +} __packed; + +struct wmfw_short_string { + u8 len; + u8 data[] __nonstring __counted_by(len); +} __packed; + +KUNIT_DEFINE_ACTION_WRAPPER(vfree_action_wrapper, vfree, void *) + +/** + * cs_dsp_mock_wmfw_format_version() - Return format version. + * + * @builder: Pointer to struct cs_dsp_mock_wmfw_builder. + * + * Return: Format version. + */ +int cs_dsp_mock_wmfw_format_version(struct cs_dsp_mock_wmfw_builder *builder) +{ + return builder->format_version; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_format_version, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_wmfw_get_firmware() - Get struct firmware wrapper for data. + * + * @builder: Pointer to struct cs_dsp_mock_wmfw_builder. + * + * Return: Pointer to a struct firmware wrapper for the data. + */ +struct firmware *cs_dsp_mock_wmfw_get_firmware(struct cs_dsp_mock_wmfw_builder *builder) +{ + struct firmware *fw; + + if (!builder) + return NULL; + + fw = kunit_kzalloc(builder->test_priv->test, sizeof(*fw), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(builder->test_priv->test, fw); + + fw->data = builder->buf; + fw->size = builder->bytes_used; + + return fw; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_get_firmware, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_wmfw_add_raw_block() - Add a block to the wmfw file. + * + * @builder: Pointer to struct cs_dsp_mock_bin_builder. + * @block_type: Block type. + * @offset: Offset. + * @payload_data: Pointer to buffer containing the payload data, + * or NULL if no data. + * @payload_len_bytes: Length of payload data in bytes, or zero. + */ +void cs_dsp_mock_wmfw_add_raw_block(struct cs_dsp_mock_wmfw_builder *builder, + int block_type, unsigned int offset, + const void *payload_data, size_t payload_len_bytes) +{ + struct wmfw_region *header = builder->write_p; + unsigned int bytes_needed = struct_size_t(struct wmfw_region, data, payload_len_bytes); + + KUNIT_ASSERT_TRUE(builder->test_priv->test, + (builder->write_p + bytes_needed) < + (builder->buf + CS_DSP_MOCK_WMFW_BUF_SIZE)); + + header->offset = cpu_to_le32(offset | (block_type << 24)); + header->len = cpu_to_le32(payload_len_bytes); + if (payload_len_bytes > 0) + memcpy(header->data, payload_data, payload_len_bytes); + + builder->write_p += bytes_needed; + builder->bytes_used += bytes_needed; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_add_raw_block, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_wmfw_add_info() - Add an info block to the wmfw file. + * + * @builder: Pointer to struct cs_dsp_mock_bin_builder. + * @info: Pointer to info string to be copied into the file. + * + * The string will be padded to a length that is a multiple of 4 bytes. + */ +void cs_dsp_mock_wmfw_add_info(struct cs_dsp_mock_wmfw_builder *builder, + const char *info) +{ + size_t info_len = strlen(info); + char *tmp = NULL; + + if (info_len % 4) { + /* Create a padded string with length a multiple of 4 */ + info_len = round_up(info_len, 4); + tmp = kunit_kzalloc(builder->test_priv->test, info_len, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(builder->test_priv->test, tmp); + memcpy(tmp, info, info_len); + info = tmp; + } + + cs_dsp_mock_wmfw_add_raw_block(builder, WMFW_INFO_TEXT, 0, info, info_len); + kunit_kfree(builder->test_priv->test, tmp); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_add_info, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_wmfw_add_data_block() - Add a data block to the wmfw file. + * + * @builder: Pointer to struct cs_dsp_mock_bin_builder. + * @mem_region: Memory region for the block. + * @mem_offset_dsp_words: Offset to start of destination in DSP words. + * @payload_data: Pointer to buffer containing the payload data. + * @payload_len_bytes: Length of payload data in bytes. + */ +void cs_dsp_mock_wmfw_add_data_block(struct cs_dsp_mock_wmfw_builder *builder, + int mem_region, unsigned int mem_offset_dsp_words, + const void *payload_data, size_t payload_len_bytes) +{ + /* Blob payload length must be a multiple of 4 */ + KUNIT_ASSERT_EQ(builder->test_priv->test, payload_len_bytes % 4, 0); + + cs_dsp_mock_wmfw_add_raw_block(builder, mem_region, mem_offset_dsp_words, + payload_data, payload_len_bytes); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_add_data_block, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +void cs_dsp_mock_wmfw_start_alg_info_block(struct cs_dsp_mock_wmfw_builder *builder, + unsigned int alg_id, + const char *name, + const char *description) +{ + struct wmfw_region *rgn = builder->write_p; + struct wmfw_adsp_alg_data *v1; + struct wmfw_short_string *shortstring; + struct wmfw_long_string *longstring; + size_t bytes_needed, name_len, description_len; + int offset; + + /* Bytes needed for region header */ + bytes_needed = offsetof(struct wmfw_region, data); + + builder->alg_data_header = builder->write_p; + builder->num_coeffs = 0; + + switch (builder->format_version) { + case 0: + KUNIT_FAIL(builder->test_priv->test, "wmfwV0 does not have alg blocks\n"); + return; + case 1: + bytes_needed += offsetof(struct wmfw_adsp_alg_data, data); + KUNIT_ASSERT_TRUE(builder->test_priv->test, + (builder->write_p + bytes_needed) < + (builder->buf + CS_DSP_MOCK_WMFW_BUF_SIZE)); + + memset(builder->write_p, 0, bytes_needed); + + /* Create region header */ + rgn->offset = cpu_to_le32(WMFW_ALGORITHM_DATA << 24); + + /* Create algorithm entry */ + v1 = (struct wmfw_adsp_alg_data *)&rgn->data[0]; + v1->id = cpu_to_le32(alg_id); + if (name) + strscpy(v1->name, name, sizeof(v1->name)); + + if (description) + strscpy(v1->descr, description, sizeof(v1->descr)); + break; + default: + name_len = 0; + description_len = 0; + + if (name) + name_len = strlen(name); + + if (description) + description_len = strlen(description); + + bytes_needed += sizeof(__le32); /* alg id */ + bytes_needed += round_up(name_len + sizeof(u8), sizeof(__le32)); + bytes_needed += round_up(description_len + sizeof(__le16), sizeof(__le32)); + bytes_needed += sizeof(__le32); /* coeff count */ + + KUNIT_ASSERT_TRUE(builder->test_priv->test, + (builder->write_p + bytes_needed) < + (builder->buf + CS_DSP_MOCK_WMFW_BUF_SIZE)); + + memset(builder->write_p, 0, bytes_needed); + + /* Create region header */ + rgn->offset = cpu_to_le32(WMFW_ALGORITHM_DATA << 24); + + /* Create algorithm entry */ + *(__force __le32 *)&rgn->data[0] = cpu_to_le32(alg_id); + + shortstring = (struct wmfw_short_string *)&rgn->data[4]; + shortstring->len = name_len; + + if (name_len) + memcpy(shortstring->data, name, name_len); + + /* Round up to next __le32 */ + offset = round_up(4 + struct_size_t(struct wmfw_short_string, data, name_len), + sizeof(__le32)); + + longstring = (struct wmfw_long_string *)&rgn->data[offset]; + longstring->len = cpu_to_le16(description_len); + + if (description_len) + memcpy(longstring->data, description, description_len); + break; + } + + builder->write_p += bytes_needed; + builder->bytes_used += bytes_needed; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_start_alg_info_block, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +void cs_dsp_mock_wmfw_add_coeff_desc(struct cs_dsp_mock_wmfw_builder *builder, + const struct cs_dsp_mock_coeff_def *def) +{ + struct wmfw_adsp_coeff_data *v1; + struct wmfw_short_string *shortstring; + struct wmfw_long_string *longstring; + size_t bytes_needed, shortname_len, fullname_len, description_len; + __le32 *ple32; + + KUNIT_ASSERT_NOT_NULL(builder->test_priv->test, builder->alg_data_header); + + switch (builder->format_version) { + case 0: + return; + case 1: + bytes_needed = offsetof(struct wmfw_adsp_coeff_data, data); + KUNIT_ASSERT_TRUE(builder->test_priv->test, + (builder->write_p + bytes_needed) < + (builder->buf + CS_DSP_MOCK_WMFW_BUF_SIZE)); + + v1 = (struct wmfw_adsp_coeff_data *)builder->write_p; + memset(v1, 0, sizeof(*v1)); + v1->hdr.offset = cpu_to_le16(def->offset_dsp_words); + v1->hdr.type = cpu_to_le16(def->mem_type); + v1->hdr.size = cpu_to_le32(bytes_needed - sizeof(v1->hdr)); + v1->ctl_type = cpu_to_le16(def->type); + v1->flags = cpu_to_le16(def->flags); + v1->len = cpu_to_le32(def->length_bytes); + + if (def->fullname) + strscpy(v1->name, def->fullname, sizeof(v1->name)); + + if (def->description) + strscpy(v1->descr, def->description, sizeof(v1->descr)); + break; + default: + fullname_len = 0; + description_len = 0; + shortname_len = strlen(def->shortname); + + if (def->fullname) + fullname_len = strlen(def->fullname); + + if (def->description) + description_len = strlen(def->description); + + bytes_needed = sizeof(__le32) * 2; /* type, offset and size */ + bytes_needed += round_up(shortname_len + sizeof(u8), sizeof(__le32)); + bytes_needed += round_up(fullname_len + sizeof(u8), sizeof(__le32)); + bytes_needed += round_up(description_len + sizeof(__le16), sizeof(__le32)); + bytes_needed += sizeof(__le32) * 2; /* flags, type and length */ + KUNIT_ASSERT_TRUE(builder->test_priv->test, + (builder->write_p + bytes_needed) < + (builder->buf + CS_DSP_MOCK_WMFW_BUF_SIZE)); + + ple32 = (__force __le32 *)builder->write_p; + *ple32++ = cpu_to_le32(def->offset_dsp_words | (def->mem_type << 16)); + *ple32++ = cpu_to_le32(bytes_needed - sizeof(__le32) - sizeof(__le32)); + + shortstring = (__force struct wmfw_short_string *)ple32; + shortstring->len = shortname_len; + memcpy(shortstring->data, def->shortname, shortname_len); + + /* Round up to next __le32 multiple */ + ple32 += round_up(struct_size_t(struct wmfw_short_string, data, shortname_len), + sizeof(*ple32)) / sizeof(*ple32); + + shortstring = (__force struct wmfw_short_string *)ple32; + shortstring->len = fullname_len; + memcpy(shortstring->data, def->fullname, fullname_len); + + /* Round up to next __le32 multiple */ + ple32 += round_up(struct_size_t(struct wmfw_short_string, data, fullname_len), + sizeof(*ple32)) / sizeof(*ple32); + + longstring = (__force struct wmfw_long_string *)ple32; + longstring->len = description_len; + memcpy(longstring->data, def->description, description_len); + + /* Round up to next __le32 multiple */ + ple32 += round_up(struct_size_t(struct wmfw_long_string, data, description_len), + sizeof(*ple32)) / sizeof(*ple32); + + *ple32++ = cpu_to_le32(def->type | (def->flags << 16)); + *ple32 = cpu_to_le32(def->length_bytes); + break; + } + + builder->write_p += bytes_needed; + builder->bytes_used += bytes_needed; + builder->num_coeffs++; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_add_coeff_desc, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +void cs_dsp_mock_wmfw_end_alg_info_block(struct cs_dsp_mock_wmfw_builder *builder) +{ + struct wmfw_region *rgn = builder->alg_data_header; + struct wmfw_adsp_alg_data *v1; + const struct wmfw_short_string *shortstring; + const struct wmfw_long_string *longstring; + size_t offset; + + KUNIT_ASSERT_NOT_NULL(builder->test_priv->test, rgn); + + /* Fill in data size */ + rgn->len = cpu_to_le32((u8 *)builder->write_p - (u8 *)rgn->data); + + /* Fill in coefficient count */ + switch (builder->format_version) { + case 0: + return; + case 1: + v1 = (struct wmfw_adsp_alg_data *)&rgn->data[0]; + v1->ncoeff = cpu_to_le32(builder->num_coeffs); + break; + default: + offset = 4; /* skip alg id */ + + /* Get name length and round up to __le32 multiple */ + shortstring = (const struct wmfw_short_string *)&rgn->data[offset]; + offset += round_up(struct_size_t(struct wmfw_short_string, data, shortstring->len), + sizeof(__le32)); + + /* Get description length and round up to __le32 multiple */ + longstring = (const struct wmfw_long_string *)&rgn->data[offset]; + offset += round_up(struct_size_t(struct wmfw_long_string, data, + le16_to_cpu(longstring->len)), + sizeof(__le32)); + + *(__force __le32 *)&rgn->data[offset] = cpu_to_le32(builder->num_coeffs); + break; + } + + builder->alg_data_header = NULL; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_end_alg_info_block, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +static void cs_dsp_init_adsp2_halo_wmfw(struct cs_dsp_mock_wmfw_builder *builder) +{ + struct wmfw_adsp2_halo_header *hdr = builder->buf; + const struct cs_dsp *dsp = builder->test_priv->dsp; + + memcpy(hdr->header.magic, "WMFW", sizeof(hdr->header.magic)); + hdr->header.len = cpu_to_le32(sizeof(*hdr)); + hdr->header.ver = builder->format_version; + hdr->header.core = dsp->type; + hdr->header.rev = cpu_to_le16(dsp->rev); + + hdr->sizes.pm = cpu_to_le32(cs_dsp_mock_size_of_region(dsp, WMFW_ADSP2_PM)); + hdr->sizes.xm = cpu_to_le32(cs_dsp_mock_size_of_region(dsp, WMFW_ADSP2_XM)); + hdr->sizes.ym = cpu_to_le32(cs_dsp_mock_size_of_region(dsp, WMFW_ADSP2_YM)); + + switch (dsp->type) { + case WMFW_ADSP2: + hdr->sizes.zm = cpu_to_le32(cs_dsp_mock_size_of_region(dsp, WMFW_ADSP2_ZM)); + break; + default: + break; + } + + builder->write_p = &hdr[1]; + builder->bytes_used += sizeof(*hdr); +} + +/** + * cs_dsp_mock_wmfw_init() - Initialize a struct cs_dsp_mock_wmfw_builder. + * + * @priv: Pointer to struct cs_dsp_test. + * @format_version: Required wmfw format version. + * + * Return: Pointer to created struct cs_dsp_mock_wmfw_builder. + */ +struct cs_dsp_mock_wmfw_builder *cs_dsp_mock_wmfw_init(struct cs_dsp_test *priv, + int format_version) +{ + struct cs_dsp_mock_wmfw_builder *builder; + + /* If format version isn't given use the default for the target core */ + if (format_version < 0) { + switch (priv->dsp->type) { + case WMFW_ADSP2: + format_version = 2; + break; + default: + format_version = 3; + break; + } + } + + builder = kunit_kzalloc(priv->test, sizeof(*builder), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(priv->test, builder); + + builder->test_priv = priv; + builder->format_version = format_version; + + builder->buf = vmalloc(CS_DSP_MOCK_WMFW_BUF_SIZE); + KUNIT_ASSERT_NOT_NULL(priv->test, builder->buf); + kunit_add_action_or_reset(priv->test, vfree_action_wrapper, builder->buf); + + builder->buf_size_bytes = CS_DSP_MOCK_WMFW_BUF_SIZE; + + switch (priv->dsp->type) { + case WMFW_ADSP2: + case WMFW_HALO: + cs_dsp_init_adsp2_halo_wmfw(builder); + break; + default: + break; + } + + return builder; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_wmfw_init, "FW_CS_DSP_KUNIT_TEST_UTILS"); diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h index 899ae94198aa..fde7e95a33e9 100644 --- a/include/linux/firmware/cirrus/cs_dsp_test_utils.h +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -53,6 +53,17 @@ struct cs_dsp_mock_alg_def { unsigned int zm_size_words; }; +struct cs_dsp_mock_coeff_def { + const char *shortname; + const char *fullname; + const char *description; + u16 type; + u16 flags; + u16 mem_type; + unsigned int offset_dsp_words; + unsigned int length_bytes; +}; + /** * struct cs_dsp_mock_xm_header - XM header builder * @@ -66,6 +77,8 @@ struct cs_dsp_mock_xm_header { size_t blob_size_bytes; }; +struct cs_dsp_mock_wmfw_builder; + extern const unsigned int cs_dsp_mock_adsp2_32bit_sysbase; extern const unsigned int cs_dsp_mock_adsp2_16bit_sysbase; extern const unsigned int cs_dsp_mock_halo_core_base; @@ -107,3 +120,23 @@ void cs_dsp_mock_regmap_drop_bytes(struct cs_dsp_test *priv, unsigned int first_reg, size_t num_bytes); void cs_dsp_mock_regmap_drop_system_regs(struct cs_dsp_test *priv); bool cs_dsp_mock_regmap_is_dirty(struct cs_dsp_test *priv, bool drop_system_regs); + +struct cs_dsp_mock_wmfw_builder *cs_dsp_mock_wmfw_init(struct cs_dsp_test *priv, + int format_version); +void cs_dsp_mock_wmfw_add_raw_block(struct cs_dsp_mock_wmfw_builder *builder, + int mem_region, unsigned int mem_offset_dsp_words, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_wmfw_add_info(struct cs_dsp_mock_wmfw_builder *builder, + const char *info); +void cs_dsp_mock_wmfw_add_data_block(struct cs_dsp_mock_wmfw_builder *builder, + int mem_region, unsigned int mem_offset_dsp_words, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_wmfw_start_alg_info_block(struct cs_dsp_mock_wmfw_builder *builder, + unsigned int alg_id, + const char *name, + const char *description); +void cs_dsp_mock_wmfw_add_coeff_desc(struct cs_dsp_mock_wmfw_builder *builder, + const struct cs_dsp_mock_coeff_def *def); +void cs_dsp_mock_wmfw_end_alg_info_block(struct cs_dsp_mock_wmfw_builder *builder); +struct firmware *cs_dsp_mock_wmfw_get_firmware(struct cs_dsp_mock_wmfw_builder *builder); +int cs_dsp_mock_wmfw_format_version(struct cs_dsp_mock_wmfw_builder *builder); -- cgit v1.2.3 From 7c052c6615297ff32032105130cd5f02059f7ae4 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Thu, 12 Dec 2024 14:37:17 +0000 Subject: firmware: cs_dsp: Add mock bin file generator for KUnit testing Add a mock firmware file that emulates what the firmware build tools would normally create. This will be used by KUnit tests to generate a test bin file. The data payload in a bin is an opaque blob, so the mock bin only needs to generate the appropriate file header and description block for each payload blob. Signed-off-by: Richard Fitzgerald Link: https://patch.msgid.link/20241212143725.1381013-5-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/firmware/cirrus/test/Makefile | 1 + drivers/firmware/cirrus/test/cs_dsp_mock_bin.c | 199 ++++++++++++++++++++++ include/linux/firmware/cirrus/cs_dsp_test_utils.h | 18 ++ 3 files changed, 218 insertions(+) create mode 100644 drivers/firmware/cirrus/test/cs_dsp_mock_bin.c (limited to 'include') diff --git a/drivers/firmware/cirrus/test/Makefile b/drivers/firmware/cirrus/test/Makefile index a67010cb6d66..99aa120718f0 100644 --- a/drivers/firmware/cirrus/test/Makefile +++ b/drivers/firmware/cirrus/test/Makefile @@ -3,6 +3,7 @@ cs_dsp_test_utils-objs := \ cs_dsp_mock_mem_maps.o \ + cs_dsp_mock_bin.o \ cs_dsp_mock_regmap.o \ cs_dsp_mock_utils.o \ cs_dsp_mock_wmfw.o diff --git a/drivers/firmware/cirrus/test/cs_dsp_mock_bin.c b/drivers/firmware/cirrus/test/cs_dsp_mock_bin.c new file mode 100644 index 000000000000..1e271ccfd9b0 --- /dev/null +++ b/drivers/firmware/cirrus/test/cs_dsp_mock_bin.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// bin file builder for cs_dsp KUnit tests. +// +// Copyright (C) 2024 Cirrus Logic, Inc. and +// Cirrus Logic International Semiconductor Ltd. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Buffer large enough for bin file content */ +#define CS_DSP_MOCK_BIN_BUF_SIZE 32768 + +KUNIT_DEFINE_ACTION_WRAPPER(vfree_action_wrapper, vfree, void *) + +struct cs_dsp_mock_bin_builder { + struct cs_dsp_test *test_priv; + void *buf; + void *write_p; + size_t bytes_used; +}; + +/** + * cs_dsp_mock_bin_get_firmware() - Get struct firmware wrapper for data. + * + * @builder: Pointer to struct cs_dsp_mock_bin_builder. + * + * Return: Pointer to a struct firmware wrapper for the data. + */ +struct firmware *cs_dsp_mock_bin_get_firmware(struct cs_dsp_mock_bin_builder *builder) +{ + struct firmware *fw; + + fw = kunit_kzalloc(builder->test_priv->test, sizeof(*fw), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(builder->test_priv->test, fw); + + fw->data = builder->buf; + fw->size = builder->bytes_used; + + return fw; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_bin_get_firmware, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_bin_add_raw_block() - Add a data block to the bin file. + * + * @builder: Pointer to struct cs_dsp_mock_bin_builder. + * @alg_ig: Algorithm ID. + * @alg_ver: Algorithm version. + * @type: Type of the block. + * @offset: Offset. + * @payload_data: Pointer to buffer containing the payload data. + * @payload_len_bytes: Length of payload data in bytes. + */ +void cs_dsp_mock_bin_add_raw_block(struct cs_dsp_mock_bin_builder *builder, + unsigned int alg_id, unsigned int alg_ver, + int type, unsigned int offset, + const void *payload_data, size_t payload_len_bytes) +{ + struct wmfw_coeff_item *item; + size_t bytes_needed = struct_size_t(struct wmfw_coeff_item, data, payload_len_bytes); + + KUNIT_ASSERT_TRUE(builder->test_priv->test, + (builder->write_p + bytes_needed) < + (builder->buf + CS_DSP_MOCK_BIN_BUF_SIZE)); + + item = builder->write_p; + + item->offset = cpu_to_le16(offset); + item->type = cpu_to_le16(type); + item->id = cpu_to_le32(alg_id); + item->ver = cpu_to_le32(alg_ver << 8); + item->len = cpu_to_le32(payload_len_bytes); + + if (payload_len_bytes) + memcpy(item->data, payload_data, payload_len_bytes); + + builder->write_p += bytes_needed; + builder->bytes_used += bytes_needed; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_bin_add_raw_block, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +static void cs_dsp_mock_bin_add_name_or_info(struct cs_dsp_mock_bin_builder *builder, + const char *info, int type) +{ + size_t info_len = strlen(info); + char *tmp = NULL; + + if (info_len % 4) { + /* Create a padded string with length a multiple of 4 */ + info_len = round_up(info_len, 4); + tmp = kunit_kzalloc(builder->test_priv->test, info_len, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(builder->test_priv->test, tmp); + memcpy(tmp, info, info_len); + info = tmp; + } + + cs_dsp_mock_bin_add_raw_block(builder, 0, 0, WMFW_INFO_TEXT, 0, info, info_len); + kunit_kfree(builder->test_priv->test, tmp); +} + +/** + * cs_dsp_mock_bin_add_info() - Add an info block to the bin file. + * + * @builder: Pointer to struct cs_dsp_mock_bin_builder. + * @info: Pointer to info string to be copied into the file. + * + * The string will be padded to a length that is a multiple of 4 bytes. + */ +void cs_dsp_mock_bin_add_info(struct cs_dsp_mock_bin_builder *builder, + const char *info) +{ + cs_dsp_mock_bin_add_name_or_info(builder, info, WMFW_INFO_TEXT); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_bin_add_info, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_bin_add_name() - Add a name block to the bin file. + * + * @builder: Pointer to struct cs_dsp_mock_bin_builder. + * @name: Pointer to name string to be copied into the file. + */ +void cs_dsp_mock_bin_add_name(struct cs_dsp_mock_bin_builder *builder, + const char *name) +{ + cs_dsp_mock_bin_add_name_or_info(builder, name, WMFW_NAME_TEXT); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_bin_add_name, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_bin_add_patch() - Add a patch data block to the bin file. + * + * @builder: Pointer to struct cs_dsp_mock_bin_builder. + * @alg_ig: Algorithm ID for the patch. + * @alg_ver: Algorithm version for the patch. + * @mem_region: Memory region for the patch. + * @reg_addr_offset: Offset to start of data in register addresses. + * @payload_data: Pointer to buffer containing the payload data. + * @payload_len_bytes: Length of payload data in bytes. + */ +void cs_dsp_mock_bin_add_patch(struct cs_dsp_mock_bin_builder *builder, + unsigned int alg_id, unsigned int alg_ver, + int mem_region, unsigned int reg_addr_offset, + const void *payload_data, size_t payload_len_bytes) +{ + /* Payload length must be a multiple of 4 */ + KUNIT_ASSERT_EQ(builder->test_priv->test, payload_len_bytes % 4, 0); + + cs_dsp_mock_bin_add_raw_block(builder, alg_id, alg_ver, + mem_region, reg_addr_offset, + payload_data, payload_len_bytes); +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_bin_add_patch, "FW_CS_DSP_KUNIT_TEST_UTILS"); + +/** + * cs_dsp_mock_bin_init() - Initialize a struct cs_dsp_mock_bin_builder. + * + * @priv: Pointer to struct cs_dsp_test. + * @format_version: Required bin format version. + * @fw_version: Firmware version to put in bin file. + * + * Return: Pointer to created struct cs_dsp_mock_bin_builder. + */ +struct cs_dsp_mock_bin_builder *cs_dsp_mock_bin_init(struct cs_dsp_test *priv, + int format_version, + unsigned int fw_version) +{ + struct cs_dsp_mock_bin_builder *builder; + struct wmfw_coeff_hdr *hdr; + + builder = kunit_kzalloc(priv->test, sizeof(*builder), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(priv->test, builder); + builder->test_priv = priv; + + builder->buf = vmalloc(CS_DSP_MOCK_BIN_BUF_SIZE); + KUNIT_ASSERT_NOT_NULL(priv->test, builder->buf); + kunit_add_action_or_reset(priv->test, vfree_action_wrapper, builder->buf); + + /* Create header */ + hdr = builder->buf; + memcpy(hdr->magic, "WMDR", sizeof(hdr->magic)); + hdr->len = cpu_to_le32(offsetof(struct wmfw_coeff_hdr, data)); + hdr->ver = cpu_to_le32(fw_version | (format_version << 24)); + hdr->core_ver = cpu_to_le32(((u32)priv->dsp->type << 24) | priv->dsp->rev); + + builder->write_p = hdr->data; + builder->bytes_used = offsetof(struct wmfw_coeff_hdr, data); + + return builder; +} +EXPORT_SYMBOL_NS_GPL(cs_dsp_mock_bin_init, "FW_CS_DSP_KUNIT_TEST_UTILS"); diff --git a/include/linux/firmware/cirrus/cs_dsp_test_utils.h b/include/linux/firmware/cirrus/cs_dsp_test_utils.h index fde7e95a33e9..4f87a908ab4f 100644 --- a/include/linux/firmware/cirrus/cs_dsp_test_utils.h +++ b/include/linux/firmware/cirrus/cs_dsp_test_utils.h @@ -78,6 +78,7 @@ struct cs_dsp_mock_xm_header { }; struct cs_dsp_mock_wmfw_builder; +struct cs_dsp_mock_bin_builder; extern const unsigned int cs_dsp_mock_adsp2_32bit_sysbase; extern const unsigned int cs_dsp_mock_adsp2_16bit_sysbase; @@ -121,6 +122,23 @@ void cs_dsp_mock_regmap_drop_bytes(struct cs_dsp_test *priv, void cs_dsp_mock_regmap_drop_system_regs(struct cs_dsp_test *priv); bool cs_dsp_mock_regmap_is_dirty(struct cs_dsp_test *priv, bool drop_system_regs); +struct cs_dsp_mock_bin_builder *cs_dsp_mock_bin_init(struct cs_dsp_test *priv, + int format_version, + unsigned int fw_version); +void cs_dsp_mock_bin_add_raw_block(struct cs_dsp_mock_bin_builder *builder, + unsigned int alg_id, unsigned int alg_ver, + int type, unsigned int offset, + const void *payload_data, size_t payload_len_bytes); +void cs_dsp_mock_bin_add_info(struct cs_dsp_mock_bin_builder *builder, + const char *info); +void cs_dsp_mock_bin_add_name(struct cs_dsp_mock_bin_builder *builder, + const char *name); +void cs_dsp_mock_bin_add_patch(struct cs_dsp_mock_bin_builder *builder, + unsigned int alg_id, unsigned int alg_ver, + int mem_region, unsigned int reg_addr_offset, + const void *payload_data, size_t payload_len_bytes); +struct firmware *cs_dsp_mock_bin_get_firmware(struct cs_dsp_mock_bin_builder *builder); + struct cs_dsp_mock_wmfw_builder *cs_dsp_mock_wmfw_init(struct cs_dsp_test *priv, int format_version); void cs_dsp_mock_wmfw_add_raw_block(struct cs_dsp_mock_wmfw_builder *builder, -- cgit v1.2.3 From b76d32422c09bc9310f61a5a89671975db34bd2a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 9 Dec 2024 16:09:44 +0000 Subject: kref: Improve documentation There is already kernel-doc written for many of the functions in kref.h but it's not linked into the html docs anywhere. Add it to kref.rst. Improve the kref documentation by using the standard Return: section, rewording some unclear verbiage and adding docs for some undocumented functions. Update Thomas' email address to his current one. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Randy Dunlap Acked-by: Randy Dunlap Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20241209160953.757673-1-willy@infradead.org --- Documentation/core-api/kref.rst | 7 +++++- include/linux/kref.h | 48 +++++++++++++++++++++++++++++------------ 2 files changed, 40 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/Documentation/core-api/kref.rst b/Documentation/core-api/kref.rst index c61eea6f1bf2..8db9ff03d952 100644 --- a/Documentation/core-api/kref.rst +++ b/Documentation/core-api/kref.rst @@ -3,7 +3,7 @@ Adding reference counters (krefs) to kernel objects =================================================== :Author: Corey Minyard -:Author: Thomas Hellstrom +:Author: Thomas Hellström A lot of this was lifted from Greg Kroah-Hartman's 2004 OLS paper and presentation on krefs, which can be found at: @@ -321,3 +321,8 @@ rcu grace period after release_entry_rcu was called. That can be accomplished by using kfree_rcu(entry, rhead) as done above, or by calling synchronize_rcu() before using kfree, but note that synchronize_rcu() may sleep for a substantial amount of time. + +Functions and structures +======================== + +.. kernel-doc:: include/linux/kref.h diff --git a/include/linux/kref.h b/include/linux/kref.h index d32e21a2538c..88e82ab1367c 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -46,18 +46,18 @@ static inline void kref_get(struct kref *kref) } /** - * kref_put - decrement refcount for object. - * @kref: object. - * @release: pointer to the function that will clean up the object when the + * kref_put - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the * last reference to the object is released. - * This pointer is required, and it is not acceptable to pass kfree - * in as this function. * - * Decrement the refcount, and if 0, call release(). - * Return 1 if the object was removed, otherwise return 0. Beware, if this - * function returns 0, you still can not count on the kref from remaining in - * memory. Only use the return value if you want to see if the kref is now - * gone, not present. + * Decrement the refcount, and if 0, call @release. The caller may not + * pass NULL or kfree() as the release function. + * + * Return: 1 if this call removed the object, otherwise return 0. Beware, + * if this function returns 0, another caller may have removed the object + * by the time this function returns. The return value is only certain + * if you want to see if the object is definitely released. */ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { @@ -68,17 +68,37 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) return 0; } +/** + * kref_put_mutex - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the + * last reference to the object is released. + * @mutex: Mutex which protects the release function. + * + * This variant of kref_lock() calls the @release function with the @mutex + * held. The @release function will release the mutex. + */ static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), - struct mutex *lock) + struct mutex *mutex) { - if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { + if (refcount_dec_and_mutex_lock(&kref->refcount, mutex)) { release(kref); return 1; } return 0; } +/** + * kref_put_lock - Decrement refcount for object + * @kref: Object + * @release: Pointer to the function that will clean up the object when the + * last reference to the object is released. + * @lock: Spinlock which protects the release function. + * + * This variant of kref_lock() calls the @release function with the @lock + * held. The @release function will release the lock. + */ static inline int kref_put_lock(struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) @@ -94,8 +114,6 @@ static inline int kref_put_lock(struct kref *kref, * kref_get_unless_zero - Increment refcount for object unless it is zero. * @kref: object. * - * Return non-zero if the increment succeeded. Otherwise return 0. - * * This function is intended to simplify locking around refcounting for * objects that can be looked up from a lookup structure, and which are * removed from that lookup structure in the object destructor. @@ -105,6 +123,8 @@ static inline int kref_put_lock(struct kref *kref, * With a lookup followed by a kref_get_unless_zero *with return value check* * locking in the kref_put path can be deferred to the actual removal from * the lookup structure and RCU lookups become trivial. + * + * Return: non-zero if the increment succeeded. Otherwise return 0. */ static inline int __must_check kref_get_unless_zero(struct kref *kref) { -- cgit v1.2.3 From 7f78c081d44ce0f9d73dcef3204df5936ddbfea7 Mon Sep 17 00:00:00 2001 From: Chun-Kuang Hu Date: Sun, 1 Sep 2024 14:32:59 +0000 Subject: soc: mediatek: cmdq: Remove cmdq_pkt_finalize() helper function In order to have fine-grained control, use cmdq_pkt_eoc() and cmdq_pkt_jump_rel() to replace cmdq_pkt_finalize(). Signed-off-by: Chun-Kuang Hu Acked-by: Matthias Brugger Reviewed-by: AngeloGioacchino Del Regno Signed-off-by: Sebastian Fricke Signed-off-by: Mauro Carvalho Chehab --- drivers/soc/mediatek/mtk-cmdq-helper.c | 18 ------------------ include/linux/soc/mediatek/mtk-cmdq.h | 13 ------------- 2 files changed, 31 deletions(-) (limited to 'include') diff --git a/drivers/soc/mediatek/mtk-cmdq-helper.c b/drivers/soc/mediatek/mtk-cmdq-helper.c index 0a05ee87a0fc..455221e8de24 100644 --- a/drivers/soc/mediatek/mtk-cmdq-helper.c +++ b/drivers/soc/mediatek/mtk-cmdq-helper.c @@ -524,23 +524,5 @@ int cmdq_pkt_eoc(struct cmdq_pkt *pkt) } EXPORT_SYMBOL(cmdq_pkt_eoc); -int cmdq_pkt_finalize(struct cmdq_pkt *pkt) -{ - struct cmdq_instruction inst = { {0} }; - int err; - - /* insert EOC and generate IRQ for each command iteration */ - err = cmdq_pkt_eoc(pkt); - if (err < 0) - return err; - - /* JUMP to end */ - inst.op = CMDQ_CODE_JUMP; - inst.value = CMDQ_JUMP_PASS >> - cmdq_get_shift_pa(((struct cmdq_client *)pkt->cl)->chan); - return cmdq_pkt_append_command(pkt, inst); -} -EXPORT_SYMBOL(cmdq_pkt_finalize); - MODULE_DESCRIPTION("MediaTek Command Queue (CMDQ) driver"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/soc/mediatek/mtk-cmdq.h b/include/linux/soc/mediatek/mtk-cmdq.h index 5bee6f7fc400..0c3906e8ad19 100644 --- a/include/linux/soc/mediatek/mtk-cmdq.h +++ b/include/linux/soc/mediatek/mtk-cmdq.h @@ -391,14 +391,6 @@ int cmdq_pkt_jump_rel(struct cmdq_pkt *pkt, s32 offset, u8 shift_pa); */ int cmdq_pkt_eoc(struct cmdq_pkt *pkt); -/** - * cmdq_pkt_finalize() - Append EOC and jump command to pkt. - * @pkt: the CMDQ packet - * - * Return: 0 for success; else the error code is returned - */ -int cmdq_pkt_finalize(struct cmdq_pkt *pkt); - #else /* IS_ENABLED(CONFIG_MTK_CMDQ) */ static inline int cmdq_dev_get_client_reg(struct device *dev, @@ -519,11 +511,6 @@ static inline int cmdq_pkt_eoc(struct cmdq_pkt *pkt) return -EINVAL; } -static inline int cmdq_pkt_finalize(struct cmdq_pkt *pkt) -{ - return -EINVAL; -} - #endif /* IS_ENABLED(CONFIG_MTK_CMDQ) */ #endif /* __MTK_CMDQ_H__ */ -- cgit v1.2.3 From 4e885fab7164689f031a6c73522a3d91674c5bdc Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 13 Dec 2024 13:09:28 +0000 Subject: bpf: Add a __btf_get_by_fd helper Add a new helper to get a pointer to a struct btf from a file descriptor. This helper doesn't increase a refcnt. Add a comment explaining this and pointing to a corresponding function which does take a reference. Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241213130934.1087929-2-aspsk@isovalent.com --- include/linux/bpf.h | 17 +++++++++++++++++ kernel/bpf/btf.c | 11 +++-------- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eaee2a819f4c..ac44b857b2f9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2301,6 +2301,14 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu); struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); +/* + * The __bpf_map_get() and __btf_get_by_fd() functions parse a file + * descriptor and return a corresponding map or btf object. + * Their names are double underscored to emphasize the fact that they + * do not increase refcnt. To also increase refcnt use corresponding + * bpf_map_get() and btf_get_by_fd() functions. + */ + static inline struct bpf_map *__bpf_map_get(struct fd f) { if (fd_empty(f)) @@ -2310,6 +2318,15 @@ static inline struct bpf_map *__bpf_map_get(struct fd f) return fd_file(f)->private_data; } +static inline struct btf *__btf_get_by_fd(struct fd f) +{ + if (fd_empty(f)) + return ERR_PTR(-EBADF); + if (unlikely(fd_file(f)->f_op != &btf_fops)) + return ERR_PTR(-EINVAL); + return fd_file(f)->private_data; +} + void bpf_map_inc(struct bpf_map *map); void bpf_map_inc_with_uref(struct bpf_map *map); struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index e7a59e6462a9..2ef4fef71910 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7746,14 +7746,9 @@ struct btf *btf_get_by_fd(int fd) struct btf *btf; CLASS(fd, f)(fd); - if (fd_empty(f)) - return ERR_PTR(-EBADF); - - if (fd_file(f)->f_op != &btf_fops) - return ERR_PTR(-EINVAL); - - btf = fd_file(f)->private_data; - refcount_inc(&btf->refcnt); + btf = __btf_get_by_fd(f); + if (!IS_ERR(btf)) + refcount_inc(&btf->refcnt); return btf; } -- cgit v1.2.3 From 4d3ae294f900fb7232fb6c890dbd3176b8a5f121 Mon Sep 17 00:00:00 2001 From: Anton Protopopov Date: Fri, 13 Dec 2024 13:09:31 +0000 Subject: bpf: Add fd_array_cnt attribute for prog_load The fd_array attribute of the BPF_PROG_LOAD syscall may contain a set of file descriptors: maps or btfs. This field was introduced as a sparse array. Introduce a new attribute, fd_array_cnt, which, if present, indicates that the fd_array is a continuous array of the corresponding length. If fd_array_cnt is non-zero, then every map in the fd_array will be bound to the program, as if it was used by the program. This functionality is similar to the BPF_PROG_BIND_MAP syscall, but such maps can be used by the verifier during the program load. Signed-off-by: Anton Protopopov Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20241213130934.1087929-5-aspsk@isovalent.com --- include/uapi/linux/bpf.h | 10 ++++ kernel/bpf/syscall.c | 2 +- kernel/bpf/verifier.c | 106 +++++++++++++++++++++++++++++++++++------ tools/include/uapi/linux/bpf.h | 10 ++++ 4 files changed, 112 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4162afc6b5d0..2acf9b336371 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1573,6 +1573,16 @@ union bpf_attr { * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. */ __s32 prog_token_fd; + /* The fd_array_cnt can be used to pass the length of the + * fd_array array. In this case all the [map] file descriptors + * passed in this array will be bound to the program, even if + * the maps are not referenced directly. The functionality is + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be + * used by the verifier during the program load. If provided, + * then the fd_array[0,...,fd_array_cnt-1] is expected to be + * continuous. + */ + __u32 fd_array_cnt; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5684e8ce132d..4e88797fdbeb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2730,7 +2730,7 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD prog_token_fd +#define BPF_PROG_LOAD_LAST_FIELD fd_array_cnt static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 296765ffbdc5..c0e772996c52 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -19505,22 +19505,10 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, return 0; } -/* Add map behind fd to used maps list, if it's not already there, and return - * its index. - * Returns <0 on error, or >= 0 index, on success. - */ -static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd) +static int __add_used_map(struct bpf_verifier_env *env, struct bpf_map *map) { - CLASS(fd, f)(fd); - struct bpf_map *map; int i, err; - map = __bpf_map_get(f); - if (IS_ERR(map)) { - verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); - return PTR_ERR(map); - } - /* check whether we recorded this map already */ for (i = 0; i < env->used_map_cnt; i++) if (env->used_maps[i] == map) @@ -19551,6 +19539,24 @@ static int add_used_map_from_fd(struct bpf_verifier_env *env, int fd) return env->used_map_cnt - 1; } +/* Add map behind fd to used maps list, if it's not already there, and return + * its index. + * Returns <0 on error, or >= 0 index, on success. + */ +static int add_used_map(struct bpf_verifier_env *env, int fd) +{ + struct bpf_map *map; + CLASS(fd, f)(fd); + + map = __bpf_map_get(f); + if (IS_ERR(map)) { + verbose(env, "fd %d is not pointing to valid bpf_map\n", fd); + return PTR_ERR(map); + } + + return __add_used_map(env, map); +} + /* find and rewrite pseudo imm in ld_imm64 instructions: * * 1. if it accesses map FD, replace it with actual map pointer. @@ -19642,7 +19648,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) break; } - map_idx = add_used_map_from_fd(env, fd); + map_idx = add_used_map(env, fd); if (map_idx < 0) return map_idx; map = env->used_maps[map_idx]; @@ -22850,6 +22856,73 @@ struct btf *bpf_get_btf_vmlinux(void) return btf_vmlinux; } +/* + * The add_fd_from_fd_array() is executed only if fd_array_cnt is non-zero. In + * this case expect that every file descriptor in the array is either a map or + * a BTF. Everything else is considered to be trash. + */ +static int add_fd_from_fd_array(struct bpf_verifier_env *env, int fd) +{ + struct bpf_map *map; + struct btf *btf; + CLASS(fd, f)(fd); + int err; + + map = __bpf_map_get(f); + if (!IS_ERR(map)) { + err = __add_used_map(env, map); + if (err < 0) + return err; + return 0; + } + + btf = __btf_get_by_fd(f); + if (!IS_ERR(btf)) { + err = __add_used_btf(env, btf); + if (err < 0) + return err; + return 0; + } + + verbose(env, "fd %d is not pointing to valid bpf_map or btf\n", fd); + return PTR_ERR(map); +} + +static int process_fd_array(struct bpf_verifier_env *env, union bpf_attr *attr, bpfptr_t uattr) +{ + size_t size = sizeof(int); + int ret; + int fd; + u32 i; + + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); + + /* + * The only difference between old (no fd_array_cnt is given) and new + * APIs is that in the latter case the fd_array is expected to be + * continuous and is scanned for map fds right away + */ + if (!attr->fd_array_cnt) + return 0; + + /* Check for integer overflow */ + if (attr->fd_array_cnt >= (U32_MAX / size)) { + verbose(env, "fd_array_cnt is too big (%u)\n", attr->fd_array_cnt); + return -EINVAL; + } + + for (i = 0; i < attr->fd_array_cnt; i++) { + if (copy_from_bpfptr_offset(&fd, env->fd_array, i * size, size)) + return -EFAULT; + + ret = add_fd_from_fd_array(env, fd); + if (ret) + return ret; + } + + return 0; +} + int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size) { u64 start_time = ktime_get_ns(); @@ -22881,7 +22954,6 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; - env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); env->allow_ptr_leaks = bpf_allow_ptr_leaks(env->prog->aux->token); env->allow_uninit_stack = bpf_allow_uninit_stack(env->prog->aux->token); @@ -22904,6 +22976,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 if (ret) goto err_unlock; + ret = process_fd_array(env, attr, uattr); + if (ret) + goto skip_full_check; + mark_verifier_state_clean(env); if (IS_ERR(btf_vmlinux)) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4162afc6b5d0..2acf9b336371 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1573,6 +1573,16 @@ union bpf_attr { * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. */ __s32 prog_token_fd; + /* The fd_array_cnt can be used to pass the length of the + * fd_array array. In this case all the [map] file descriptors + * passed in this array will be bound to the program, even if + * the maps are not referenced directly. The functionality is + * similar to the BPF_PROG_BIND_MAP syscall, but maps can be + * used by the verifier during the program load. If provided, + * then the fd_array[0,...,fd_array_cnt-1] is expected to be + * continuous. + */ + __u32 fd_array_cnt; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ -- cgit v1.2.3 From 6037802bbae892f3ad0c7b4c4faee39b967e32b0 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Dec 2024 20:57:55 +0100 Subject: power: supply: core: implement extension API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various drivers, mostly in platform/x86 extend the ACPI battery driver with additional sysfs attributes to implement more UAPIs than are exposed through ACPI by using various side-channels, like WMI, nonstandard ACPI or EC communication. While the created sysfs attributes look similar to the attributes provided by the powersupply core, there are various deficiencies: * They don't show up in uevent payload. * They can't be queried with the standard in-kernel APIs. * They don't work with triggers. * The extending driver has to reimplement all of the parsing, formatting and sysfs display logic. * Writing a extension driver is completely different from writing a normal power supply driver. This extension API avoids all of these issues. An extension is just a "struct power_supply_ext" with the same kind of callbacks as in a normal "struct power_supply_desc". The API is meant to be used via battery_hook_register(), the same way as the current extensions. Signed-off-by: Thomas Weißschuh Reviewed-by: Armin Wolf Link: https://lore.kernel.org/r/20241211-power-supply-extensions-v6-1-9d9dc3f3d387@weissschuh.net Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply.h | 17 ++++ drivers/power/supply/power_supply_core.c | 162 ++++++++++++++++++++++++++++-- drivers/power/supply/power_supply_sysfs.c | 26 ++++- include/linux/power_supply.h | 33 ++++++ 4 files changed, 228 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/power/supply/power_supply.h b/drivers/power/supply/power_supply.h index 5dabbd895538..531785516d2a 100644 --- a/drivers/power/supply/power_supply.h +++ b/drivers/power/supply/power_supply.h @@ -9,6 +9,8 @@ * Modified: 2004, Oct Szabolcs Gyurko */ +#include + struct device; struct device_type; struct power_supply; @@ -17,6 +19,21 @@ extern int power_supply_property_is_writeable(struct power_supply *psy, enum power_supply_property psp); extern bool power_supply_has_property(struct power_supply *psy, enum power_supply_property psp); +extern bool power_supply_ext_has_property(const struct power_supply_ext *ext, + enum power_supply_property psp); + +struct power_supply_ext_registration { + struct list_head list_head; + const struct power_supply_ext *ext; + void *data; +}; + +/* Make sure that the macro is a single expression */ +#define power_supply_for_each_extension(pos, psy) \ + if ( ({ lockdep_assert_held(&(psy)->extensions_sem); 0; }) ) \ + ; \ + else \ + list_for_each_entry(pos, &(psy)->extensions, list_head) \ #ifdef CONFIG_SYSFS diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 1db27afb7017..29209c19ea67 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -80,6 +80,7 @@ static int __power_supply_changed_work(struct power_supply *pst, void *data) static void power_supply_changed_work(struct work_struct *work) { + int ret; unsigned long flags; struct power_supply *psy = container_of(work, struct power_supply, changed_work); @@ -87,6 +88,16 @@ static void power_supply_changed_work(struct work_struct *work) dev_dbg(&psy->dev, "%s\n", __func__); spin_lock_irqsave(&psy->changed_lock, flags); + + if (unlikely(psy->update_groups)) { + psy->update_groups = false; + spin_unlock_irqrestore(&psy->changed_lock, flags); + ret = sysfs_update_groups(&psy->dev.kobj, power_supply_dev_type.groups); + if (ret) + dev_warn(&psy->dev, "failed to update sysfs groups: %pe\n", ERR_PTR(ret)); + spin_lock_irqsave(&psy->changed_lock, flags); + } + /* * Check 'changed' here to avoid issues due to race between * power_supply_changed() and this routine. In worst case @@ -1208,15 +1219,34 @@ static bool psy_desc_has_property(const struct power_supply_desc *psy_desc, return found; } +bool power_supply_ext_has_property(const struct power_supply_ext *psy_ext, + enum power_supply_property psp) +{ + int i; + + for (i = 0; i < psy_ext->num_properties; i++) + if (psy_ext->properties[i] == psp) + return true; + + return false; +} + bool power_supply_has_property(struct power_supply *psy, enum power_supply_property psp) { + struct power_supply_ext_registration *reg; + if (psy_desc_has_property(psy->desc, psp)) return true; if (power_supply_battery_info_has_prop(psy->battery_info, psp)) return true; + power_supply_for_each_extension(reg, psy) { + if (power_supply_ext_has_property(reg->ext, psp)) + return true; + } + return false; } @@ -1224,12 +1254,21 @@ int power_supply_get_property(struct power_supply *psy, enum power_supply_property psp, union power_supply_propval *val) { + struct power_supply_ext_registration *reg; + if (atomic_read(&psy->use_cnt) <= 0) { if (!psy->initialized) return -EAGAIN; return -ENODEV; } + scoped_guard(rwsem_read, &psy->extensions_sem) { + power_supply_for_each_extension(reg, psy) { + if (power_supply_ext_has_property(reg->ext, psp)) + return reg->ext->get_property(psy, reg->ext, reg->data, psp, val); + } + } + if (psy_desc_has_property(psy->desc, psp)) return psy->desc->get_property(psy, psp, val); else if (power_supply_battery_info_has_prop(psy->battery_info, psp)) @@ -1243,7 +1282,24 @@ int power_supply_set_property(struct power_supply *psy, enum power_supply_property psp, const union power_supply_propval *val) { - if (atomic_read(&psy->use_cnt) <= 0 || !psy->desc->set_property) + struct power_supply_ext_registration *reg; + + if (atomic_read(&psy->use_cnt) <= 0) + return -ENODEV; + + scoped_guard(rwsem_read, &psy->extensions_sem) { + power_supply_for_each_extension(reg, psy) { + if (power_supply_ext_has_property(reg->ext, psp)) { + if (reg->ext->set_property) + return reg->ext->set_property(psy, reg->ext, reg->data, + psp, val); + else + return -ENODEV; + } + } + } + + if (!psy->desc->set_property) return -ENODEV; return psy->desc->set_property(psy, psp, val); @@ -1253,7 +1309,22 @@ EXPORT_SYMBOL_GPL(power_supply_set_property); int power_supply_property_is_writeable(struct power_supply *psy, enum power_supply_property psp) { - return psy->desc->property_is_writeable && psy->desc->property_is_writeable(psy, psp); + struct power_supply_ext_registration *reg; + + power_supply_for_each_extension(reg, psy) { + if (power_supply_ext_has_property(reg->ext, psp)) { + if (reg->ext->property_is_writeable) + return reg->ext->property_is_writeable(psy, reg->ext, + reg->data, psp); + else + return 0; + } + } + + if (!psy->desc->property_is_writeable) + return 0; + + return psy->desc->property_is_writeable(psy, psp); } void power_supply_external_power_changed(struct power_supply *psy) @@ -1272,6 +1343,76 @@ int power_supply_powers(struct power_supply *psy, struct device *dev) } EXPORT_SYMBOL_GPL(power_supply_powers); +static int power_supply_update_sysfs_and_hwmon(struct power_supply *psy) +{ + unsigned long flags; + + spin_lock_irqsave(&psy->changed_lock, flags); + psy->update_groups = true; + spin_unlock_irqrestore(&psy->changed_lock, flags); + + power_supply_changed(psy); + + power_supply_remove_hwmon_sysfs(psy); + return power_supply_add_hwmon_sysfs(psy); +} + +int power_supply_register_extension(struct power_supply *psy, const struct power_supply_ext *ext, + void *data) +{ + struct power_supply_ext_registration *reg; + size_t i; + int ret; + + if (!psy || !ext || !ext->properties || !ext->num_properties) + return -EINVAL; + + guard(rwsem_write)(&psy->extensions_sem); + + for (i = 0; i < ext->num_properties; i++) + if (power_supply_has_property(psy, ext->properties[i])) + return -EEXIST; + + reg = kmalloc(sizeof(*reg), GFP_KERNEL); + if (!reg) + return -ENOMEM; + + reg->ext = ext; + reg->data = data; + list_add(®->list_head, &psy->extensions); + + ret = power_supply_update_sysfs_and_hwmon(psy); + if (ret) + goto sysfs_hwmon_failed; + + return 0; + +sysfs_hwmon_failed: + list_del(®->list_head); + kfree(reg); + return ret; +} +EXPORT_SYMBOL_GPL(power_supply_register_extension); + +void power_supply_unregister_extension(struct power_supply *psy, const struct power_supply_ext *ext) +{ + struct power_supply_ext_registration *reg; + + guard(rwsem_write)(&psy->extensions_sem); + + power_supply_for_each_extension(reg, psy) { + if (reg->ext == ext) { + list_del(®->list_head); + kfree(reg); + power_supply_update_sysfs_and_hwmon(psy); + return; + } + } + + dev_warn(&psy->dev, "Trying to unregister invalid extension"); +} +EXPORT_SYMBOL_GPL(power_supply_unregister_extension); + static void power_supply_dev_release(struct device *dev) { struct power_supply *psy = to_power_supply(dev); @@ -1426,6 +1567,9 @@ __power_supply_register(struct device *parent, } spin_lock_init(&psy->changed_lock); + init_rwsem(&psy->extensions_sem); + INIT_LIST_HEAD(&psy->extensions); + rc = device_add(dev); if (rc) goto device_add_failed; @@ -1438,13 +1582,15 @@ __power_supply_register(struct device *parent, if (rc) goto register_thermal_failed; - rc = power_supply_create_triggers(psy); - if (rc) - goto create_triggers_failed; + scoped_guard(rwsem_read, &psy->extensions_sem) { + rc = power_supply_create_triggers(psy); + if (rc) + goto create_triggers_failed; - rc = power_supply_add_hwmon_sysfs(psy); - if (rc) - goto add_hwmon_sysfs_failed; + rc = power_supply_add_hwmon_sysfs(psy); + if (rc) + goto add_hwmon_sysfs_failed; + } /* * Update use_cnt after any uevents (most notably from device_add()). diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index e18f1ee53f21..b8f73bc89b05 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -299,6 +299,27 @@ static ssize_t power_supply_show_enum_with_available( return count; } +static ssize_t power_supply_show_charge_behaviour(struct device *dev, + struct power_supply *psy, + union power_supply_propval *value, + char *buf) +{ + struct power_supply_ext_registration *reg; + + scoped_guard(rwsem_read, &psy->extensions_sem) { + power_supply_for_each_extension(reg, psy) { + if (power_supply_ext_has_property(reg->ext, + POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR)) + return power_supply_charge_behaviour_show(dev, + reg->ext->charge_behaviours, + value->intval, buf); + } + } + + return power_supply_charge_behaviour_show(dev, psy->desc->charge_behaviours, + value->intval, buf); +} + static ssize_t power_supply_format_property(struct device *dev, bool uevent, struct device_attribute *attr, @@ -338,8 +359,7 @@ static ssize_t power_supply_format_property(struct device *dev, case POWER_SUPPLY_PROP_CHARGE_BEHAVIOUR: if (uevent) /* no possible values in uevents */ goto default_format; - ret = power_supply_charge_behaviour_show(dev, psy->desc->charge_behaviours, - value.intval, buf); + ret = power_supply_show_charge_behaviour(dev, psy, &value, buf); break; case POWER_SUPPLY_PROP_CHARGE_TYPES: if (uevent) /* no possible values in uevents */ @@ -422,6 +442,8 @@ static umode_t power_supply_attr_is_visible(struct kobject *kobj, if (attrno == POWER_SUPPLY_PROP_TYPE) return mode; + guard(rwsem_read)(&psy->extensions_sem); + if (power_supply_has_property(psy, attrno)) { if (power_supply_property_is_writeable(psy, attrno) > 0) mode |= S_IWUSR; diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 0d96657d1a2b..a877518cd963 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include @@ -283,6 +285,27 @@ struct power_supply_desc { int use_for_apm; }; +struct power_supply_ext { + u8 charge_behaviours; + const enum power_supply_property *properties; + size_t num_properties; + + int (*get_property)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp, + union power_supply_propval *val); + int (*set_property)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp, + const union power_supply_propval *val); + int (*property_is_writeable)(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data, + enum power_supply_property psp); +}; + struct power_supply { const struct power_supply_desc *desc; @@ -302,10 +325,13 @@ struct power_supply { struct delayed_work deferred_register_work; spinlock_t changed_lock; bool changed; + bool update_groups; bool initialized; bool removing; atomic_t use_cnt; struct power_supply_battery_info *battery_info; + struct rw_semaphore extensions_sem; /* protects "extensions" */ + struct list_head extensions; #ifdef CONFIG_THERMAL struct thermal_zone_device *tzd; struct thermal_cooling_device *tcd; @@ -882,6 +908,13 @@ devm_power_supply_register(struct device *parent, extern void power_supply_unregister(struct power_supply *psy); extern int power_supply_powers(struct power_supply *psy, struct device *dev); +extern int __must_check +power_supply_register_extension(struct power_supply *psy, + const struct power_supply_ext *ext, + void *data); +extern void power_supply_unregister_extension(struct power_supply *psy, + const struct power_supply_ext *ext); + #define to_power_supply(device) container_of(device, struct power_supply, dev) extern void *power_supply_get_drvdata(struct power_supply *psy); -- cgit v1.2.3 From 07d58e0a60f70b3cc176c9427d1ea856d1756820 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 7 Dec 2024 11:05:03 -0800 Subject: crypto: skcipher - remove support for physical address walks Since the physical address support in skcipher_walk is not used anymore, remove all the code associated with it. This includes: - The skcipher_walk_async() and skcipher_walk_complete() functions; - The SKCIPHER_WALK_PHYS flag and everything conditional on it; - The buffers, phys, and virt.page fields in struct skcipher_walk; - struct skcipher_walk_buffer. As a result, skcipher_walk now just supports virtual addresses. Physical address support in skcipher_walk is unneeded because drivers that need physical addresses just use the scatterlists directly. Signed-off-by: Eric Biggers Signed-off-by: Herbert Xu --- crypto/skcipher.c | 187 ++++++------------------------------- include/crypto/internal/skcipher.h | 12 --- 2 files changed, 26 insertions(+), 173 deletions(-) (limited to 'include') diff --git a/crypto/skcipher.c b/crypto/skcipher.c index f74e4d0d87a2..d5fe0eca3826 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -29,19 +28,10 @@ #define CRYPTO_ALG_TYPE_SKCIPHER_MASK 0x0000000e enum { - SKCIPHER_WALK_PHYS = 1 << 0, - SKCIPHER_WALK_SLOW = 1 << 1, - SKCIPHER_WALK_COPY = 1 << 2, - SKCIPHER_WALK_DIFF = 1 << 3, - SKCIPHER_WALK_SLEEP = 1 << 4, -}; - -struct skcipher_walk_buffer { - struct list_head entry; - struct scatter_walk dst; - unsigned int len; - u8 *data; - u8 buffer[]; + SKCIPHER_WALK_SLOW = 1 << 0, + SKCIPHER_WALK_COPY = 1 << 1, + SKCIPHER_WALK_DIFF = 1 << 2, + SKCIPHER_WALK_SLEEP = 1 << 3, }; static const struct crypto_type crypto_skcipher_type; @@ -95,8 +85,7 @@ static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize) addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); addr = skcipher_get_spot(addr, bsize); - scatterwalk_copychunks(addr, &walk->out, bsize, - (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1); + scatterwalk_copychunks(addr, &walk->out, bsize, 1); return 0; } @@ -113,8 +102,7 @@ int skcipher_walk_done(struct skcipher_walk *walk, int err) nbytes = walk->total - n; } - if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS | - SKCIPHER_WALK_SLOW | + if (likely(!(walk->flags & (SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY | SKCIPHER_WALK_DIFF)))) { unmap_src: @@ -162,9 +150,6 @@ finish: if (!((unsigned long)walk->buffer | (unsigned long)walk->page)) goto out; - if (walk->flags & SKCIPHER_WALK_PHYS) - goto out; - if (walk->iv != walk->oiv) memcpy(walk->oiv, walk->iv, walk->ivsize); if (walk->buffer != walk->page) @@ -177,97 +162,33 @@ out: } EXPORT_SYMBOL_GPL(skcipher_walk_done); -void skcipher_walk_complete(struct skcipher_walk *walk, int err) -{ - struct skcipher_walk_buffer *p, *tmp; - - list_for_each_entry_safe(p, tmp, &walk->buffers, entry) { - u8 *data; - - if (err) - goto done; - - data = p->data; - if (!data) { - data = PTR_ALIGN(&p->buffer[0], walk->alignmask + 1); - data = skcipher_get_spot(data, walk->stride); - } - - scatterwalk_copychunks(data, &p->dst, p->len, 1); - - if (offset_in_page(p->data) + p->len + walk->stride > - PAGE_SIZE) - free_page((unsigned long)p->data); - -done: - list_del(&p->entry); - kfree(p); - } - - if (!err && walk->iv != walk->oiv) - memcpy(walk->oiv, walk->iv, walk->ivsize); - if (walk->buffer != walk->page) - kfree(walk->buffer); - if (walk->page) - free_page((unsigned long)walk->page); -} -EXPORT_SYMBOL_GPL(skcipher_walk_complete); - -static void skcipher_queue_write(struct skcipher_walk *walk, - struct skcipher_walk_buffer *p) -{ - p->dst = walk->out; - list_add_tail(&p->entry, &walk->buffers); -} - static int skcipher_next_slow(struct skcipher_walk *walk, unsigned int bsize) { - bool phys = walk->flags & SKCIPHER_WALK_PHYS; unsigned alignmask = walk->alignmask; - struct skcipher_walk_buffer *p; unsigned a; unsigned n; u8 *buffer; - void *v; - - if (!phys) { - if (!walk->buffer) - walk->buffer = walk->page; - buffer = walk->buffer; - if (buffer) - goto ok; - } + + if (!walk->buffer) + walk->buffer = walk->page; + buffer = walk->buffer; + if (buffer) + goto ok; /* Start with the minimum alignment of kmalloc. */ a = crypto_tfm_ctx_alignment() - 1; n = bsize; - if (phys) { - /* Calculate the minimum alignment of p->buffer. */ - a &= (sizeof(*p) ^ (sizeof(*p) - 1)) >> 1; - n += sizeof(*p); - } - - /* Minimum size to align p->buffer by alignmask. */ + /* Minimum size to align buffer by alignmask. */ n += alignmask & ~a; - /* Minimum size to ensure p->buffer does not straddle a page. */ + /* Minimum size to ensure buffer does not straddle a page. */ n += (bsize - 1) & ~(alignmask | a); - v = kzalloc(n, skcipher_walk_gfp(walk)); - if (!v) + buffer = kzalloc(n, skcipher_walk_gfp(walk)); + if (!buffer) return skcipher_walk_done(walk, -ENOMEM); - - if (phys) { - p = v; - p->len = bsize; - skcipher_queue_write(walk, p); - buffer = p->buffer; - } else { - walk->buffer = v; - buffer = v; - } - + walk->buffer = buffer; ok: walk->dst.virt.addr = PTR_ALIGN(buffer, alignmask + 1); walk->dst.virt.addr = skcipher_get_spot(walk->dst.virt.addr, bsize); @@ -283,7 +204,6 @@ ok: static int skcipher_next_copy(struct skcipher_walk *walk) { - struct skcipher_walk_buffer *p; u8 *tmp = walk->page; skcipher_map_src(walk); @@ -292,24 +212,6 @@ static int skcipher_next_copy(struct skcipher_walk *walk) walk->src.virt.addr = tmp; walk->dst.virt.addr = tmp; - - if (!(walk->flags & SKCIPHER_WALK_PHYS)) - return 0; - - p = kmalloc(sizeof(*p), skcipher_walk_gfp(walk)); - if (!p) - return -ENOMEM; - - p->data = walk->page; - p->len = walk->nbytes; - skcipher_queue_write(walk, p); - - if (offset_in_page(walk->page) + walk->nbytes + walk->stride > - PAGE_SIZE) - walk->page = NULL; - else - walk->page += walk->nbytes; - return 0; } @@ -317,16 +219,10 @@ static int skcipher_next_fast(struct skcipher_walk *walk) { unsigned long diff; - walk->src.phys.page = scatterwalk_page(&walk->in); - walk->src.phys.offset = offset_in_page(walk->in.offset); - walk->dst.phys.page = scatterwalk_page(&walk->out); - walk->dst.phys.offset = offset_in_page(walk->out.offset); - - if (walk->flags & SKCIPHER_WALK_PHYS) - return 0; - - diff = walk->src.phys.offset - walk->dst.phys.offset; - diff |= walk->src.virt.page - walk->dst.virt.page; + diff = offset_in_page(walk->in.offset) - + offset_in_page(walk->out.offset); + diff |= (u8 *)scatterwalk_page(&walk->in) - + (u8 *)scatterwalk_page(&walk->out); skcipher_map_src(walk); walk->dst.virt.addr = walk->src.virt.addr; @@ -343,7 +239,6 @@ static int skcipher_walk_next(struct skcipher_walk *walk) { unsigned int bsize; unsigned int n; - int err; walk->flags &= ~(SKCIPHER_WALK_SLOW | SKCIPHER_WALK_COPY | SKCIPHER_WALK_DIFF); @@ -358,8 +253,7 @@ static int skcipher_walk_next(struct skcipher_walk *walk) return skcipher_walk_done(walk, -EINVAL); slow_path: - err = skcipher_next_slow(walk, bsize); - goto set_phys_lowmem; + return skcipher_next_slow(walk, bsize); } if (unlikely((walk->in.offset | walk->out.offset) & walk->alignmask)) { @@ -374,22 +268,12 @@ slow_path: walk->nbytes = min_t(unsigned, n, PAGE_SIZE - offset_in_page(walk->page)); walk->flags |= SKCIPHER_WALK_COPY; - err = skcipher_next_copy(walk); - goto set_phys_lowmem; + return skcipher_next_copy(walk); } walk->nbytes = n; return skcipher_next_fast(walk); - -set_phys_lowmem: - if (!err && (walk->flags & SKCIPHER_WALK_PHYS)) { - walk->src.phys.page = virt_to_page(walk->src.virt.addr); - walk->dst.phys.page = virt_to_page(walk->dst.virt.addr); - walk->src.phys.offset &= PAGE_SIZE - 1; - walk->dst.phys.offset &= PAGE_SIZE - 1; - } - return err; } static int skcipher_copy_iv(struct skcipher_walk *walk) @@ -407,14 +291,10 @@ static int skcipher_copy_iv(struct skcipher_walk *walk) /* Minimum size to align buffer by alignmask. */ size = alignmask & ~a; - if (walk->flags & SKCIPHER_WALK_PHYS) - size += ivsize; - else { - size += aligned_bs + ivsize; + size += aligned_bs + ivsize; - /* Minimum size to ensure buffer does not straddle a page. */ - size += (bs - 1) & ~(alignmask | a); - } + /* Minimum size to ensure buffer does not straddle a page. */ + size += (bs - 1) & ~(alignmask | a); walk->buffer = kmalloc(size, skcipher_walk_gfp(walk)); if (!walk->buffer) @@ -484,8 +364,6 @@ int skcipher_walk_virt(struct skcipher_walk *walk, might_sleep_if(req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP); - walk->flags &= ~SKCIPHER_WALK_PHYS; - err = skcipher_walk_skcipher(walk, req); walk->flags &= atomic ? ~SKCIPHER_WALK_SLEEP : ~0; @@ -494,17 +372,6 @@ int skcipher_walk_virt(struct skcipher_walk *walk, } EXPORT_SYMBOL_GPL(skcipher_walk_virt); -int skcipher_walk_async(struct skcipher_walk *walk, - struct skcipher_request *req) -{ - walk->flags |= SKCIPHER_WALK_PHYS; - - INIT_LIST_HEAD(&walk->buffers); - - return skcipher_walk_skcipher(walk, req); -} -EXPORT_SYMBOL_GPL(skcipher_walk_async); - static int skcipher_walk_aead_common(struct skcipher_walk *walk, struct aead_request *req, bool atomic) { @@ -518,8 +385,6 @@ static int skcipher_walk_aead_common(struct skcipher_walk *walk, if (unlikely(!walk->total)) return 0; - walk->flags &= ~SKCIPHER_WALK_PHYS; - scatterwalk_start(&walk->in, req->src); scatterwalk_start(&walk->out, req->dst); diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h index 7ae42afdcf3e..08d1e8c63afc 100644 --- a/include/crypto/internal/skcipher.h +++ b/include/crypto/internal/skcipher.h @@ -11,7 +11,6 @@ #include #include #include -#include #include /* @@ -58,12 +57,6 @@ struct crypto_lskcipher_spawn { struct skcipher_walk { union { struct { - struct page *page; - unsigned long offset; - } phys; - - struct { - u8 *page; void *addr; } virt; } src, dst; @@ -74,8 +67,6 @@ struct skcipher_walk { struct scatter_walk out; unsigned int total; - struct list_head buffers; - u8 *page; u8 *buffer; u8 *oiv; @@ -209,13 +200,10 @@ int skcipher_walk_done(struct skcipher_walk *walk, int err); int skcipher_walk_virt(struct skcipher_walk *walk, struct skcipher_request *req, bool atomic); -int skcipher_walk_async(struct skcipher_walk *walk, - struct skcipher_request *req); int skcipher_walk_aead_encrypt(struct skcipher_walk *walk, struct aead_request *req, bool atomic); int skcipher_walk_aead_decrypt(struct skcipher_walk *walk, struct aead_request *req, bool atomic); -void skcipher_walk_complete(struct skcipher_walk *walk, int err); static inline void skcipher_walk_abort(struct skcipher_walk *walk) { -- cgit v1.2.3 From 5feae3e79dbe2d357b223fc48ae907ba0aedb271 Mon Sep 17 00:00:00 2001 From: Igor Belwon Date: Mon, 9 Dec 2024 15:45:21 +0100 Subject: dt-bindings: clock: samsung: Add Exynos990 SoC CMU bindings Add dt-schema documentation for the Exynos990 SoC CMU. This clock management unit has a topmost block (CMU_TOP) that generates top clocks for other blocks. Currently the only other block implemented is CMU_HSI0, which provides clocks for the USB part of the SoC. Also, device-tree binding definitions added for these blocks: - CMU_TOP - CMU_HSI0 Reviewed-by: Krzysztof Kozlowski Signed-off-by: Igor Belwon Link: https://lore.kernel.org/r/20241209-exynos990-cmu-v4-1-57f07080f9e4@mentallysanemainliners.org Signed-off-by: Krzysztof Kozlowski --- .../bindings/clock/samsung,exynos990-clock.yaml | 121 +++++++++++ include/dt-bindings/clock/samsung,exynos990.h | 236 +++++++++++++++++++++ 2 files changed, 357 insertions(+) create mode 100644 Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml create mode 100644 include/dt-bindings/clock/samsung,exynos990.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml new file mode 100644 index 000000000000..9e7944b5f13b --- /dev/null +++ b/Documentation/devicetree/bindings/clock/samsung,exynos990-clock.yaml @@ -0,0 +1,121 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/clock/samsung,exynos990-clock.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Samsung Exynos990 SoC clock controller + +maintainers: + - Igor Belwon + - Chanwoo Choi + - Krzysztof Kozlowski + +description: | + Exynos990 clock controller is comprised of several CMU units, generating + clocks for different domains. Those CMU units are modeled as separate device + tree nodes, and might depend on each other. The root clock in that root tree + is an external clock: OSCCLK (26 MHz). This external clock must be defined + as a fixed-rate clock in dts. + + CMU_TOP is a top-level CMU, where all base clocks are prepared using PLLs and + dividers; all other clocks of function blocks (other CMUs) are usually + derived from CMU_TOP. + + Each clock is assigned an identifier and client nodes can use this identifier + to specify the clock which they consume. All clocks available for usage + in clock consumer nodes are defined as preprocessor macros in + 'include/dt-bindings/clock/samsung,exynos990.h' header. + +properties: + compatible: + enum: + - samsung,exynos990-cmu-hsi0 + - samsung,exynos990-cmu-top + + clocks: + minItems: 1 + maxItems: 5 + + clock-names: + minItems: 1 + maxItems: 5 + + "#clock-cells": + const: 1 + + reg: + maxItems: 1 + +required: + - compatible + - clocks + - clock-names + - "#clock-cells" + - reg + +allOf: + - if: + properties: + compatible: + contains: + const: samsung,exynos990-cmu-hsi0 + + then: + properties: + clocks: + items: + - description: External reference clock (26 MHz) + - description: CMU_HSI0 BUS clock (from CMU_TOP) + - description: CMU_HSI0 USB31DRD clock (from CMU_TOP) + - description: CMU_HSI0 USBDP_DEBUG clock (from CMU_TOP) + - description: CMU_HSI0 DPGTC clock (from CMU_TOP) + + clock-names: + items: + - const: oscclk + - const: bus + - const: usb31drd + - const: usbdp_debug + - const: dpgtc + + - if: + properties: + compatible: + contains: + const: samsung,exynos990-cmu-top + + then: + properties: + clocks: + items: + - description: External reference clock (26 MHz) + + clock-names: + items: + - const: oscclk + +additionalProperties: false + +examples: + - | + #include + + cmu_hsi0: clock-controller@10a00000 { + compatible = "samsung,exynos990-cmu-hsi0"; + reg = <0x10a00000 0x8000>; + #clock-cells = <1>; + + clocks = <&oscclk>, + <&cmu_top CLK_DOUT_CMU_HSI0_BUS>, + <&cmu_top CLK_DOUT_CMU_HSI0_USB31DRD>, + <&cmu_top CLK_DOUT_CMU_HSI0_USBDP_DEBUG>, + <&cmu_top CLK_DOUT_CMU_HSI0_DPGTC>; + clock-names = "oscclk", + "bus", + "usb31drd", + "usbdp_debug", + "dpgtc"; + }; + +... diff --git a/include/dt-bindings/clock/samsung,exynos990.h b/include/dt-bindings/clock/samsung,exynos990.h new file mode 100644 index 000000000000..307215a3f3ed --- /dev/null +++ b/include/dt-bindings/clock/samsung,exynos990.h @@ -0,0 +1,236 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (C) 2024 Igor Belwon + * + * Device Tree binding constants for Exynos990 clock controller. + */ + +#ifndef _DT_BINDINGS_CLOCK_EXYNOS_990_H +#define _DT_BINDINGS_CLOCK_EXYNOS_990_H + +/* CMU_TOP */ +#define CLK_FOUT_SHARED0_PLL 1 +#define CLK_FOUT_SHARED1_PLL 2 +#define CLK_FOUT_SHARED2_PLL 3 +#define CLK_FOUT_SHARED3_PLL 4 +#define CLK_FOUT_SHARED4_PLL 5 +#define CLK_FOUT_G3D_PLL 6 +#define CLK_FOUT_MMC_PLL 7 +#define CLK_MOUT_PLL_SHARED0 8 +#define CLK_MOUT_PLL_SHARED1 9 +#define CLK_MOUT_PLL_SHARED2 10 +#define CLK_MOUT_PLL_SHARED3 11 +#define CLK_MOUT_PLL_SHARED4 12 +#define CLK_MOUT_PLL_MMC 13 +#define CLK_MOUT_PLL_G3D 14 +#define CLK_MOUT_CMU_APM_BUS 15 +#define CLK_MOUT_CMU_AUD_CPU 16 +#define CLK_MOUT_CMU_BUS0_BUS 17 +#define CLK_MOUT_CMU_BUS1_BUS 18 +#define CLK_MOUT_CMU_BUS1_SSS 19 +#define CLK_MOUT_CMU_CIS_CLK0 20 +#define CLK_MOUT_CMU_CIS_CLK1 21 +#define CLK_MOUT_CMU_CIS_CLK2 22 +#define CLK_MOUT_CMU_CIS_CLK3 23 +#define CLK_MOUT_CMU_CIS_CLK4 24 +#define CLK_MOUT_CMU_CIS_CLK5 25 +#define CLK_MOUT_CMU_CMU_BOOST 26 +#define CLK_MOUT_CMU_CORE_BUS 27 +#define CLK_MOUT_CMU_CPUCL0_DBG_BUS 28 +#define CLK_MOUT_CMU_CPUCL0_SWITCH 29 +#define CLK_MOUT_CMU_CPUCL1_SWITCH 30 +#define CLK_MOUT_CMU_CPUCL2_BUSP 31 +#define CLK_MOUT_CMU_CPUCL2_SWITCH 32 +#define CLK_MOUT_CMU_CSIS_BUS 33 +#define CLK_MOUT_CMU_CSIS_OIS_MCU 34 +#define CLK_MOUT_CMU_DNC_BUS 35 +#define CLK_MOUT_CMU_DNC_BUSM 36 +#define CLK_MOUT_CMU_DNS_BUS 37 +#define CLK_MOUT_CMU_DPU 38 +#define CLK_MOUT_CMU_DPU_ALT 39 +#define CLK_MOUT_CMU_DSP_BUS 40 +#define CLK_MOUT_CMU_G2D_G2D 41 +#define CLK_MOUT_CMU_G2D_MSCL 42 +#define CLK_MOUT_CMU_HPM 43 +#define CLK_MOUT_CMU_HSI0_BUS 44 +#define CLK_MOUT_CMU_HSI0_DPGTC 45 +#define CLK_MOUT_CMU_HSI0_USB31DRD 46 +#define CLK_MOUT_CMU_HSI0_USBDP_DEBUG 47 +#define CLK_MOUT_CMU_HSI1_BUS 48 +#define CLK_MOUT_CMU_HSI1_MMC_CARD 49 +#define CLK_MOUT_CMU_HSI1_PCIE 50 +#define CLK_MOUT_CMU_HSI1_UFS_CARD 51 +#define CLK_MOUT_CMU_HSI1_UFS_EMBD 52 +#define CLK_MOUT_CMU_HSI2_BUS 53 +#define CLK_MOUT_CMU_HSI2_PCIE 54 +#define CLK_MOUT_CMU_IPP_BUS 55 +#define CLK_MOUT_CMU_ITP_BUS 56 +#define CLK_MOUT_CMU_MCSC_BUS 57 +#define CLK_MOUT_CMU_MCSC_GDC 58 +#define CLK_MOUT_CMU_CMU_BOOST_CPU 59 +#define CLK_MOUT_CMU_MFC0_MFC0 60 +#define CLK_MOUT_CMU_MFC0_WFD 61 +#define CLK_MOUT_CMU_MIF_BUSP 62 +#define CLK_MOUT_CMU_MIF_SWITCH 63 +#define CLK_MOUT_CMU_NPU_BUS 64 +#define CLK_MOUT_CMU_PERIC0_BUS 65 +#define CLK_MOUT_CMU_PERIC0_IP 66 +#define CLK_MOUT_CMU_PERIC1_BUS 67 +#define CLK_MOUT_CMU_PERIC1_IP 68 +#define CLK_MOUT_CMU_PERIS_BUS 69 +#define CLK_MOUT_CMU_SSP_BUS 70 +#define CLK_MOUT_CMU_TNR_BUS 71 +#define CLK_MOUT_CMU_VRA_BUS 72 +#define CLK_DOUT_CMU_APM_BUS 73 +#define CLK_DOUT_CMU_AUD_CPU 74 +#define CLK_DOUT_CMU_BUS0_BUS 75 +#define CLK_DOUT_CMU_BUS1_BUS 76 +#define CLK_DOUT_CMU_BUS1_SSS 77 +#define CLK_DOUT_CMU_CIS_CLK0 78 +#define CLK_DOUT_CMU_CIS_CLK1 79 +#define CLK_DOUT_CMU_CIS_CLK2 80 +#define CLK_DOUT_CMU_CIS_CLK3 81 +#define CLK_DOUT_CMU_CIS_CLK4 82 +#define CLK_DOUT_CMU_CIS_CLK5 83 +#define CLK_DOUT_CMU_CMU_BOOST 84 +#define CLK_DOUT_CMU_CORE_BUS 85 +#define CLK_DOUT_CMU_CPUCL0_DBG_BUS 86 +#define CLK_DOUT_CMU_CPUCL0_SWITCH 87 +#define CLK_DOUT_CMU_CPUCL1_SWITCH 88 +#define CLK_DOUT_CMU_CPUCL2_BUSP 89 +#define CLK_DOUT_CMU_CPUCL2_SWITCH 90 +#define CLK_DOUT_CMU_CSIS_BUS 91 +#define CLK_DOUT_CMU_CSIS_OIS_MCU 92 +#define CLK_DOUT_CMU_DNC_BUS 93 +#define CLK_DOUT_CMU_DNC_BUSM 94 +#define CLK_DOUT_CMU_DNS_BUS 95 +#define CLK_DOUT_CMU_DSP_BUS 96 +#define CLK_DOUT_CMU_G2D_G2D 97 +#define CLK_DOUT_CMU_G2D_MSCL 98 +#define CLK_DOUT_CMU_G3D_SWITCH 99 +#define CLK_DOUT_CMU_HPM 100 +#define CLK_DOUT_CMU_HSI0_BUS 101 +#define CLK_DOUT_CMU_HSI0_DPGTC 102 +#define CLK_DOUT_CMU_HSI0_USB31DRD 103 +#define CLK_DOUT_CMU_HSI0_USBDP_DEBUG 104 +#define CLK_DOUT_CMU_HSI1_BUS 105 +#define CLK_DOUT_CMU_HSI1_MMC_CARD 106 +#define CLK_DOUT_CMU_HSI1_PCIE 107 +#define CLK_DOUT_CMU_HSI1_UFS_CARD 108 +#define CLK_DOUT_CMU_HSI1_UFS_EMBD 109 +#define CLK_DOUT_CMU_HSI2_BUS 110 +#define CLK_DOUT_CMU_HSI2_PCIE 111 +#define CLK_DOUT_CMU_IPP_BUS 112 +#define CLK_DOUT_CMU_ITP_BUS 113 +#define CLK_DOUT_CMU_MCSC_BUS 114 +#define CLK_DOUT_CMU_MCSC_GDC 115 +#define CLK_DOUT_CMU_CMU_BOOST_CPU 116 +#define CLK_DOUT_CMU_MFC0_MFC0 117 +#define CLK_DOUT_CMU_MFC0_WFD 118 +#define CLK_DOUT_CMU_MIF_BUSP 119 +#define CLK_DOUT_CMU_NPU_BUS 120 +#define CLK_DOUT_CMU_OTP 121 +#define CLK_DOUT_CMU_PERIC0_BUS 122 +#define CLK_DOUT_CMU_PERIC0_IP 123 +#define CLK_DOUT_CMU_PERIC1_BUS 124 +#define CLK_DOUT_CMU_PERIC1_IP 125 +#define CLK_DOUT_CMU_PERIS_BUS 126 +#define CLK_DOUT_CMU_SSP_BUS 127 +#define CLK_DOUT_CMU_TNR_BUS 128 +#define CLK_DOUT_CMU_VRA_BUS 129 +#define CLK_DOUT_CMU_DPU 130 +#define CLK_DOUT_CMU_DPU_ALT 131 +#define CLK_DOUT_CMU_SHARED0_DIV2 132 +#define CLK_DOUT_CMU_SHARED0_DIV3 133 +#define CLK_DOUT_CMU_SHARED0_DIV4 134 +#define CLK_DOUT_CMU_SHARED1_DIV2 135 +#define CLK_DOUT_CMU_SHARED1_DIV3 136 +#define CLK_DOUT_CMU_SHARED1_DIV4 137 +#define CLK_DOUT_CMU_SHARED2_DIV2 138 +#define CLK_DOUT_CMU_SHARED4_DIV2 139 +#define CLK_DOUT_CMU_SHARED4_DIV3 140 +#define CLK_DOUT_CMU_SHARED4_DIV4 141 +#define CLK_GOUT_CMU_G3D_BUS 142 +#define CLK_GOUT_CMU_MIF_SWITCH 143 +#define CLK_GOUT_CMU_APM_BUS 144 +#define CLK_GOUT_CMU_AUD_CPU 145 +#define CLK_GOUT_CMU_BUS0_BUS 146 +#define CLK_GOUT_CMU_BUS1_BUS 147 +#define CLK_GOUT_CMU_BUS1_SSS 148 +#define CLK_GOUT_CMU_CIS_CLK0 149 +#define CLK_GOUT_CMU_CIS_CLK1 150 +#define CLK_GOUT_CMU_CIS_CLK2 151 +#define CLK_GOUT_CMU_CIS_CLK3 152 +#define CLK_GOUT_CMU_CIS_CLK4 153 +#define CLK_GOUT_CMU_CIS_CLK5 154 +#define CLK_GOUT_CMU_CORE_BUS 155 +#define CLK_GOUT_CMU_CPUCL0_DBG_BUS 156 +#define CLK_GOUT_CMU_CPUCL0_SWITCH 157 +#define CLK_GOUT_CMU_CPUCL1_SWITCH 158 +#define CLK_GOUT_CMU_CPUCL2_BUSP 159 +#define CLK_GOUT_CMU_CPUCL2_SWITCH 160 +#define CLK_GOUT_CMU_CSIS_BUS 161 +#define CLK_GOUT_CMU_CSIS_OIS_MCU 162 +#define CLK_GOUT_CMU_DNC_BUS 163 +#define CLK_GOUT_CMU_DNC_BUSM 164 +#define CLK_GOUT_CMU_DNS_BUS 165 +#define CLK_GOUT_CMU_DPU 166 +#define CLK_GOUT_CMU_DPU_BUS 167 +#define CLK_GOUT_CMU_DSP_BUS 168 +#define CLK_GOUT_CMU_G2D_G2D 169 +#define CLK_GOUT_CMU_G2D_MSCL 170 +#define CLK_GOUT_CMU_G3D_SWITCH 171 +#define CLK_GOUT_CMU_HPM 172 +#define CLK_GOUT_CMU_HSI0_BUS 173 +#define CLK_GOUT_CMU_HSI0_DPGTC 174 +#define CLK_GOUT_CMU_HSI0_USB31DRD 175 +#define CLK_GOUT_CMU_HSI0_USBDP_DEBUG 176 +#define CLK_GOUT_CMU_HSI1_BUS 177 +#define CLK_GOUT_CMU_HSI1_MMC_CARD 178 +#define CLK_GOUT_CMU_HSI1_PCIE 179 +#define CLK_GOUT_CMU_HSI1_UFS_CARD 180 +#define CLK_GOUT_CMU_HSI1_UFS_EMBD 181 +#define CLK_GOUT_CMU_HSI2_BUS 182 +#define CLK_GOUT_CMU_HSI2_PCIE 183 +#define CLK_GOUT_CMU_IPP_BUS 184 +#define CLK_GOUT_CMU_ITP_BUS 185 +#define CLK_GOUT_CMU_MCSC_BUS 186 +#define CLK_GOUT_CMU_MCSC_GDC 187 +#define CLK_GOUT_CMU_MFC0_MFC0 188 +#define CLK_GOUT_CMU_MFC0_WFD 189 +#define CLK_GOUT_CMU_MIF_BUSP 190 +#define CLK_GOUT_CMU_NPU_BUS 191 +#define CLK_GOUT_CMU_PERIC0_BUS 192 +#define CLK_GOUT_CMU_PERIC0_IP 193 +#define CLK_GOUT_CMU_PERIC1_BUS 194 +#define CLK_GOUT_CMU_PERIC1_IP 195 +#define CLK_GOUT_CMU_PERIS_BUS 196 +#define CLK_GOUT_CMU_SSP_BUS 197 +#define CLK_GOUT_CMU_TNR_BUS 198 +#define CLK_GOUT_CMU_VRA_BUS 199 + +/* CMU_HSI0 */ +#define CLK_MOUT_HSI0_BUS_USER 1 +#define CLK_MOUT_HSI0_USB31DRD_USER 2 +#define CLK_MOUT_HSI0_USBDP_DEBUG_USER 3 +#define CLK_MOUT_HSI0_DPGTC_USER 4 +#define CLK_GOUT_HSI0_DP_LINK_DP_GTC_CLK 5 +#define CLK_GOUT_HSI0_DP_LINK_PCLK 6 +#define CLK_GOUT_HSI0_D_TZPC_HSI0_PCLK 7 +#define CLK_GOUT_HSI0_LHM_AXI_P_HSI0_CLK 8 +#define CLK_GOUT_HSI0_PPMU_HSI0_BUS1_ACLK 9 +#define CLK_GOUT_HSI0_PPMU_HSI0_BUS1_PCLK 10 +#define CLK_GOUT_HSI0_CLK_HSI0_BUS_CLK 11 +#define CLK_GOUT_HSI0_SYSMMU_USB_CLK_S2 12 +#define CLK_GOUT_HSI0_SYSREG_HSI0_PCLK 13 +#define CLK_GOUT_HSI0_USB31DRD_ACLK_PHYCTRL 14 +#define CLK_GOUT_HSI0_USB31DRD_BUS_CLK_EARLY 15 +#define CLK_GOUT_HSI0_USB31DRD_USB31DRD_REF_CLK_40 16 +#define CLK_GOUT_HSI0_USB31DRD_USBDPPHY_REF_SOC_PLL 17 +#define CLK_GOUT_HSI0_USB31DRD_USBDPPHY_SCL_APB 18 +#define CLK_GOUT_HSI0_USB31DRD_USBPCS_APB_CLK 19 +#define CLK_GOUT_HSI0_VGEN_LITE_HSI0_CLK 20 +#define CLK_GOUT_HSI0_CMU_HSI0_PCLK 21 +#define CLK_GOUT_HSI0_XIU_D_HSI0_ACLK 22 + +#endif -- cgit v1.2.3 From 5119e6b44f8ada5f5cea19935a7f005fee062aef Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Wed, 11 Dec 2024 21:42:27 +0000 Subject: memory: omap-gpmc: deadcode a pair of functions gpmc_get_client_irq() last use was removed by commit ac28e47ccc3f ("ARM: OMAP2+: Remove legacy gpmc-nand.c") gpmc_ticks_to_ns() last use was removed by commit 2514830b8b8c ("ARM: OMAP2+: Remove gpmc-onenand") Remove them. gpmc_clk_ticks_to_ns() is now only used in some DEBUG code; move inside the ifdef to avoid unused warnings. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Roger Quadros Acked-by: Kevin Hilman Link: https://lore.kernel.org/r/20241211214227.107980-1-linux@treblig.org Signed-off-by: Krzysztof Kozlowski --- drivers/memory/omap-gpmc.c | 33 +++++++-------------------------- include/linux/omap-gpmc.h | 4 ---- 2 files changed, 7 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index c8a0d82f9c27..1bab3f88c1f0 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -358,17 +358,6 @@ static unsigned int gpmc_ps_to_ticks(unsigned int time_ps) return (time_ps + tick_ps - 1) / tick_ps; } -static unsigned int gpmc_clk_ticks_to_ns(unsigned int ticks, int cs, - enum gpmc_clk_domain cd) -{ - return ticks * gpmc_get_clk_period(cs, cd) / 1000; -} - -unsigned int gpmc_ticks_to_ns(unsigned int ticks) -{ - return gpmc_clk_ticks_to_ns(ticks, /* any CS */ 0, GPMC_CD_FCLK); -} - static unsigned int gpmc_ticks_to_ps(unsigned int ticks) { return ticks * gpmc_get_fclk_period(); @@ -415,6 +404,13 @@ static void gpmc_cs_bool_timings(int cs, const struct gpmc_bool_timings *p) } #ifdef CONFIG_OMAP_GPMC_DEBUG + +static unsigned int gpmc_clk_ticks_to_ns(unsigned int ticks, int cs, + enum gpmc_clk_domain cd) +{ + return ticks * gpmc_get_clk_period(cs, cd) / 1000; +} + /** * get_gpmc_timing_reg - read a timing parameter and print DTS settings for it. * @cs: Chip Select Region @@ -1295,21 +1291,6 @@ int gpmc_omap_onenand_set_timings(struct device *dev, int cs, int freq, } EXPORT_SYMBOL_GPL(gpmc_omap_onenand_set_timings); -int gpmc_get_client_irq(unsigned int irq_config) -{ - if (!gpmc_irq_domain) { - pr_warn("%s called before GPMC IRQ domain available\n", - __func__); - return 0; - } - - /* we restrict this to NAND IRQs only */ - if (irq_config >= GPMC_NR_NAND_IRQS) - return 0; - - return irq_create_mapping(gpmc_irq_domain, irq_config); -} - static int gpmc_irq_endis(unsigned long hwirq, bool endis) { u32 regval; diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index c9e3843d2dd5..263b915df1fb 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -66,10 +66,6 @@ extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, struct device_node; -extern int gpmc_get_client_irq(unsigned irq_config); - -extern unsigned int gpmc_ticks_to_ns(unsigned int ticks); - extern void gpmc_cs_write_reg(int cs, int idx, u32 val); extern int gpmc_calc_divider(unsigned int sync_clk); extern int gpmc_cs_set_timings(int cs, const struct gpmc_timings *t, -- cgit v1.2.3 From 9698d5a4836549d394e6efd858b5200878c9f255 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 29 Nov 2024 14:02:23 +0100 Subject: pidfs: rework inode number allocation Recently we received a patchset that aims to enable file handle encoding and decoding via name_to_handle_at(2) and open_by_handle_at(2). A crucical step in the patch series is how to go from inode number to struct pid without leaking information into unprivileged contexts. The issue is that in order to find a struct pid the pid number in the initial pid namespace must be encoded into the file handle via name_to_handle_at(2). This can be used by containers using a separate pid namespace to learn what the pid number of a given process in the initial pid namespace is. While this is a weak information leak it could be used in various exploits and in general is an ugly wart in the design. To solve this problem a new way is needed to lookup a struct pid based on the inode number allocated for that struct pid. The other part is to remove the custom inode number allocation on 32bit systems that is also an ugly wart that should go away. So, a new scheme is used that I was discusssing with Tejun some time back. A cyclic ida is used for the lower 32 bits and a the high 32 bits are used for the generation number. This gives a 64 bit inode number that is unique on both 32 bit and 64 bit. The lower 32 bit number is recycled slowly and can be used to lookup struct pids. Link: https://lore.kernel.org/r/20241129-work-pidfs-v2-1-61043d66fbce@kernel.org Reviewed-by: Jeff Layton Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/pidfs.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++++- include/linux/pidfs.h | 2 ++ kernel/pid.c | 14 ++++----- 3 files changed, 86 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/pidfs.c b/fs/pidfs.c index 618abb1fa1b8..abfe96be9ffe 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -23,6 +23,79 @@ #include "internal.h" #include "mount.h" +static DEFINE_IDR(pidfs_ino_idr); + +static u32 pidfs_ino_upper_32_bits = 0; + +#if BITS_PER_LONG == 32 +/* + * On 32 bit systems the lower 32 bits are the inode number and + * the higher 32 bits are the generation number. The starting + * value for the inode number and the generation number is one. + */ +static u32 pidfs_ino_lower_32_bits = 1; + +static inline unsigned long pidfs_ino(u64 ino) +{ + return lower_32_bits(ino); +} + +/* On 32 bit the generation number are the upper 32 bits. */ +static inline u32 pidfs_gen(u64 ino) +{ + return upper_32_bits(ino); +} + +#else + +static u32 pidfs_ino_lower_32_bits = 0; + +/* On 64 bit simply return ino. */ +static inline unsigned long pidfs_ino(u64 ino) +{ + return ino; +} + +/* On 64 bit the generation number is 1. */ +static inline u32 pidfs_gen(u64 ino) +{ + return 1; +} +#endif + +/* + * Construct an inode number for struct pid in a way that we can use the + * lower 32bit to lookup struct pid independent of any pid numbers that + * could be leaked into userspace (e.g., via file handle encoding). + */ +int pidfs_add_pid(struct pid *pid) +{ + u32 upper; + int lower; + + /* + * Inode numbering for pidfs start at 2. This avoids collisions + * with the root inode which is 1 for pseudo filesystems. + */ + lower = idr_alloc_cyclic(&pidfs_ino_idr, pid, 2, 0, GFP_ATOMIC); + if (lower >= 0 && lower < pidfs_ino_lower_32_bits) + pidfs_ino_upper_32_bits++; + upper = pidfs_ino_upper_32_bits; + pidfs_ino_lower_32_bits = lower; + if (lower < 0) + return lower; + + pid->ino = ((u64)upper << 32) | lower; + pid->stashed = NULL; + return 0; +} + +/* The idr number to remove is the lower 32 bits of the inode. */ +void pidfs_remove_pid(struct pid *pid) +{ + idr_remove(&pidfs_ino_idr, lower_32_bits(pid->ino)); +} + #ifdef CONFIG_PROC_FS /** * pidfd_show_fdinfo - print information about a pidfd @@ -346,7 +419,7 @@ static inline void pidfs_free_inum(unsigned long ino) #else static inline int pidfs_inum(struct pid *pid, unsigned long *ino) { - *ino = pid->ino; + *ino = pidfs_ino(pid->ino); return 0; } #define pidfs_free_inum(ino) ((void)(ino)) @@ -429,11 +502,14 @@ static const struct dentry_operations pidfs_dentry_operations = { static int pidfs_init_inode(struct inode *inode, void *data) { + const struct pid *pid = data; + inode->i_private = data; inode->i_flags |= S_PRIVATE; inode->i_mode |= S_IRWXU; inode->i_op = &pidfs_inode_operations; inode->i_fop = &pidfs_file_operations; + inode->i_generation = pidfs_gen(pid->ino); /* * Inode numbering for pidfs start at RESERVED_PIDS + 1. This * avoids collisions with the root inode which is 1 for pseudo diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 75bdf9807802..2958652bb108 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -4,5 +4,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); +int pidfs_add_pid(struct pid *pid); +void pidfs_remove_pid(struct pid *pid); #endif /* _LINUX_PID_FS_H */ diff --git a/kernel/pid.c b/kernel/pid.c index 115448e89c3e..58567d6904b2 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -64,11 +64,6 @@ int pid_max = PID_MAX_DEFAULT; int pid_max_min = RESERVED_PIDS + 1; int pid_max_max = PID_MAX_LIMIT; -/* - * Pseudo filesystems start inode numbering after one. We use Reserved - * PIDs as a natural offset. - */ -static u64 pidfs_ino = RESERVED_PIDS; /* * PID-map pages start out as NULL, they get allocated upon @@ -158,6 +153,7 @@ void free_pid(struct pid *pid) idr_remove(&ns->idr, upid->nr); } + pidfs_remove_pid(pid); spin_unlock_irqrestore(&pidmap_lock, flags); call_rcu(&pid->rcu, delayed_put_pid); @@ -273,22 +269,26 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, INIT_HLIST_HEAD(&pid->inodes); upid = pid->numbers + ns->level; + idr_preload(GFP_KERNEL); spin_lock_irq(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; - pid->stashed = NULL; - pid->ino = ++pidfs_ino; + retval = pidfs_add_pid(pid); + if (retval) + goto out_unlock; for ( ; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); upid->ns->pid_allocated++; } spin_unlock_irq(&pidmap_lock); + idr_preload_end(); return pid; out_unlock: spin_unlock_irq(&pidmap_lock); + idr_preload_end(); put_pid_ns(ns); out_free: -- cgit v1.2.3 From d2ab36bb115b720c9c738184d4007e1ca01c53da Mon Sep 17 00:00:00 2001 From: Erin Shepherd Date: Fri, 29 Nov 2024 14:38:00 +0100 Subject: pseudofs: add support for export_ops Pseudo-filesystems might reasonably wish to implement the export ops (particularly for name_to_handle_at/open_by_handle_at); plumb this through pseudo_fs_context Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Erin Shepherd Link: https://lore.kernel.org/r/20241113-pidfs_fh-v2-1-9a4d28155a37@e43.eu Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-1-87d803a42495@kernel.org Signed-off-by: Christian Brauner --- fs/libfs.c | 1 + include/linux/pseudo_fs.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/fs/libfs.c b/fs/libfs.c index 748ac5923154..2890a9c4a414 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -673,6 +673,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc) s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = ctx->magic; s->s_op = ctx->ops ?: &simple_super_operations; + s->s_export_op = ctx->eops; s->s_xattr = ctx->xattr; s->s_time_gran = 1; root = new_inode(s); diff --git a/include/linux/pseudo_fs.h b/include/linux/pseudo_fs.h index 730f77381d55..2503f7625d65 100644 --- a/include/linux/pseudo_fs.h +++ b/include/linux/pseudo_fs.h @@ -5,6 +5,7 @@ struct pseudo_fs_context { const struct super_operations *ops; + const struct export_operations *eops; const struct xattr_handler * const *xattr; const struct dentry_operations *dops; unsigned long magic; -- cgit v1.2.3 From 50166d57ea8c5042ecba0ee22532617d72ed085a Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 29 Nov 2024 14:38:02 +0100 Subject: exportfs: add open method This allows filesystems such as pidfs to provide their custom open. Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-3-87d803a42495@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fhandle.c | 7 ++++++- include/linux/exportfs.h | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/fhandle.c b/fs/fhandle.c index c00d88fb14e1..f0b818f08aaa 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -401,6 +401,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, long retval = 0; struct path path __free(path_put) = {}; struct file *file; + const struct export_operations *eops; retval = handle_to_path(mountdirfd, ufh, &path, open_flag); if (retval) @@ -410,7 +411,11 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, if (fd < 0) return fd; - file = file_open_root(&path, "", open_flag, 0); + eops = path.mnt->mnt_sb->s_export_op; + if (eops->open) + file = eops->open(&path, open_flag); + else + file = file_open_root(&path, "", open_flag, 0); if (IS_ERR(file)) return PTR_ERR(file); diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 4cc8801e50e3..c69b79b64466 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -10,6 +10,7 @@ struct inode; struct iomap; struct super_block; struct vfsmount; +struct path; /* limit the handle size to NFSv4 handle size now */ #define MAX_HANDLE_SZ 128 @@ -225,6 +226,9 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * open: + * Allow filesystems to specify a custom open function. + * * commit_metadata: * @commit_metadata should commit metadata changes to stable storage. * @@ -251,6 +255,7 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); + struct file * (*open)(struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ #define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */ -- cgit v1.2.3 From 0203b485d26d5b403ff4ed21e4cc85ba9ec0fe67 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 9 Oct 2024 16:42:37 -0700 Subject: torture: Add dowarn argument to torture_sched_setaffinity() Current use cases of torture_sched_setaffinity() are well served by its unconditional warning on error. However, an upcoming use case for a preemption kthread needs to avoid warnings that might otherwise arise when that kthread attempted to bind itself to a CPU on its way offline. This commit therefore adds a dowarn argument that, when false, suppresses the warning. Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/torture.h | 2 +- kernel/locking/locktorture.c | 6 +++--- kernel/rcu/rcutorture.c | 2 +- kernel/rcu/update.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/torture.h b/include/linux/torture.h index c2e979f82f8d..0134e7221cae 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -130,7 +130,7 @@ void _torture_stop_kthread(char *m, struct task_struct **tp); #endif #if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn); #endif #endif /* __LINUX_TORTURE_H */ diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index de95ec07e477..cc33470f4de9 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -106,7 +106,7 @@ static const struct kernel_param_ops lt_bind_ops = { module_param_cb(bind_readers, <_bind_ops, &bind_readers, 0644); module_param_cb(bind_writers, <_bind_ops, &bind_writers, 0644); -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn); static struct task_struct *stats_task; static struct task_struct **writer_tasks; @@ -1358,7 +1358,7 @@ static int __init lock_torture_init(void) if (torture_init_error(firsterr)) goto unwind; if (cpumask_nonempty(bind_writers)) - torture_sched_setaffinity(writer_tasks[i]->pid, bind_writers); + torture_sched_setaffinity(writer_tasks[i]->pid, bind_writers, true); create_reader: if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress)) @@ -1369,7 +1369,7 @@ static int __init lock_torture_init(void) if (torture_init_error(firsterr)) goto unwind; if (cpumask_nonempty(bind_readers)) - torture_sched_setaffinity(reader_tasks[j]->pid, bind_readers); + torture_sched_setaffinity(reader_tasks[j]->pid, bind_readers, true); } if (stat_interval > 0) { firsterr = torture_create_kthread(lock_torture_stats, NULL, diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 612d27690335..908506b68c41 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -857,7 +857,7 @@ static void synchronize_rcu_trivial(void) int cpu; for_each_online_cpu(cpu) { - torture_sched_setaffinity(current->pid, cpumask_of(cpu)); + torture_sched_setaffinity(current->pid, cpumask_of(cpu), true); WARN_ON_ONCE(raw_smp_processor_id() != cpu); } } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f8436969e0c8..c912b594ba98 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -527,12 +527,12 @@ EXPORT_SYMBOL_GPL(do_trace_rcu_torture_read); #if IS_ENABLED(CONFIG_RCU_TORTURE_TEST) || IS_MODULE(CONFIG_RCU_TORTURE_TEST) || IS_ENABLED(CONFIG_LOCK_TORTURE_TEST) || IS_MODULE(CONFIG_LOCK_TORTURE_TEST) /* Get rcutorture access to sched_setaffinity(). */ -long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask) +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask, bool dowarn) { int ret; ret = sched_setaffinity(pid, in_mask); - WARN_ONCE(ret, "%s: sched_setaffinity(%d) returned %d\n", __func__, pid, ret); + WARN_ONCE(dowarn && ret, "%s: sched_setaffinity(%d) returned %d\n", __func__, pid, ret); return ret; } EXPORT_SYMBOL_GPL(torture_sched_setaffinity); -- cgit v1.2.3 From 868dc3cd1105bd328be864bf2c409891438df44a Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 18 Nov 2024 07:15:58 +0100 Subject: thermal: core: Add stub for thermal_zone_device_update() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To simplify the !CONFIG_THERMAL case in the hwmon core, add a !CONFIG_THERMAL stub for thermal_zone_device_update(). Signed-off-by: Thomas Weißschuh Reviewed-by: Guenter Roeck Acked-by: Rafael J. Wysocki Signed-off-by: Guenter Roeck --- include/linux/thermal.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 754802478b96..69f9bedd0ee8 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -295,6 +295,10 @@ static inline struct thermal_zone_device *thermal_tripless_zone_device_register( static inline void thermal_zone_device_unregister(struct thermal_zone_device *tz) { } +static inline void thermal_zone_device_update(struct thermal_zone_device *tz, + enum thermal_notify_event event) +{ } + static inline struct thermal_cooling_device * thermal_cooling_device_register(const char *type, void *devdata, const struct thermal_cooling_device_ops *ops) -- cgit v1.2.3 From f40452577557caf0e5d0ff182da8479c3d492ac5 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Mon, 2 Dec 2024 11:28:00 +0100 Subject: hwmon: (pmbus/core) improve handling of write protected regulators Writing PMBus protected registers does succeed from the smbus perspective, even if the write is ignored by the device and a communication fault is raised. This fault will silently be caught and cleared by pmbus irq if one has been registered. This means that the regulator call may return succeed although the operation was ignored. With this change, the operation which are not supported will be properly flagged as such and the regulator framework won't even try to execute them. Signed-off-by: Jerome Brunet [groeck: Adjust to EXPORT_SYMBOL_NS_GPL API change] Signed-off-by: Guenter Roeck --- Documentation/hwmon/pmbus-core.rst | 14 ++++++++++ drivers/hwmon/pmbus/pmbus.h | 4 +++ drivers/hwmon/pmbus/pmbus_core.c | 52 +++++++++++++++++++++++++++++++++----- include/linux/pmbus.h | 14 ++++++++++ 4 files changed, 78 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/Documentation/hwmon/pmbus-core.rst b/Documentation/hwmon/pmbus-core.rst index 686a00265bf7..0a251960f891 100644 --- a/Documentation/hwmon/pmbus-core.rst +++ b/Documentation/hwmon/pmbus-core.rst @@ -312,6 +312,10 @@ currently provides a flags field with four bits used:: #define PMBUS_USE_COEFFICIENTS_CMD BIT(5) + #define PMBUS_OP_PROTECTED BIT(6) + + #define PMBUS_VOUT_PROTECTED BIT(7) + struct pmbus_platform_data { u32 flags; /* Device specific flags */ @@ -373,3 +377,13 @@ PMBUS_USE_COEFFICIENTS_CMD When this flag is set the PMBus core driver will use the COEFFICIENTS register to initialize the coefficients for the direct mode format. + +PMBUS_OP_PROTECTED + +Set if the chip OPERATION command is protected and protection is not +determined by the standard WRITE_PROTECT command. + +PMBUS_VOUT_PROTECTED + +Set if the chip VOUT_COMMAND command is protected and protection is not +determined by the standard WRITE_PROTECT command. diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index d605412a3173..ddb19c9726d6 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -487,6 +487,8 @@ struct pmbus_driver_info { /* Regulator ops */ extern const struct regulator_ops pmbus_regulator_ops; +int pmbus_regulator_init_cb(struct regulator_dev *rdev, + struct regulator_config *config); /* Macros for filling in array of struct regulator_desc */ #define PMBUS_REGULATOR_STEP(_name, _id, _voltages, _step, _min_uV) \ @@ -501,6 +503,7 @@ extern const struct regulator_ops pmbus_regulator_ops; .n_voltages = _voltages, \ .uV_step = _step, \ .min_uV = _min_uV, \ + .init_cb = pmbus_regulator_init_cb, \ } #define PMBUS_REGULATOR(_name, _id) PMBUS_REGULATOR_STEP(_name, _id, 0, 0, 0) @@ -516,6 +519,7 @@ extern const struct regulator_ops pmbus_regulator_ops; .n_voltages = _voltages, \ .uV_step = _step, \ .min_uV = _min_uV, \ + .init_cb = pmbus_regulator_init_cb, \ } #define PMBUS_REGULATOR_ONE(_name) PMBUS_REGULATOR_STEP_ONE(_name, 0, 0, 0) diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index a1375cb6b648..5976dd819b3c 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -2665,6 +2665,30 @@ static void pmbus_remove_pec(void *dev) device_remove_file(dev, &dev_attr_pec); } +static void pmbus_init_wp(struct i2c_client *client, struct pmbus_data *data) +{ + int ret; + + ret = _pmbus_read_byte_data(client, -1, PMBUS_WRITE_PROTECT); + if (ret < 0) + return; + + switch (ret & PB_WP_ANY) { + case PB_WP_ALL: + data->flags |= PMBUS_OP_PROTECTED; + fallthrough; + case PB_WP_OP: + data->flags |= PMBUS_VOUT_PROTECTED; + fallthrough; + case PB_WP_VOUT: + data->flags |= PMBUS_WRITE_PROTECTED | PMBUS_SKIP_STATUS_CHECK; + break; + + default: + break; + } +} + static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, struct pmbus_driver_info *info) { @@ -2718,12 +2742,8 @@ static int pmbus_init_common(struct i2c_client *client, struct pmbus_data *data, * faults, and we should not try it. Also, in that case, writes into * limit registers need to be disabled. */ - if (!(data->flags & PMBUS_NO_WRITE_PROTECT)) { - ret = _pmbus_read_byte_data(client, -1, PMBUS_WRITE_PROTECT); - - if (ret > 0 && (ret & PB_WP_ANY)) - data->flags |= PMBUS_WRITE_PROTECTED | PMBUS_SKIP_STATUS_CHECK; - } + if (!(data->flags & PMBUS_NO_WRITE_PROTECT)) + pmbus_init_wp(client, data); ret = i2c_smbus_read_byte_data(client, PMBUS_REVISION); if (ret >= 0) @@ -3183,8 +3203,12 @@ static int pmbus_regulator_list_voltage(struct regulator_dev *rdev, { struct device *dev = rdev_get_dev(rdev); struct i2c_client *client = to_i2c_client(dev->parent); + struct pmbus_data *data = i2c_get_clientdata(client); int val, low, high; + if (data->flags & PMBUS_VOUT_PROTECTED) + return 0; + if (selector >= rdev->desc->n_voltages || selector < rdev->desc->linear_min_sel) return -EINVAL; @@ -3219,6 +3243,22 @@ const struct regulator_ops pmbus_regulator_ops = { }; EXPORT_SYMBOL_NS_GPL(pmbus_regulator_ops, "PMBUS"); +int pmbus_regulator_init_cb(struct regulator_dev *rdev, + struct regulator_config *config) +{ + struct pmbus_data *data = config->driver_data; + struct regulation_constraints *constraints = rdev->constraints; + + if (data->flags & PMBUS_OP_PROTECTED) + constraints->valid_ops_mask &= ~REGULATOR_CHANGE_STATUS; + + if (data->flags & PMBUS_VOUT_PROTECTED) + constraints->valid_ops_mask &= ~REGULATOR_CHANGE_VOLTAGE; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(pmbus_regulator_init_cb, "PMBUS"); + static int pmbus_regulator_register(struct pmbus_data *data) { struct device *dev = data->dev; diff --git a/include/linux/pmbus.h b/include/linux/pmbus.h index fa9f08164c36..884040e1383b 100644 --- a/include/linux/pmbus.h +++ b/include/linux/pmbus.h @@ -73,6 +73,20 @@ */ #define PMBUS_USE_COEFFICIENTS_CMD BIT(5) +/* + * PMBUS_OP_PROTECTED + * Set if the chip OPERATION command is protected and protection is not + * determined by the standard WRITE_PROTECT command. + */ +#define PMBUS_OP_PROTECTED BIT(6) + +/* + * PMBUS_VOUT_PROTECTED + * Set if the chip VOUT_COMMAND command is protected and protection is not + * determined by the standard WRITE_PROTECT command. + */ +#define PMBUS_VOUT_PROTECTED BIT(7) + struct pmbus_platform_data { u32 flags; /* Device specific flags */ -- cgit v1.2.3 From 0f38c06cab7712fc82c314fe4264a8897f3e6365 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 28 Oct 2024 13:07:11 -0700 Subject: rcutorture: Check preemption for failing reader This commit checks to see if the RCU reader has been preempted within its read-side critical section for RCU flavors supporting this notion (currently only preemptible RCU). If such a preemption occurred, then this is printed at the end of the "Failure/close-call rcutorture reader segments" list at the end of the rcutorture run. [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Paul E. McKenney Cc: Frederic Weisbecker Tested-by: kernel test robot Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/rcupdate_wait.h | 11 +++++++++++ kernel/rcu/rcutorture.c | 11 +++++++++++ 2 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 303ab9bee155..f9bed3d3f78d 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -65,4 +65,15 @@ static inline void cond_resched_rcu(void) #endif } +// Has the current task blocked within its current RCU read-side +// critical section? +static inline bool has_rcu_reader_blocked(void) +{ +#ifdef CONFIG_PREEMPT_RCU + return !list_empty(¤t->rcu_node_entry); +#else + return false; +#endif +} + #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 658ac46581d8..9b81e21c75d1 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -267,6 +267,7 @@ struct rt_read_seg { static int err_segs_recorded; static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS]; static int rt_read_nsegs; +static int rt_read_preempted; static const char *rcu_torture_writer_state_getname(void) { @@ -394,6 +395,7 @@ struct rcu_torture_ops { void (*get_gp_data)(int *flags, unsigned long *gp_seq); void (*gp_slow_register)(atomic_t *rgssp); void (*gp_slow_unregister)(atomic_t *rgssp); + bool (*reader_blocked)(void); long cbflood_max; int irq_capable; int can_boost; @@ -587,6 +589,9 @@ static struct rcu_torture_ops rcu_ops = { .get_gp_data = rcutorture_get_gp_data, .gp_slow_register = rcu_gp_slow_register, .gp_slow_unregister = rcu_gp_slow_unregister, + .reader_blocked = IS_ENABLED(CONFIG_RCU_TORTURE_TEST_LOG_CPU) + ? has_rcu_reader_blocked + : NULL, .irq_capable = 1, .can_boost = IS_ENABLED(CONFIG_RCU_BOOST), .extendables = RCUTORTURE_MAX_EXTEND, @@ -2035,6 +2040,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) int newstate; struct rcu_torture *p; int pipe_count; + bool preempted = false; int readstate = 0; struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS] = { { 0 } }; struct rt_read_seg *rtrsp = &rtseg[0]; @@ -2100,6 +2106,8 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) rcu_torture_writer_state, cpumask_pr_args(cpu_online_mask)); } + if (cur_ops->reader_blocked) + preempted = cur_ops->reader_blocked(); rcutorture_one_extend(&readstate, 0, trsp, rtrsp); WARN_ON_ONCE(readstate); // This next splat is expected behavior if leakpointer, especially @@ -2112,6 +2120,7 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) for (rtrsp1 = &rtseg[0]; rtrsp1 < rtrsp; rtrsp1++) err_segs[i++] = *rtrsp1; rt_read_nsegs = i; + rt_read_preempted = preempted; } return true; @@ -3569,6 +3578,8 @@ rcu_torture_cleanup(void) pr_cont("\n"); } + if (rt_read_preempted) + pr_alert("\tReader was preempted.\n"); } if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); -- cgit v1.2.3 From 0fef924e3918e72768357a220c84e6b4dd2b6180 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Nov 2024 11:11:18 -0800 Subject: rcutorture: Use symbols for SRCU reader flavors This commit converts rcutorture.c values for the reader_flavor module parameter from hexadecimal to the SRCU_READ_FLAVOR_* C-preprocessor macros. The actual modprobe or kernel-boot-parameter values for read_flavor must still be entered in hexadecimal. Link: https://lore.kernel.org/all/c48c9dca-fe07-4833-acaa-28c827e5a79e@amd.com/ Suggested-by: Neeraj Upadhyay Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/srcu.h | 6 ++++++ include/linux/srcutree.h | 6 +----- kernel/rcu/rcutorture.c | 14 +++++++------- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 08339eb8a01c..da8224d0f71c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -43,6 +43,12 @@ int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Values for SRCU Tree srcu_data ->srcu_reader_flavor, but also used by rcutorture. */ +#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). +#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). +#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). +#define SRCU_READ_FLAVOR_ALL 0x7 // All of the above. + #ifdef CONFIG_TINY_SRCU #include #elif defined(CONFIG_TREE_SRCU) diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 490aeecc6bb4..80016bbed672 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -26,6 +26,7 @@ struct srcu_data { atomic_long_t srcu_lock_count[2]; /* Locks per CPU. */ atomic_long_t srcu_unlock_count[2]; /* Unlocks per CPU. */ int srcu_reader_flavor; /* Reader flavor for srcu_struct structure? */ + /* Values: SRCU_READ_FLAVOR_.* */ /* Update-side state. */ spinlock_t __private lock ____cacheline_internodealigned_in_smp; @@ -43,11 +44,6 @@ struct srcu_data { struct srcu_struct *ssp; }; -/* Values for ->srcu_reader_flavor. */ -#define SRCU_READ_FLAVOR_NORMAL 0x1 // srcu_read_lock(). -#define SRCU_READ_FLAVOR_NMI 0x2 // srcu_read_lock_nmisafe(). -#define SRCU_READ_FLAVOR_LITE 0x4 // srcu_read_lock_lite(). - /* * Node in SRCU combining tree, similar in function to rcu_data. */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 41b661bf000a..d26fb1d33ed9 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -121,7 +121,7 @@ torture_param(int, preempt_duration, 0, "Preemption duration (ms), zero to disab torture_param(int, preempt_interval, MSEC_PER_SEC, "Interval between preemptions (ms)"); torture_param(int, read_exit_delay, 13, "Delay between read-then-exit episodes (s)"); torture_param(int, read_exit_burst, 16, "# of read-then-exit bursts per episode, zero to disable"); -torture_param(int, reader_flavor, 0x1, "Reader flavors to use, one per bit."); +torture_param(int, reader_flavor, SRCU_READ_FLAVOR_NORMAL, "Reader flavors to use, one per bit."); torture_param(int, shuffle_interval, 3, "Number of seconds between shuffles"); torture_param(int, shutdown_secs, 0, "Shutdown time (s), <= zero to disable."); torture_param(int, stall_cpu, 0, "Stall duration (s), zero to disable."); @@ -679,17 +679,17 @@ static int srcu_torture_read_lock(void) int idx; int ret = 0; - if ((reader_flavor & 0x1) || !(reader_flavor & 0x7)) { + if ((reader_flavor & SRCU_READ_FLAVOR_NORMAL) || !(reader_flavor & SRCU_READ_FLAVOR_ALL)) { idx = srcu_read_lock(srcu_ctlp); WARN_ON_ONCE(idx & ~0x1); ret += idx; } - if (reader_flavor & 0x2) { + if (reader_flavor & SRCU_READ_FLAVOR_NMI) { idx = srcu_read_lock_nmisafe(srcu_ctlp); WARN_ON_ONCE(idx & ~0x1); ret += idx << 1; } - if (reader_flavor & 0x4) { + if (reader_flavor & SRCU_READ_FLAVOR_LITE) { idx = srcu_read_lock_lite(srcu_ctlp); WARN_ON_ONCE(idx & ~0x1); ret += idx << 2; @@ -719,11 +719,11 @@ srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp) static void srcu_torture_read_unlock(int idx) { WARN_ON_ONCE((reader_flavor && (idx & ~reader_flavor)) || (!reader_flavor && (idx & ~0x1))); - if (reader_flavor & 0x4) + if (reader_flavor & SRCU_READ_FLAVOR_LITE) srcu_read_unlock_lite(srcu_ctlp, (idx & 0x4) >> 2); - if (reader_flavor & 0x2) + if (reader_flavor & SRCU_READ_FLAVOR_NMI) srcu_read_unlock_nmisafe(srcu_ctlp, (idx & 0x2) >> 1); - if ((reader_flavor & 0x1) || !(reader_flavor & 0x7)) + if ((reader_flavor & SRCU_READ_FLAVOR_NORMAL) || !(reader_flavor & SRCU_READ_FLAVOR_ALL)) srcu_read_unlock(srcu_ctlp, idx & 0x1); } -- cgit v1.2.3 From d465492a224b2409508224cf6970d7b97e2285cc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 21 Oct 2024 15:09:39 -0700 Subject: srcu: Guarantee non-negative return value from srcu_read_lock() For almost 20 years, the int return value from srcu_read_lock() has been always either zero or one. This commit therefore documents the fact that it will be non-negative, and does the same for the underlying __srcu_read_lock(). [ paulmck: Apply Andrii Nakryiko feedback. ] Signed-off-by: Paul E. McKenney Acked-by: Andrii Nakryiko Acked-by: Peter Zijlstra (Intel) Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/srcu.h | 15 ++++++++------- kernel/rcu/srcutree.c | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 08339eb8a01c..abaddd7e6ddf 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -232,13 +232,14 @@ static inline int srcu_read_lock_held(const struct srcu_struct *ssp) * a mutex that is held elsewhere while calling synchronize_srcu() or * synchronize_srcu_expedited(). * - * The return value from srcu_read_lock() must be passed unaltered - * to the matching srcu_read_unlock(). Note that srcu_read_lock() and - * the matching srcu_read_unlock() must occur in the same context, for - * example, it is illegal to invoke srcu_read_unlock() in an irq handler - * if the matching srcu_read_lock() was invoked in process context. Or, - * for that matter to invoke srcu_read_unlock() from one task and the - * matching srcu_read_lock() from another. + * The return value from srcu_read_lock() is guaranteed to be + * non-negative. This value must be passed unaltered to the matching + * srcu_read_unlock(). Note that srcu_read_lock() and the matching + * srcu_read_unlock() must occur in the same context, for example, it is + * illegal to invoke srcu_read_unlock() in an irq handler if the matching + * srcu_read_lock() was invoked in process context. Or, for that matter to + * invoke srcu_read_unlock() from one task and the matching srcu_read_lock() + * from another. */ static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index 5e2e53464794..26ef58b481aa 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -738,7 +738,8 @@ EXPORT_SYMBOL_GPL(__srcu_check_read_flavor); /* * Counts the new reader in the appropriate per-CPU element of the * srcu_struct. - * Returns an index that must be passed to the matching srcu_read_unlock(). + * Returns a guaranteed non-negative index that must be passed to the + * matching __srcu_read_unlock(). */ int __srcu_read_lock(struct srcu_struct *ssp) { -- cgit v1.2.3 From cfb07b07dda2a17feed96c80c5af85937fcd2e9c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 12 Nov 2024 16:53:53 -0800 Subject: srcu: Fix typo s/srcu_check_read_flavor()/__srcu_check_read_flavor()/ This commit fixes a typo in which a comment needed to have been updated from srcu_check_read_flavor() to __srcu_check_read_flavor(). Reported-by: Neeraj Upadhyay Closes: https://lore.kernel.org/all/b75d1fcd-6fcd-4619-bb5c-507fa599ee28@amd.com/ Signed-off-by: Paul E. McKenney Signed-off-by: Uladzislau Rezki (Sony) --- include/linux/srcutree.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 490aeecc6bb4..4e69f88bcab9 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -258,7 +258,7 @@ static inline void srcu_check_read_flavor_lite(struct srcu_struct *ssp) if (likely(READ_ONCE(sdp->srcu_reader_flavor) & SRCU_READ_FLAVOR_LITE)) return; - // Note that the cmpxchg() in srcu_check_read_flavor() is fully ordered. + // Note that the cmpxchg() in __srcu_check_read_flavor() is fully ordered. __srcu_check_read_flavor(ssp, SRCU_READ_FLAVOR_LITE); } -- cgit v1.2.3 From 288a2cabcf6bb35532e8b2708829bdc2b85bc690 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 11 Dec 2024 20:57:58 +0100 Subject: power: supply: core: add UAPI to discover currently used extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Userspace wants to now about the used power supply extensions, for example to handle a device extended by a certain extension differently or to discover information about the extending device. Add a sysfs directory to the power supply device. This directory contains links which are named after the used extension and point to the device implementing that extension. Signed-off-by: Thomas Weißschuh Reviewed-by: Armin Wolf Link: https://lore.kernel.org/r/20241211-power-supply-extensions-v6-4-9d9dc3f3d387@weissschuh.net Signed-off-by: Sebastian Reichel --- Documentation/ABI/testing/sysfs-class-power | 9 +++++++++ drivers/power/supply/cros_charge-control.c | 5 ++++- drivers/power/supply/power_supply.h | 2 ++ drivers/power/supply/power_supply_core.c | 19 +++++++++++++++++-- drivers/power/supply/power_supply_sysfs.c | 10 ++++++++++ drivers/power/supply/test_power.c | 4 +++- include/linux/power_supply.h | 2 ++ 7 files changed, 47 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index 74050dfb5fc0..cb53dde20a80 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -813,3 +813,12 @@ Description: Access: Read Valid values: 1-31 + +What: /sys/class/power_supply//extensions/ +Date: March 2025 +Contact: linux-pm@vger.kernel.org +Description: + Reports the extensions registered to the power supply. + Each entry is a link to the device which registered the extension. + + Access: Read diff --git a/drivers/power/supply/cros_charge-control.c b/drivers/power/supply/cros_charge-control.c index fb4af232721d..02d5bdbe2e8d 100644 --- a/drivers/power/supply/cros_charge-control.c +++ b/drivers/power/supply/cros_charge-control.c @@ -31,6 +31,7 @@ */ struct cros_chctl_priv { + struct device *dev; struct cros_ec_device *cros_ec; struct acpi_battery_hook battery_hook; struct power_supply *hooked_battery; @@ -202,6 +203,7 @@ static int cros_chctl_psy_prop_is_writeable(struct power_supply *psy, }; \ \ static const struct power_supply_ext _name = { \ + .name = "cros-charge-control", \ .properties = _name ## _props, \ .num_properties = ARRAY_SIZE(_name ## _props), \ .charge_behaviours = EC_CHARGE_CONTROL_BEHAVIOURS, \ @@ -233,7 +235,7 @@ static int cros_chctl_add_battery(struct power_supply *battery, struct acpi_batt return 0; priv->hooked_battery = battery; - return power_supply_register_extension(battery, priv->psy_ext, priv); + return power_supply_register_extension(battery, priv->psy_ext, priv->dev, priv); } static int cros_chctl_remove_battery(struct power_supply *battery, struct acpi_battery_hook *hook) @@ -299,6 +301,7 @@ static int cros_chctl_probe(struct platform_device *pdev) dev_dbg(dev, "Command version: %u\n", (unsigned int)priv->cmd_version); + priv->dev = dev; priv->cros_ec = cros_ec; if (priv->cmd_version == 1) diff --git a/drivers/power/supply/power_supply.h b/drivers/power/supply/power_supply.h index 531785516d2a..9ed749cd0936 100644 --- a/drivers/power/supply/power_supply.h +++ b/drivers/power/supply/power_supply.h @@ -25,6 +25,7 @@ extern bool power_supply_ext_has_property(const struct power_supply_ext *ext, struct power_supply_ext_registration { struct list_head list_head; const struct power_supply_ext *ext; + struct device *dev; void *data; }; @@ -39,6 +40,7 @@ struct power_supply_ext_registration { extern void __init power_supply_init_attrs(void); extern int power_supply_uevent(const struct device *dev, struct kobj_uevent_env *env); +extern const struct attribute_group power_supply_extension_group; extern const struct attribute_group *power_supply_attr_groups[]; #else diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 29209c19ea67..76ebaef403ed 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -1358,17 +1358,21 @@ static int power_supply_update_sysfs_and_hwmon(struct power_supply *psy) } int power_supply_register_extension(struct power_supply *psy, const struct power_supply_ext *ext, - void *data) + struct device *dev, void *data) { struct power_supply_ext_registration *reg; size_t i; int ret; - if (!psy || !ext || !ext->properties || !ext->num_properties) + if (!psy || !dev || !ext || !ext->name || !ext->properties || !ext->num_properties) return -EINVAL; guard(rwsem_write)(&psy->extensions_sem); + power_supply_for_each_extension(reg, psy) + if (strcmp(ext->name, reg->ext->name) == 0) + return -EEXIST; + for (i = 0; i < ext->num_properties; i++) if (power_supply_has_property(psy, ext->properties[i])) return -EEXIST; @@ -1378,9 +1382,15 @@ int power_supply_register_extension(struct power_supply *psy, const struct power return -ENOMEM; reg->ext = ext; + reg->dev = dev; reg->data = data; list_add(®->list_head, &psy->extensions); + ret = sysfs_add_link_to_group(&psy->dev.kobj, power_supply_extension_group.name, + &dev->kobj, ext->name); + if (ret) + goto sysfs_link_failed; + ret = power_supply_update_sysfs_and_hwmon(psy); if (ret) goto sysfs_hwmon_failed; @@ -1388,6 +1398,8 @@ int power_supply_register_extension(struct power_supply *psy, const struct power return 0; sysfs_hwmon_failed: + sysfs_remove_link_from_group(&psy->dev.kobj, power_supply_extension_group.name, ext->name); +sysfs_link_failed: list_del(®->list_head); kfree(reg); return ret; @@ -1403,6 +1415,9 @@ void power_supply_unregister_extension(struct power_supply *psy, const struct po power_supply_for_each_extension(reg, psy) { if (reg->ext == ext) { list_del(®->list_head); + sysfs_remove_link_from_group(&psy->dev.kobj, + power_supply_extension_group.name, + reg->ext->name); kfree(reg); power_supply_update_sysfs_and_hwmon(psy); return; diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index b8f73bc89b05..261bf7bf6e22 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -458,8 +458,18 @@ static const struct attribute_group power_supply_attr_group = { .is_visible = power_supply_attr_is_visible, }; +static struct attribute *power_supply_extension_attrs[] = { + NULL +}; + +const struct attribute_group power_supply_extension_group = { + .name = "extensions", + .attrs = power_supply_extension_attrs, +}; + const struct attribute_group *power_supply_attr_groups[] = { &power_supply_attr_group, + &power_supply_extension_group, NULL }; diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c index 66f9ef52e0f3..2a975a110f48 100644 --- a/drivers/power/supply/test_power.c +++ b/drivers/power/supply/test_power.c @@ -293,6 +293,7 @@ static int test_power_battery_extproperty_is_writeable(struct power_supply *psy, } static const struct power_supply_ext test_power_battery_ext = { + .name = "test_power", .properties = test_power_battery_extprops, .num_properties = ARRAY_SIZE(test_power_battery_extprops), .get_property = test_power_battery_extget_property, @@ -307,7 +308,8 @@ static void test_power_configure_battery_extension(bool enable) psy = test_power_supplies[TEST_BATTERY]; if (enable) { - if (power_supply_register_extension(psy, &test_power_battery_ext, NULL)) { + if (power_supply_register_extension(psy, &test_power_battery_ext, &psy->dev, + NULL)) { pr_err("registering battery extension failed\n"); return; } diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index a877518cd963..c3ce9f2b17d4 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -286,6 +286,7 @@ struct power_supply_desc { }; struct power_supply_ext { + const char *const name; u8 charge_behaviours; const enum power_supply_property *properties; size_t num_properties; @@ -911,6 +912,7 @@ extern int power_supply_powers(struct power_supply *psy, struct device *dev); extern int __must_check power_supply_register_extension(struct power_supply *psy, const struct power_supply_ext *ext, + struct device *dev, void *data); extern void power_supply_unregister_extension(struct power_supply *psy, const struct power_supply_ext *ext); -- cgit v1.2.3 From 2c2b61d2138f472e50b5531ec0cb4a1485837e21 Mon Sep 17 00:00:00 2001 From: Yuyang Huang Date: Wed, 11 Dec 2024 17:22:41 +0900 Subject: netlink: add IGMP/MLD join/leave notifications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change introduces netlink notifications for multicast address changes. The following features are included: * Addition and deletion of multicast addresses are reported using RTM_NEWMULTICAST and RTM_DELMULTICAST messages with AF_INET and AF_INET6. * Two new notification groups: RTNLGRP_IPV4_MCADDR and RTNLGRP_IPV6_MCADDR are introduced for receiving these events. This change allows user space applications (e.g., ip monitor) to efficiently track multicast group memberships by listening for netlink events. Previously, applications relied on inefficient polling of procfs, introducing delays. With netlink notifications, applications receive realtime updates on multicast group membership changes, enabling more precise metrics collection and system monitoring.  This change also unlocks the potential for implementing a wide range of sophisticated multicast related features in user space by allowing applications to combine kernel provided multicast address information with user space data and communicate decisions back to the kernel for more fine grained control. This mechanism can be used for various purposes, including multicast filtering, IGMP/MLD offload, and IGMP/MLD snooping. Cc: Maciej Żenczykowski Cc: Lorenzo Colitti Co-developed-by: Patrick Ruddy Signed-off-by: Patrick Ruddy Link: https://lore.kernel.org/r/20180906091056.21109-1-pruddy@vyatta.att-mail.com Signed-off-by: Yuyang Huang Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 ++ include/net/addrconf.h | 21 ++++++++++++++ include/uapi/linux/rtnetlink.h | 10 ++++++- net/ipv4/igmp.c | 64 ++++++++++++++++++++++++++++++++++++++++++ net/ipv6/addrconf.c | 29 ++++++------------- net/ipv6/mcast.c | 39 +++++++++++++++++++++++++ 6 files changed, 144 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 5171231f70a8..073b30a9b850 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -87,6 +87,8 @@ struct ip_mc_list { char loaded; unsigned char gsquery; /* check source marks? */ unsigned char crcount; + unsigned long mca_cstamp; + unsigned long mca_tstamp; struct rcu_head rcu; }; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 363dd63babe7..58337898fa21 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -88,6 +88,23 @@ struct ifa6_config { u16 scope; }; +enum addr_type_t { + UNICAST_ADDR, + MULTICAST_ADDR, + ANYCAST_ADDR, +}; + +struct inet6_fill_args { + u32 portid; + u32 seq; + int event; + unsigned int flags; + int netnsid; + int ifindex; + enum addr_type_t type; + bool force_rt_scope_universe; +}; + int addrconf_init(void); void addrconf_cleanup(void); @@ -525,4 +542,8 @@ int if6_proc_init(void); void if6_proc_exit(void); #endif +int inet6_fill_ifmcaddr(struct sk_buff *skb, + const struct ifmcaddr6 *ifmca, + struct inet6_fill_args *args); + #endif diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index db7254d52d93..eccc0e7dcb7d 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -93,7 +93,11 @@ enum { RTM_NEWPREFIX = 52, #define RTM_NEWPREFIX RTM_NEWPREFIX - RTM_GETMULTICAST = 58, + RTM_NEWMULTICAST = 56, +#define RTM_NEWMULTICAST RTM_NEWMULTICAST + RTM_DELMULTICAST, +#define RTM_DELMULTICAST RTM_DELMULTICAST + RTM_GETMULTICAST, #define RTM_GETMULTICAST RTM_GETMULTICAST RTM_GETANYCAST = 62, @@ -774,6 +778,10 @@ enum rtnetlink_groups { #define RTNLGRP_TUNNEL RTNLGRP_TUNNEL RTNLGRP_STATS, #define RTNLGRP_STATS RTNLGRP_STATS + RTNLGRP_IPV4_MCADDR, +#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR + RTNLGRP_IPV6_MCADDR, +#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6a238398acc9..8a370ef37d3f 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -88,6 +88,8 @@ #include #include +#include +#include #include #include #include @@ -1430,6 +1432,63 @@ static void ip_mc_hash_remove(struct in_device *in_dev, *mc_hash = im->next_hash; } +static int inet_fill_ifmcaddr(struct sk_buff *skb, struct net_device *dev, + const struct ip_mc_list *im, int event) +{ + struct ifa_cacheinfo ci; + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct ifaddrmsg), 0); + if (!nlh) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_INET; + ifm->ifa_prefixlen = 32; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + ifm->ifa_index = dev->ifindex; + + ci.cstamp = (READ_ONCE(im->mca_cstamp) - INITIAL_JIFFIES) * 100UL / HZ; + ci.tstamp = ci.cstamp; + ci.ifa_prefered = INFINITY_LIFE_TIME; + ci.ifa_valid = INFINITY_LIFE_TIME; + + if (nla_put_in_addr(skb, IFA_MULTICAST, im->multiaddr) < 0 || + nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci) < 0) { + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; + } + + nlmsg_end(skb, nlh); + return 0; +} + +static void inet_ifmcaddr_notify(struct net_device *dev, + const struct ip_mc_list *im, int event) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOMEM; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(sizeof(__be32)), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet_fill_ifmcaddr(skb, dev, im, event); + if (err < 0) { + WARN_ON_ONCE(err == -EMSGSIZE); + nlmsg_free(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV4_MCADDR, err); +} /* * A socket has joined a multicast group on device dev. @@ -1473,6 +1532,8 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, im->interface = in_dev; in_dev_hold(in_dev); im->multiaddr = addr; + im->mca_cstamp = jiffies; + im->mca_tstamp = im->mca_cstamp; /* initial mode is (EX, empty) */ im->sfmode = mode; im->sfcount[mode] = 1; @@ -1492,6 +1553,7 @@ static void ____ip_mc_inc_group(struct in_device *in_dev, __be32 addr, igmpv3_del_delrec(in_dev, im); #endif igmp_group_added(im); + inet_ifmcaddr_notify(in_dev->dev, im, RTM_NEWMULTICAST); if (!in_dev->dead) ip_rt_multicast_event(in_dev); out: @@ -1705,6 +1767,8 @@ void __ip_mc_dec_group(struct in_device *in_dev, __be32 addr, gfp_t gfp) *ip = i->next_rcu; in_dev->mc_count--; __igmp_group_dropped(i, gfp); + inet_ifmcaddr_notify(in_dev->dev, i, + RTM_DELMULTICAST); ip_mc_clear_src(i); if (!in_dev->dead) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0e765466d7f7..2e2684886953 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5127,22 +5127,6 @@ static inline int inet6_ifaddr_msgsize(void) + nla_total_size(4) /* IFA_RT_PRIORITY */; } -enum addr_type_t { - UNICAST_ADDR, - MULTICAST_ADDR, - ANYCAST_ADDR, -}; - -struct inet6_fill_args { - u32 portid; - u32 seq; - int event; - unsigned int flags; - int netnsid; - int ifindex; - enum addr_type_t type; -}; - static int inet6_fill_ifaddr(struct sk_buff *skb, const struct inet6_ifaddr *ifa, struct inet6_fill_args *args) @@ -5221,15 +5205,16 @@ error: return -EMSGSIZE; } -static int inet6_fill_ifmcaddr(struct sk_buff *skb, - const struct ifmcaddr6 *ifmca, - struct inet6_fill_args *args) +int inet6_fill_ifmcaddr(struct sk_buff *skb, + const struct ifmcaddr6 *ifmca, + struct inet6_fill_args *args) { int ifindex = ifmca->idev->dev->ifindex; u8 scope = RT_SCOPE_UNIVERSE; struct nlmsghdr *nlh; - if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) + if (!args->force_rt_scope_universe && + ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, args->portid, args->seq, args->event, @@ -5254,6 +5239,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, nlmsg_end(skb, nlh); return 0; } +EXPORT_SYMBOL(inet6_fill_ifmcaddr); static int inet6_fill_ifacaddr(struct sk_buff *skb, const struct ifacaddr6 *ifaca, @@ -5418,6 +5404,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb, .flags = NLM_F_MULTI, .netnsid = -1, .type = type, + .force_rt_scope_universe = false, }; struct { unsigned long ifindex; @@ -5546,6 +5533,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, .event = RTM_NEWADDR, .flags = 0, .netnsid = -1, + .force_rt_scope_universe = false, }; struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; @@ -5617,6 +5605,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) .event = event, .flags = 0, .netnsid = -1, + .force_rt_scope_universe = false, }; int err = -ENOBUFS; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ca8692d565d..587831c148de 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -33,8 +33,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -47,6 +49,7 @@ #include #include +#include #include #include @@ -901,6 +904,39 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, return mc; } +static void inet6_ifmcaddr_notify(struct net_device *dev, + const struct ifmcaddr6 *ifmca, int event) +{ + struct inet6_fill_args fillargs = { + .portid = 0, + .seq = 0, + .event = event, + .flags = 0, + .netnsid = -1, + .force_rt_scope_universe = true, + }; + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOMEM; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(16), GFP_ATOMIC); + if (!skb) + goto error; + + err = inet6_fill_ifmcaddr(skb, ifmca, &fillargs); + if (err < 0) { + WARN_ON_ONCE(err == -EMSGSIZE); + nlmsg_free(skb); + goto error; + } + + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MCADDR, NULL, GFP_ATOMIC); + return; +error: + rtnl_set_sk_err(net, RTNLGRP_IPV6_MCADDR, err); +} + /* * device multicast group inc (add if not found) */ @@ -948,6 +984,7 @@ static int __ipv6_dev_mc_inc(struct net_device *dev, mld_del_delrec(idev, mc); igmp6_group_added(mc); + inet6_ifmcaddr_notify(dev, mc, RTM_NEWMULTICAST); mutex_unlock(&idev->mc_lock); ma_put(mc); return 0; @@ -977,6 +1014,8 @@ int __ipv6_dev_mc_dec(struct inet6_dev *idev, const struct in6_addr *addr) *map = ma->next; igmp6_group_dropped(ma); + inet6_ifmcaddr_notify(idev->dev, ma, + RTM_DELMULTICAST); ip6_mc_clear_src(ma); mutex_unlock(&idev->mc_lock); -- cgit v1.2.3 From 2ef6fc99e0d922a54073e7b6d6465c62f4d3b62b Mon Sep 17 00:00:00 2001 From: Thiébaud Weksteen Date: Thu, 5 Dec 2024 12:21:00 +1100 Subject: selinux: add netlink nlmsg_type audit message MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new audit message type to capture nlmsg-related information. This is similar to LSM_AUDIT_DATA_IOCTL_OP which was added for the other SELinux extended permission (ioctl). Adding a new type is preferred to adding to the existing lsm_network_audit structure which contains irrelevant information for the netlink sockets (i.e., dport, sport). Signed-off-by: Thiébaud Weksteen [PM: change "nlnk-msgtype" to "nl-msgtype" as discussed] Signed-off-by: Paul Moore --- include/linux/lsm_audit.h | 2 ++ security/lsm_audit.c | 3 +++ security/selinux/hooks.c | 4 ++-- 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 97a8b21eb033..69d2b7bc00ed 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -77,6 +77,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_LOCKDOWN 15 #define LSM_AUDIT_DATA_NOTIFICATION 16 #define LSM_AUDIT_DATA_ANONINODE 17 +#define LSM_AUDIT_DATA_NLMSGTYPE 18 union { struct path path; struct dentry *dentry; @@ -98,6 +99,7 @@ struct common_audit_data { struct lsm_ibendport_audit *ibendport; int reason; const char *anonclass; + u16 nlmsg_type; } u; /* this union contains LSM specific data */ union { diff --git a/security/lsm_audit.c b/security/lsm_audit.c index 9a8352972086..b2f565c0990a 100644 --- a/security/lsm_audit.c +++ b/security/lsm_audit.c @@ -425,6 +425,9 @@ static void dump_common_audit_data(struct audit_buffer *ab, case LSM_AUDIT_DATA_ANONINODE: audit_log_format(ab, " anonclass=%s", a->u.anonclass); break; + case LSM_AUDIT_DATA_NLMSGTYPE: + audit_log_format(ab, " nl-msgtype=%hu", a->u.nlmsg_type); + break; } /* switch (a->type) */ } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5e5f3398f39d..617f54abb640 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5939,14 +5939,14 @@ static int nlmsg_sock_has_extended_perms(struct sock *sk, u32 perms, u16 nlmsg_t { struct sk_security_struct *sksec = sk->sk_security; struct common_audit_data ad; - struct lsm_network_audit net; u8 driver; u8 xperm; if (sock_skip_has_perm(sksec->sid)) return 0; - ad_net_init_from_sk(&ad, &net, sk); + ad.type = LSM_AUDIT_DATA_NLMSGTYPE; + ad.u.nlmsg_type = nlmsg_type; driver = nlmsg_type >> 8; xperm = nlmsg_type & 0xff; -- cgit v1.2.3 From aeb3ec99026979287266e4b5a1194789c1488c1a Mon Sep 17 00:00:00 2001 From: Rongwei Liu Date: Fri, 13 Dec 2024 00:13:20 +0200 Subject: net/mlx5: Add device cap abs_native_port_num When the abs_native_port_num is set, the native_port_num reported by the device may not be continuous and bigger than the num_lag_ports. Signed-off-by: Rongwei Liu Reviewed-by: Shay Drory Reviewed-by: Saeed Mahameed Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20241212221329.961628-2-tariqt@nvidia.com Signed-off-by: Leon Romanovsky --- include/linux/mlx5/mlx5_ifc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 5451ff1d4356..43b3cb4bf8d1 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1599,7 +1599,8 @@ enum { struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_0[0x6]; u8 page_request_disable[0x1]; - u8 reserved_at_7[0x9]; + u8 abs_native_port_num[0x1]; + u8 reserved_at_8[0x8]; u8 shared_object_to_user_object_allowed[0x1]; u8 reserved_at_13[0xe]; u8 vhca_resource_manager[0x1]; -- cgit v1.2.3 From 0471b1093e3a5d702ba2bf5987c35ee0e2336855 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:04 +0100 Subject: tls: block decryption when a rekey is pending When a TLS handshake record carrying a KeyUpdate message is received, all subsequent records will be encrypted with a new key. We need to stop decrypting incoming records with the old key, and wait until userspace provides a new key. Make a note of this in the RX context just after decrypting that record, and stop recvmsg/splice calls with EKEYEXPIRED until the new key is available. key_update_pending can't be combined with the existing bitfield, because we will read it locklessly in ->poll. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/net/tls.h | 3 +++ net/tls/tls_sw.c | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 61fef2880114..857340338b69 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -59,6 +59,8 @@ struct tls_rec; #define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type) +#define TLS_HANDSHAKE_KEYUPDATE 24 /* rfc8446 B.3: Key update */ + #define TLS_AAD_SPACE_SIZE 13 #define TLS_MAX_IV_SIZE 16 @@ -130,6 +132,7 @@ struct tls_sw_context_rx { u8 async_capable:1; u8 zc_capable:1; u8 reader_contended:1; + bool key_update_pending; struct tls_strparser strp; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index bbf26cc4f6ee..3dcf8ee60fea 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1314,6 +1314,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, int ret = 0; long timeo; + /* a rekey is pending, let userspace deal with it */ + if (unlikely(ctx->key_update_pending)) + return -EKEYEXPIRED; + timeo = sock_rcvtimeo(sk, nonblock); while (!tls_strp_msg_ready(ctx)) { @@ -1720,6 +1724,34 @@ tls_decrypt_device(struct sock *sk, struct msghdr *msg, return 1; } +static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb) +{ + const struct strp_msg *rxm = strp_msg(skb); + const struct tls_msg *tlm = tls_msg(skb); + char hs_type; + int err; + + if (likely(tlm->control != TLS_RECORD_TYPE_HANDSHAKE)) + return 0; + + if (rxm->full_len < 1) + return 0; + + err = skb_copy_bits(skb, rxm->offset, &hs_type, 1); + if (err < 0) { + DEBUG_NET_WARN_ON_ONCE(1); + return err; + } + + if (hs_type == TLS_HANDSHAKE_KEYUPDATE) { + struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx; + + WRITE_ONCE(rx_ctx->key_update_pending, true); + } + + return 0; +} + static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, struct tls_decrypt_arg *darg) { @@ -1739,7 +1771,7 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - return 0; + return tls_check_pending_rekey(tls_ctx, darg->skb); } int decrypt_skb(struct sock *sk, struct scatterlist *sgout) @@ -2719,6 +2751,7 @@ int tls_set_sw_offload(struct sock *sk, int tx) crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; aead = &sw_ctx_rx->aead_recv; + sw_ctx_rx->key_update_pending = false; } cipher_desc = get_cipher_desc(crypto_info->cipher_type); -- cgit v1.2.3 From 510128b30f2db1600172e9aaec44f66db3c16e15 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Thu, 12 Dec 2024 16:36:06 +0100 Subject: tls: add counters for rekey This introduces 5 counters to keep track of key updates: Tls{Rx,Tx}Rekey{Ok,Error} and TlsRxRekeyReceived. Suggested-by: Jakub Kicinski Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/uapi/linux/snmp.h | 5 +++++ net/tls/tls_main.c | 27 ++++++++++++++++++++++----- net/tls/tls_proc.c | 5 +++++ net/tls/tls_sw.c | 6 ++++-- 4 files changed, 36 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index adf5fd78dd50..51da2e00112d 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -358,6 +358,11 @@ enum LINUX_MIB_TLSRXDEVICERESYNC, /* TlsRxDeviceResync */ LINUX_MIB_TLSDECRYPTRETRY, /* TlsDecryptRetry */ LINUX_MIB_TLSRXNOPADVIOL, /* TlsRxNoPadViolation */ + LINUX_MIB_TLSRXREKEYOK, /* TlsRxRekeyOk */ + LINUX_MIB_TLSRXREKEYERROR, /* TlsRxRekeyError */ + LINUX_MIB_TLSTXREKEYOK, /* TlsTxRekeyOk */ + LINUX_MIB_TLSTXREKEYERROR, /* TlsTxRekeyError */ + LINUX_MIB_TLSRXREKEYRECEIVED, /* TlsRxRekeyReceived */ __LINUX_MIB_TLSMAX }; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 68b5735dafc1..9ee5a83c5b40 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -640,8 +640,11 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, /* Currently we only support setting crypto info more * than one time for TLS 1.3 */ - if (crypto_info->version != TLS_1_3_VERSION) + if (crypto_info->version != TLS_1_3_VERSION) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); return -EBUSY; + } update = true; old_crypto_info = crypto_info; @@ -696,8 +699,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSTXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRTXSW); + } conf = TLS_SW; } } else { @@ -711,8 +719,13 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, update ? crypto_info : NULL); if (rc) goto err_crypto_info; - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); - TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + + if (update) { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYOK); + } else { + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXSW); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSCURRRXSW); + } conf = TLS_SW; } if (!update) @@ -735,6 +748,10 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, return 0; err_crypto_info: + if (update) { + TLS_INC_STATS(sock_net(sk), tx ? LINUX_MIB_TLSTXREKEYERROR + : LINUX_MIB_TLSRXREKEYERROR); + } memzero_explicit(crypto_ctx, sizeof(*crypto_ctx)); return rc; } diff --git a/net/tls/tls_proc.c b/net/tls/tls_proc.c index 68982728f620..367666aa07b8 100644 --- a/net/tls/tls_proc.c +++ b/net/tls/tls_proc.c @@ -22,6 +22,11 @@ static const struct snmp_mib tls_mib_list[] = { SNMP_MIB_ITEM("TlsRxDeviceResync", LINUX_MIB_TLSRXDEVICERESYNC), SNMP_MIB_ITEM("TlsDecryptRetry", LINUX_MIB_TLSDECRYPTRETRY), SNMP_MIB_ITEM("TlsRxNoPadViolation", LINUX_MIB_TLSRXNOPADVIOL), + SNMP_MIB_ITEM("TlsRxRekeyOk", LINUX_MIB_TLSRXREKEYOK), + SNMP_MIB_ITEM("TlsRxRekeyError", LINUX_MIB_TLSRXREKEYERROR), + SNMP_MIB_ITEM("TlsTxRekeyOk", LINUX_MIB_TLSTXREKEYOK), + SNMP_MIB_ITEM("TlsTxRekeyError", LINUX_MIB_TLSTXREKEYERROR), + SNMP_MIB_ITEM("TlsRxRekeyReceived", LINUX_MIB_TLSRXREKEYRECEIVED), SNMP_MIB_SENTINEL }; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 9e5aff5bab98..47550d485819 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1724,7 +1724,8 @@ tls_decrypt_device(struct sock *sk, struct msghdr *msg, return 1; } -static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb) +static int tls_check_pending_rekey(struct sock *sk, struct tls_context *ctx, + struct sk_buff *skb) { const struct strp_msg *rxm = strp_msg(skb); const struct tls_msg *tlm = tls_msg(skb); @@ -1747,6 +1748,7 @@ static int tls_check_pending_rekey(struct tls_context *ctx, struct sk_buff *skb) struct tls_sw_context_rx *rx_ctx = ctx->priv_ctx_rx; WRITE_ONCE(rx_ctx->key_update_pending, true); + TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSRXREKEYRECEIVED); } return 0; @@ -1771,7 +1773,7 @@ static int tls_rx_one_record(struct sock *sk, struct msghdr *msg, rxm->full_len -= prot->overhead_size; tls_advance_record_sn(sk, prot, &tls_ctx->rx); - return tls_check_pending_rekey(tls_ctx, darg->skb); + return tls_check_pending_rekey(sk, tls_ctx, darg->skb); } int decrypt_skb(struct sock *sk, struct scatterlist *sgout) -- cgit v1.2.3 From 35f7cad1743e04bf2944a2aadb6b6a42adc57bca Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:43 +0100 Subject: net: Add the possibility to support a selected hwtstamp in netdevice Introduce the description of a hwtstamp provider, mainly defined with a the hwtstamp source and the phydev pointer. Add a hwtstamp provider description within the netdev structure to allow saving the hwtstamp we want to use. This prepares for future support of an ethtool netlink command to select the desired hwtstamp provider. By default, the old API that does not support hwtstamp selectability is used, meaning the hwtstamp provider pointer is unset. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 10 ++++++++ include/linux/net_tstamp.h | 29 +++++++++++++++++++++++ include/linux/netdevice.h | 4 ++++ include/uapi/linux/net_tstamp.h | 11 +++++++++ net/core/dev_ioctl.c | 41 ++++++++++++++++++++++++++++++-- net/core/timestamping.c | 52 +++++++++++++++++++++++++++++++++++++---- 6 files changed, 140 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b26bb33cd1d4..1a908af4175b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -1998,6 +1999,15 @@ void phy_detach(struct phy_device *phydev) phy_suspend(phydev); if (dev) { + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + /* Disable timestamp if it is the one selected */ + if (hwprov && hwprov->phydev == phydev) { + rcu_assign_pointer(dev->hwprov, NULL); + kfree_rcu(hwprov, rcu_head); + } + phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; phy_link_topo_del_phy(dev, phydev); diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index 662074b08c94..ff0758e88ea1 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -19,6 +19,33 @@ enum hwtstamp_source { HWTSTAMP_SOURCE_PHYLIB, }; +/** + * struct hwtstamp_provider_desc - hwtstamp provider description + * + * @index: index of the hwtstamp provider. + * @qualifier: hwtstamp provider qualifier. + */ +struct hwtstamp_provider_desc { + int index; + enum hwtstamp_provider_qualifier qualifier; +}; + +/** + * struct hwtstamp_provider - hwtstamp provider object + * + * @rcu_head: RCU callback used to free the struct. + * @source: source of the hwtstamp provider. + * @phydev: pointer of the phydev source in case a PTP coming from phylib + * @desc: hwtstamp provider description. + */ + +struct hwtstamp_provider { + struct rcu_head rcu_head; + enum hwtstamp_source source; + struct phy_device *phydev; + struct hwtstamp_provider_desc desc; +}; + /** * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config * @@ -31,6 +58,7 @@ enum hwtstamp_source { * copied the ioctl request back to user space * @source: indication whether timestamps should come from the netdev or from * an attached phylib PHY + * @qualifier: qualifier of the hwtstamp provider * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config @@ -43,6 +71,7 @@ struct kernel_hwtstamp_config { struct ifreq *ifr; bool copied_to_user; enum hwtstamp_source source; + enum hwtstamp_provider_qualifier qualifier; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d917949bba03..2593019ad5b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -82,6 +82,7 @@ struct xdp_metadata_ops; struct xdp_md; struct ethtool_netdev_state; struct phy_link_topology; +struct hwtstamp_provider; typedef u32 xdp_features_t; @@ -2045,6 +2046,7 @@ enum netdev_reg_state { * * @neighbours: List heads pointing to this device's neighbours' * dev_list, one per address-family. + * @hwprov: Tracks which PTP performs hardware packet time stamping. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2457,6 +2459,8 @@ struct net_device { struct hlist_head neighbours[NEIGH_NR_TABLES]; + struct hwtstamp_provider __rcu *hwprov; + u8 priv[] ____cacheline_aligned __counted_by(priv_len); } ____cacheline_aligned; diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 858339d1c1c4..55b0ab51096c 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,6 +13,17 @@ #include #include /* for SO_TIMESTAMPING */ +/* + * Possible type of hwtstamp provider. Mainly "precise" the default one + * is for IEEE 1588 quality and "approx" is for NICs DMA point. + */ +enum hwtstamp_provider_qualifier { + HWTSTAMP_PROVIDER_QUALIFIER_PRECISE, + HWTSTAMP_PROVIDER_QUALIFIER_APPROX, + + HWTSTAMP_PROVIDER_QUALIFIER_CNT, +}; + /* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 1f09930fca26..087a57b7e4fa 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -269,6 +270,21 @@ static int dev_eth_ioctl(struct net_device *dev, int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + cfg->qualifier = hwprov->desc.qualifier; + if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && + hwprov->phydev) + return phy_hwtstamp_get(hwprov->phydev, cfg); + + if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) + return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); + + return -EOPNOTSUPP; + } + if (phy_is_default_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); @@ -324,11 +340,32 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; - bool phy_ts = phy_is_default_hwtstamp(dev->phydev); struct kernel_hwtstamp_config old_cfg = {}; + struct hwtstamp_provider *hwprov; + struct phy_device *phydev; bool changed = false; + bool phy_ts; int err; + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + if (hwprov->source == HWTSTAMP_SOURCE_PHYLIB && + hwprov->phydev) { + phy_ts = true; + phydev = hwprov->phydev; + } else if (hwprov->source == HWTSTAMP_SOURCE_NETDEV) { + phy_ts = false; + } else { + return -EOPNOTSUPP; + } + + cfg->qualifier = hwprov->desc.qualifier; + } else { + phy_ts = phy_is_default_hwtstamp(dev->phydev); + if (phy_ts) + phydev = dev->phydev; + } + cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV; if (phy_ts && dev->see_all_hwtstamp_requests) { @@ -350,7 +387,7 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, changed = kernel_hwtstamp_config_changed(&old_cfg, cfg); if (phy_ts) { - err = phy_hwtstamp_set(dev->phydev, cfg, extack); + err = phy_hwtstamp_set(phydev, cfg, extack); if (err) { if (changed) ops->ndo_hwtstamp_set(dev, &old_cfg, NULL); diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 3717fb152ecc..a50a7ef49ae8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -9,6 +9,7 @@ #include #include #include +#include static unsigned int classify(const struct sk_buff *skb) { @@ -21,19 +22,39 @@ static unsigned int classify(const struct sk_buff *skb) void skb_clone_tx_timestamp(struct sk_buff *skb) { + struct hwtstamp_provider *hwprov; struct mii_timestamper *mii_ts; + struct phy_device *phydev; struct sk_buff *clone; unsigned int type; - if (!skb->sk || !skb->dev || - !phy_is_default_hwtstamp(skb->dev->phydev)) + if (!skb->sk || !skb->dev) return; + rcu_read_lock(); + hwprov = rcu_dereference(skb->dev->hwprov); + if (hwprov) { + if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB || + !hwprov->phydev) { + rcu_read_unlock(); + return; + } + + phydev = hwprov->phydev; + } else { + phydev = skb->dev->phydev; + if (!phy_is_default_hwtstamp(phydev)) { + rcu_read_unlock(); + return; + } + } + rcu_read_unlock(); + type = classify(skb); if (type == PTP_CLASS_NONE) return; - mii_ts = skb->dev->phydev->mii_ts; + mii_ts = phydev->mii_ts; if (likely(mii_ts->txtstamp)) { clone = skb_clone_sk(skb); if (!clone) @@ -45,12 +66,33 @@ EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); bool skb_defer_rx_timestamp(struct sk_buff *skb) { + struct hwtstamp_provider *hwprov; struct mii_timestamper *mii_ts; + struct phy_device *phydev; unsigned int type; - if (!skb->dev || !phy_is_default_hwtstamp(skb->dev->phydev)) + if (!skb->dev) return false; + rcu_read_lock(); + hwprov = rcu_dereference(skb->dev->hwprov); + if (hwprov) { + if (hwprov->source != HWTSTAMP_SOURCE_PHYLIB || + !hwprov->phydev) { + rcu_read_unlock(); + return false; + } + + phydev = hwprov->phydev; + } else { + phydev = skb->dev->phydev; + if (!phy_is_default_hwtstamp(phydev)) { + rcu_read_unlock(); + return false; + } + } + rcu_read_unlock(); + if (skb_headroom(skb) < ETH_HLEN) return false; @@ -63,7 +105,7 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb) if (type == PTP_CLASS_NONE) return false; - mii_ts = skb->dev->phydev->mii_ts; + mii_ts = phydev->mii_ts; if (likely(mii_ts->rxtstamp)) return mii_ts->rxtstamp(mii_ts, skb, type); -- cgit v1.2.3 From b9e3f7dc9ed95daeb83cfa45b821cacaa01aa906 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:44 +0100 Subject: net: ethtool: tsinfo: Enhance tsinfo to support several hwtstamp by net topology Either the MAC or the PHY can provide hwtstamp, so we should be able to read the tsinfo for any hwtstamp provider. Enhance 'get' command to retrieve tsinfo of hwtstamp providers within a network topology. Add support for a specific dump command to retrieve all hwtstamp providers within the network topology, with added functionality for filtered dump to target a single interface. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 20 ++ Documentation/networking/ethtool-netlink.rst | 7 +- include/linux/ethtool.h | 4 + include/uapi/linux/ethtool_netlink_generated.h | 10 + net/ethtool/common.c | 141 +++++++++- net/ethtool/common.h | 13 + net/ethtool/netlink.c | 6 +- net/ethtool/netlink.h | 5 +- net/ethtool/ts.h | 20 ++ net/ethtool/tsinfo.c | 358 ++++++++++++++++++++++++- 10 files changed, 563 insertions(+), 21 deletions(-) create mode 100644 net/ethtool/ts.h (limited to 'include') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index c7634e957d9c..4082816e5f3d 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -836,6 +836,20 @@ attribute-sets: - name: tx-err type: uint + - + name: ts-hwtstamp-provider + attr-cnt-name: __ethtool-a-ts-hwtstamp-provider-cnt + attributes: + - + name: unspec + type: unused + value: 0 + - + name: index + type: u32 + - + name: qualifier + type: u32 - name: tsinfo attr-cnt-name: __ethtool-a-tsinfo-cnt @@ -867,6 +881,10 @@ attribute-sets: name: stats type: nest nested-attributes: ts-stat + - + name: hwtstamp-provider + type: nest + nested-attributes: ts-hwtstamp-provider - name: cable-result attr-cnt-name: __ethtool-a-cable-result-cnt @@ -1912,6 +1930,7 @@ operations: request: attributes: - header + - hwtstamp-provider reply: attributes: - header @@ -1920,6 +1939,7 @@ operations: - rx-filters - phc-index - stats + - hwtstamp-provider dump: *tsinfo-get-op - name: cable-test-act diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index b25926071ece..c585e2f0ddfa 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1245,9 +1245,10 @@ Gets timestamping information like ``ETHTOOL_GET_TS_INFO`` ioctl request. Request contents: - ===================================== ====== ========================== - ``ETHTOOL_A_TSINFO_HEADER`` nested request header - ===================================== ====== ========================== + ======================================== ====== ============================ + ``ETHTOOL_A_TSINFO_HEADER`` nested request header + ``ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ======================================== ====== ============================ Kernel response contents: diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e217c6321ed0..f711bfd75c4d 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -711,6 +711,7 @@ struct ethtool_rxfh_param { * @cmd: command number = %ETHTOOL_GET_TS_INFO * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags * @phc_index: device index of the associated PHC, or -1 if there is none + * @phc_qualifier: qualifier of the associated PHC * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values */ @@ -718,6 +719,7 @@ struct kernel_ethtool_ts_info { u32 cmd; u32 so_timestamping; int phc_index; + enum hwtstamp_provider_qualifier phc_qualifier; enum hwtstamp_tx_types tx_types; enum hwtstamp_rx_filters rx_filters; }; @@ -749,6 +751,7 @@ struct kernel_ethtool_ts_info { * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. + * @supported_hwtstamp_qualifiers: bitfield of supported hwtstamp qualifier. * @get_drvinfo: Report driver/device information. Modern drivers no * longer have to implement this callback. Most fields are * correctly filled in by the core using system information, or @@ -966,6 +969,7 @@ struct ethtool_ops { u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; + u32 supported_hwtstamp_qualifiers; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); int (*get_regs_len)(struct net_device *); void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index b58f352fe4f2..df289dde0f61 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -385,6 +385,15 @@ enum { ETHTOOL_A_TS_STAT_MAX = (__ETHTOOL_A_TS_STAT_CNT - 1) }; +enum { + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_UNSPEC, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + + __ETHTOOL_A_TS_HWTSTAMP_PROVIDER_CNT, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_MAX = (__ETHTOOL_A_TS_HWTSTAMP_PROVIDER_CNT - 1) +}; + enum { ETHTOOL_A_TSINFO_UNSPEC, ETHTOOL_A_TSINFO_HEADER, @@ -393,6 +402,7 @@ enum { ETHTOOL_A_TSINFO_RX_FILTERS, ETHTOOL_A_TSINFO_PHC_INDEX, ETHTOOL_A_TSINFO_STATS, + ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER, __ETHTOOL_A_TSINFO_CNT, ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1) diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 05ce4f8080b3..666db40bcfda 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -5,9 +5,12 @@ #include #include #include +#include #include "netlink.h" #include "common.h" +#include "../core/dev.h" + const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_SG_BIT] = "tx-scatter-gather", @@ -763,20 +766,91 @@ int ethtool_check_ops(const struct ethtool_ops *ops) return 0; } -int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) +static void ethtool_init_tsinfo(struct kernel_ethtool_ts_info *info) { - const struct ethtool_ops *ops = dev->ethtool_ops; - struct phy_device *phydev = dev->phydev; - int err = 0; - memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; info->phc_index = -1; +} + +int ethtool_net_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + int err; + + if (!ops->get_ts_info) + return -ENODEV; + + /* Does ptp comes from netdev */ + ethtool_init_tsinfo(info); + info->phc_qualifier = hwprov_desc->qualifier; + err = ops->get_ts_info(dev, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index && + net_support_hwtstamp_qualifier(dev, hwprov_desc->qualifier)) + return 0; + + return -ENODEV; +} + +int +ethtool_phy_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + int err; + + /* Only precise qualifier is supported in phydev */ + if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) + return -ENODEV; + + /* Look in the phy topology */ + if (dev->link_topo) { + struct phy_device_node *pdn; + unsigned long phy_index; + + xa_for_each(&dev->link_topo->phys, phy_index, pdn) { + if (!phy_has_tsinfo(pdn->phy)) + continue; + + ethtool_init_tsinfo(info); + err = phy_ts_info(pdn->phy, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index) + return 0; + } + return -ENODEV; + } + + /* Look on the dev->phydev */ + if (phy_has_tsinfo(dev->phydev)) { + ethtool_init_tsinfo(info); + err = phy_ts_info(dev->phydev, info); + if (err) + return err; + + if (info->phc_index == hwprov_desc->index) + return 0; + } + + return -ENODEV; +} + +int ethtool_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + int err; - if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev)) - err = phy_ts_info(phydev, info); - else if (ops->get_ts_info) - err = ops->get_ts_info(dev, info); + err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc); + if (err == -ENODEV) + err = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; @@ -784,6 +858,55 @@ int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info return err; } +int __ethtool_get_ts_info(struct net_device *dev, + struct kernel_ethtool_ts_info *info) +{ + struct hwtstamp_provider *hwprov; + + hwprov = rtnl_dereference(dev->hwprov); + /* No provider specified, use default behavior */ + if (!hwprov) { + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + int err = 0; + + ethtool_init_tsinfo(info); + if (phy_is_default_hwtstamp(phydev) && + phy_has_tsinfo(phydev)) + err = phy_ts_info(phydev, info); + else if (ops->get_ts_info) + err = ops->get_ts_info(dev, info); + + info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + + return err; + } + + return ethtool_get_ts_info_by_phc(dev, info, &hwprov->desc); +} + +bool net_support_hwtstamp_qualifier(struct net_device *dev, + enum hwtstamp_provider_qualifier qualifier) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops) + return false; + + /* Return true with precise qualifier and with NIC without + * qualifier description to not break the old behavior. + */ + if (!ops->supported_hwtstamp_qualifiers && + qualifier == HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) + return true; + + if (ops->supported_hwtstamp_qualifiers & BIT(qualifier)) + return true; + + return false; +} + int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) { struct kernel_ethtool_ts_info info = { }; diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 4a2de3ce7354..f5119204c8ff 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -21,6 +21,7 @@ struct link_mode_info { }; struct genl_info; +struct hwtstamp_provider_desc; extern const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN]; @@ -49,6 +50,18 @@ int ethtool_check_max_channel(struct net_device *dev, struct genl_info *info); int ethtool_check_rss_ctx_busy(struct net_device *dev, u32 rss_context); int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); +int ethtool_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +int ethtool_net_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +int +ethtool_phy_get_ts_info_by_phc(struct net_device *dev, + struct kernel_ethtool_ts_info *info, + struct hwtstamp_provider_desc *hwprov_desc); +bool net_support_hwtstamp_qualifier(struct net_device *dev, + enum hwtstamp_provider_qualifier qualifier); extern const struct ethtool_phy_ops *ethtool_phy_ops; extern const struct ethtool_pse_ops *ethtool_pse_ops; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index e3f0ef6b851b..6ae1d91f36e7 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1074,9 +1074,9 @@ static const struct genl_ops ethtool_genl_ops[] = { { .cmd = ETHTOOL_MSG_TSINFO_GET, .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, + .start = ethnl_tsinfo_start, + .dumpit = ethnl_tsinfo_dumpit, + .done = ethnl_tsinfo_done, .policy = ethnl_tsinfo_get_policy, .maxattr = ARRAY_SIZE(ethnl_tsinfo_get_policy) - 1, }, diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 203b08eb6c6f..960cda13e4fc 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -464,7 +464,7 @@ extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_STATS_SRC extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1]; extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1]; extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1]; -extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_HEADER + 1]; +extern const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1]; extern const struct nla_policy ethnl_cable_test_act_policy[ETHTOOL_A_CABLE_TEST_HEADER + 1]; extern const struct nla_policy ethnl_cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG + 1]; extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INFO_HEADER + 1]; @@ -499,6 +499,9 @@ int ethnl_phy_start(struct netlink_callback *cb); int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int ethnl_phy_done(struct netlink_callback *cb); +int ethnl_tsinfo_start(struct netlink_callback *cb); +int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ethnl_tsinfo_done(struct netlink_callback *cb); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/ethtool/ts.h b/net/ethtool/ts.h new file mode 100644 index 000000000000..d901a879a671 --- /dev/null +++ b/net/ethtool/ts.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef _NET_ETHTOOL_TS_H +#define _NET_ETHTOOL_TS_H + +#include "netlink.h" + +static const struct nla_policy +ethnl_ts_hwtst_prov_policy[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_MAX + 1] = { + [ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX] = { .type = NLA_U32 }, + [ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER] = + NLA_POLICY_MAX(NLA_U32, HWTSTAMP_PROVIDER_QUALIFIER_CNT - 1) +}; + +int ts_parse_hwtst_provider(const struct nlattr *nest, + struct hwtstamp_provider_desc *hwprov_desc, + struct netlink_ext_ack *extack, + bool *mod); + +#endif /* _NET_ETHTOOL_TS_H */ diff --git a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index 03d12d6f79ca..7e495a41aeec 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -1,13 +1,18 @@ // SPDX-License-Identifier: GPL-2.0-only #include +#include +#include +#include #include "netlink.h" #include "common.h" #include "bitset.h" +#include "ts.h" struct tsinfo_req_info { struct ethnl_req_info base; + struct hwtstamp_provider_desc hwprov_desc; }; struct tsinfo_reply_data { @@ -16,34 +21,96 @@ struct tsinfo_reply_data { struct ethtool_ts_stats stats; }; +#define TSINFO_REQINFO(__req_base) \ + container_of(__req_base, struct tsinfo_req_info, base) + #define TSINFO_REPDATA(__reply_base) \ container_of(__reply_base, struct tsinfo_reply_data, base) #define ETHTOOL_TS_STAT_CNT \ (__ETHTOOL_A_TS_STAT_CNT - (ETHTOOL_A_TS_STAT_UNSPEC + 1)) -const struct nla_policy ethnl_tsinfo_get_policy[] = { +const struct nla_policy ethnl_tsinfo_get_policy[ETHTOOL_A_TSINFO_MAX + 1] = { [ETHTOOL_A_TSINFO_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), + [ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER] = + NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), }; +int ts_parse_hwtst_provider(const struct nlattr *nest, + struct hwtstamp_provider_desc *hwprov_desc, + struct netlink_ext_ack *extack, + bool *mod) +{ + struct nlattr *tb[ARRAY_SIZE(ethnl_ts_hwtst_prov_policy)]; + int ret; + + ret = nla_parse_nested(tb, + ARRAY_SIZE(ethnl_ts_hwtst_prov_policy) - 1, + nest, + ethnl_ts_hwtst_prov_policy, extack); + if (ret < 0) + return ret; + + if (NL_REQ_ATTR_CHECK(extack, nest, tb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX) || + NL_REQ_ATTR_CHECK(extack, nest, tb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER)) + return -EINVAL; + + ethnl_update_u32(&hwprov_desc->index, + tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX], + mod); + ethnl_update_u32(&hwprov_desc->qualifier, + tb[ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER], + mod); + + return 0; +} + +static int +tsinfo_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); + bool mod = false; + + req->hwprov_desc.index = -1; + + if (!tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER]) + return 0; + + return ts_parse_hwtst_provider(tb[ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER], + &req->hwprov_desc, extack, &mod); +} + static int tsinfo_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct tsinfo_reply_data *data = TSINFO_REPDATA(reply_base); + struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); struct net_device *dev = reply_base->dev; int ret; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; + + if (req->hwprov_desc.index != -1) { + ret = ethtool_get_ts_info_by_phc(dev, &data->ts_info, + &req->hwprov_desc); + ethnl_ops_complete(dev); + return ret; + } + if (req_base->flags & ETHTOOL_FLAG_STATS) { ethtool_stats_init((u64 *)&data->stats, sizeof(data->stats) / sizeof(u64)); if (dev->ethtool_ops->get_ts_stats) dev->ethtool_ops->get_ts_stats(dev, &data->stats); } + ret = __ethtool_get_ts_info(dev, &data->ts_info); ethnl_ops_complete(dev); @@ -87,8 +154,11 @@ static int tsinfo_reply_size(const struct ethnl_req_info *req_base, return ret; len += ret; /* _TSINFO_RX_FILTERS */ } - if (ts_info->phc_index >= 0) + if (ts_info->phc_index >= 0) { len += nla_total_size(sizeof(u32)); /* _TSINFO_PHC_INDEX */ + /* _TSINFO_HWTSTAMP_PROVIDER */ + len += nla_total_size(0) + 2 * nla_total_size(sizeof(u32)); + } if (req_base->flags & ETHTOOL_FLAG_STATS) len += nla_total_size(0) + /* _TSINFO_STATS */ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_TS_STAT_CNT; @@ -163,9 +233,29 @@ static int tsinfo_fill_reply(struct sk_buff *skb, if (ret < 0) return ret; } - if (ts_info->phc_index >= 0 && - nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, ts_info->phc_index)) - return -EMSGSIZE; + if (ts_info->phc_index >= 0) { + struct nlattr *nest; + + ret = nla_put_u32(skb, ETHTOOL_A_TSINFO_PHC_INDEX, + ts_info->phc_index); + if (ret) + return -EMSGSIZE; + + nest = nla_nest_start(skb, ETHTOOL_A_TSINFO_HWTSTAMP_PROVIDER); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + ts_info->phc_index) || + nla_put_u32(skb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + ts_info->phc_qualifier)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, nest); + } if (req_base->flags & ETHTOOL_FLAG_STATS && tsinfo_put_stats(skb, &data->stats)) return -EMSGSIZE; @@ -173,6 +263,263 @@ static int tsinfo_fill_reply(struct sk_buff *skb, return 0; } +struct ethnl_tsinfo_dump_ctx { + struct tsinfo_req_info *req_info; + struct tsinfo_reply_data *reply_data; + unsigned long pos_ifindex; + bool netdev_dump_done; + unsigned long pos_phyindex; + enum hwtstamp_provider_qualifier pos_phcqualifier; +}; + +static void *ethnl_tsinfo_prepare_dump(struct sk_buff *skb, + struct net_device *dev, + struct tsinfo_reply_data *reply_data, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + void *ehdr = NULL; + + ehdr = ethnl_dump_put(skb, cb, + ETHTOOL_MSG_TSINFO_GET_REPLY); + if (!ehdr) + return ERR_PTR(-EMSGSIZE); + + reply_data = ctx->reply_data; + memset(reply_data, 0, sizeof(*reply_data)); + reply_data->base.dev = dev; + memset(&reply_data->ts_info, 0, sizeof(reply_data->ts_info)); + + return ehdr; +} + +static int ethnl_tsinfo_end_dump(struct sk_buff *skb, + struct net_device *dev, + struct tsinfo_req_info *req_info, + struct tsinfo_reply_data *reply_data, + void *ehdr) +{ + int ret; + + reply_data->ts_info.so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + + ret = ethnl_fill_reply_header(skb, dev, ETHTOOL_A_TSINFO_HEADER); + if (ret < 0) + return ret; + + ret = tsinfo_fill_reply(skb, &req_info->base, &reply_data->base); + if (ret < 0) + return ret; + + reply_data->base.dev = NULL; + genlmsg_end(skb, ehdr); + + return ret; +} + +static int ethnl_tsinfo_dump_one_phydev(struct sk_buff *skb, + struct net_device *dev, + struct phy_device *phydev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + void *ehdr = NULL; + int ret = 0; + + if (!phy_has_tsinfo(phydev)) + return -EOPNOTSUPP; + + reply_data = ctx->reply_data; + req_info = ctx->req_info; + ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); + if (IS_ERR(ehdr)) + return PTR_ERR(ehdr); + + ret = phy_ts_info(phydev, &reply_data->ts_info); + if (ret < 0) + goto err; + + ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, ehdr); + if (ret < 0) + goto err; + + return ret; +err: + genlmsg_cancel(skb, ehdr); + return ret; +} + +static int ethnl_tsinfo_dump_one_netdev(struct sk_buff *skb, + struct net_device *dev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + void *ehdr = NULL; + int ret = 0; + + if (!ops->get_ts_info) + return -EOPNOTSUPP; + + reply_data = ctx->reply_data; + req_info = ctx->req_info; + for (; ctx->pos_phcqualifier < HWTSTAMP_PROVIDER_QUALIFIER_CNT; + ctx->pos_phcqualifier++) { + if (!net_support_hwtstamp_qualifier(dev, + ctx->pos_phcqualifier)) + continue; + + ehdr = ethnl_tsinfo_prepare_dump(skb, dev, reply_data, cb); + if (IS_ERR(ehdr)) { + ret = PTR_ERR(ehdr); + goto err; + } + + reply_data->ts_info.phc_qualifier = ctx->pos_phcqualifier; + ret = ops->get_ts_info(dev, &reply_data->ts_info); + if (ret < 0) + goto err; + + ret = ethnl_tsinfo_end_dump(skb, dev, req_info, reply_data, + ehdr); + if (ret < 0) + goto err; + } + + return ret; + +err: + genlmsg_cancel(skb, ehdr); + return ret; +} + +static int ethnl_tsinfo_dump_one_net_topo(struct sk_buff *skb, + struct net_device *dev, + struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct phy_device_node *pdn; + int ret = 0; + + if (!ctx->netdev_dump_done) { + ret = ethnl_tsinfo_dump_one_netdev(skb, dev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + ctx->netdev_dump_done = true; + } + + if (!dev->link_topo) { + if (phy_has_tsinfo(dev->phydev)) { + ret = ethnl_tsinfo_dump_one_phydev(skb, dev, + dev->phydev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + } + + return 0; + } + + xa_for_each_start(&dev->link_topo->phys, ctx->pos_phyindex, pdn, + ctx->pos_phyindex) { + if (phy_has_tsinfo(pdn->phy)) { + ret = ethnl_tsinfo_dump_one_phydev(skb, dev, + pdn->phy, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + return ret; + } + } + + return ret; +} + +int ethnl_tsinfo_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + if (ctx->req_info->base.dev) { + ret = ethnl_tsinfo_dump_one_net_topo(skb, + ctx->req_info->base.dev, + cb); + } else { + for_each_netdev_dump(net, dev, ctx->pos_ifindex) { + ret = ethnl_tsinfo_dump_one_net_topo(skb, dev, cb); + if (ret < 0 && ret != -EOPNOTSUPP) + break; + ctx->pos_phyindex = 0; + ctx->netdev_dump_done = false; + ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; + } + } + rtnl_unlock(); + + return ret; +} + +int ethnl_tsinfo_start(struct netlink_callback *cb) +{ + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct nlattr **tb = info->info.attrs; + struct tsinfo_reply_data *reply_data; + struct tsinfo_req_info *req_info; + int ret; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + req_info = kzalloc(sizeof(*req_info), GFP_KERNEL); + if (!req_info) + return -ENOMEM; + reply_data = kzalloc(sizeof(*reply_data), GFP_KERNEL); + if (!reply_data) { + ret = -ENOMEM; + goto free_req_info; + } + + ret = ethnl_parse_header_dev_get(&req_info->base, + tb[ETHTOOL_A_TSINFO_HEADER], + sock_net(cb->skb->sk), cb->extack, + false); + if (ret < 0) + goto free_reply_data; + + ctx->req_info = req_info; + ctx->reply_data = reply_data; + ctx->pos_ifindex = 0; + ctx->pos_phyindex = 0; + ctx->netdev_dump_done = false; + ctx->pos_phcqualifier = HWTSTAMP_PROVIDER_QUALIFIER_PRECISE; + + return 0; + +free_reply_data: + kfree(reply_data); +free_req_info: + kfree(req_info); + + return ret; +} + +int ethnl_tsinfo_done(struct netlink_callback *cb) +{ + struct ethnl_tsinfo_dump_ctx *ctx = (void *)cb->ctx; + struct tsinfo_req_info *req_info = ctx->req_info; + + ethnl_parse_header_dev_put(&req_info->base); + kfree(ctx->reply_data); + kfree(ctx->req_info); + + return 0; +} + const struct ethnl_request_ops ethnl_tsinfo_request_ops = { .request_cmd = ETHTOOL_MSG_TSINFO_GET, .reply_cmd = ETHTOOL_MSG_TSINFO_GET_REPLY, @@ -180,6 +527,7 @@ const struct ethnl_request_ops ethnl_tsinfo_request_ops = { .req_info_size = sizeof(struct tsinfo_req_info), .reply_data_size = sizeof(struct tsinfo_reply_data), + .parse_request = tsinfo_parse_request, .prepare_data = tsinfo_prepare_data, .reply_size = tsinfo_reply_size, .fill_reply = tsinfo_fill_reply, -- cgit v1.2.3 From 6e9e2eed4f39d52edf5fd006409d211facf49f6b Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Thu, 12 Dec 2024 18:06:45 +0100 Subject: net: ethtool: Add support for tsconfig command to get/set hwtstamp config Introduce support for ETHTOOL_MSG_TSCONFIG_GET/SET ethtool netlink socket to read and configure hwtstamp configuration of a PHC provider. Note that simultaneous hwtstamp isn't supported; configuring a new one disables the previous setting. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 56 ++++ Documentation/networking/ethtool-netlink.rst | 75 +++++ Documentation/networking/timestamping.rst | 38 ++- include/uapi/linux/ethtool_netlink_generated.h | 16 + net/ethtool/Makefile | 2 +- net/ethtool/common.c | 27 +- net/ethtool/common.h | 2 +- net/ethtool/netlink.c | 18 + net/ethtool/netlink.h | 3 + net/ethtool/tsconfig.c | 444 +++++++++++++++++++++++++ 10 files changed, 655 insertions(+), 26 deletions(-) create mode 100644 net/ethtool/tsconfig.c (limited to 'include') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 4082816e5f3d..60f85fbf4156 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1489,6 +1489,33 @@ attribute-sets: - name: downstream-sfp-name type: string + - + name: tsconfig + attr-cnt-name: __ethtool-a-tsconfig-cnt + attributes: + - + name: unspec + type: unused + value: 0 + - + name: header + type: nest + nested-attributes: header + - + name: hwtstamp-provider + type: nest + nested-attributes: ts-hwtstamp-provider + - + name: tx-types + type: nest + nested-attributes: bitset + - + name: rx-filters + type: nest + nested-attributes: bitset + - + name: hwtstamp-flags + type: u32 operations: enum-model: directional @@ -2314,3 +2341,32 @@ operations: name: phy-ntf doc: Notification for change in PHY devices. notify: phy-get + - + name: tsconfig-get + doc: Get hwtstamp config. + + attribute-set: tsconfig + + do: &tsconfig-get-op + request: + attributes: + - header + reply: + attributes: &tsconfig + - header + - hwtstamp-provider + - tx-types + - rx-filters + - hwtstamp-flags + dump: *tsconfig-get-op + - + name: tsconfig-set + doc: Set hwtstamp config. + + attribute-set: tsconfig + + do: + request: + attributes: *tsconfig + reply: + attributes: *tsconfig diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index c585e2f0ddfa..a7ba6368a4d5 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -237,6 +237,8 @@ Userspace to kernel: ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` flash transceiver module firmware ``ETHTOOL_MSG_PHY_GET`` get Ethernet PHY information + ``ETHTOOL_MSG_TSCONFIG_GET`` get hw timestamping configuration + ``ETHTOOL_MSG_TSCONFIG_SET`` set hw timestamping configuration ===================================== ================================= Kernel to userspace: @@ -286,6 +288,8 @@ Kernel to userspace: ``ETHTOOL_MSG_MODULE_FW_FLASH_NTF`` transceiver module flash updates ``ETHTOOL_MSG_PHY_GET_REPLY`` Ethernet PHY information ``ETHTOOL_MSG_PHY_NTF`` Ethernet PHY information change + ``ETHTOOL_MSG_TSCONFIG_GET_REPLY`` hw timestamping configuration + ``ETHTOOL_MSG_TSCONFIG_SET_REPLY`` new hw timestamping configuration ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -2244,6 +2248,75 @@ Kernel response contents: When ``ETHTOOL_A_PHY_UPSTREAM_TYPE`` is PHY_UPSTREAM_PHY, the PHY's parent is another PHY. +TSCONFIG_GET +============ + +Retrieves the information about the current hardware timestamping source and +configuration. + +It is similar to the deprecated ``SIOCGHWTSTAMP`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +When set the ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` attribute identifies the +source of the hw timestamping provider. It is composed by +``ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX`` attribute which describe the index of +the PTP device and ``ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER`` which describe +the qualifier of the timestamp. + +When set the ``ETHTOOL_A_TSCONFIG_TX_TYPES``, ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` +and the ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` attributes identify the Tx +type, the Rx filter and the flags configured for the current hw timestamping +provider. The attributes are propagated to the driver through the following +structure: + +.. kernel-doc:: include/linux/net_tstamp.h + :identifiers: kernel_hwtstamp_config + +TSCONFIG_SET +============ + +Set the information about the current hardware timestamping source and +configuration. + +It is similar to the deprecated ``SIOCSHWTSTAMP`` ioctl request. + +Request contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +Kernel response contents: + + ======================================== ====== ============================ + ``ETHTOOL_A_TSCONFIG_HEADER`` nested request header + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` nested PTP hw clock provider + ``ETHTOOL_A_TSCONFIG_TX_TYPES`` bitset hwtstamp Tx type + ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` bitset hwtstamp Rx filter + ``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` u32 hwtstamp flags + ======================================== ====== ============================ + +For a description of each attribute, see ``TSCONFIG_GET``. + Request translation =================== @@ -2352,4 +2425,6 @@ are netlink only. n/a ``ETHTOOL_MSG_MM_SET`` n/a ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` n/a ``ETHTOOL_MSG_PHY_GET`` + ``SIOCGHWTSTAMP`` ``ETHTOOL_MSG_TSCONFIG_GET`` + ``SIOCSHWTSTAMP`` ``ETHTOOL_MSG_TSCONFIG_SET`` =================================== ===================================== diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index b37bfbfc7d79..61ef9da10e28 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -525,8 +525,8 @@ implicitly defined. ts[0] holds a software timestamp if set, ts[1] is again deprecated and ts[2] holds a hardware timestamp if set. -3. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP -======================================================================= +3. Hardware Timestamping configuration: ETHTOOL_MSG_TSCONFIG_SET/GET +==================================================================== Hardware time stamping must also be initialized for each device driver that is expected to do hardware time stamping. The parameter is defined in @@ -539,12 +539,14 @@ include/uapi/linux/net_tstamp.h as:: }; Desired behavior is passed into the kernel and to a specific device by -calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose -ifr_data points to a struct hwtstamp_config. The tx_type and -rx_filter are hints to the driver what it is expected to do. If -the requested fine-grained filtering for incoming packets is not -supported, the driver may time stamp more than just the requested types -of packets. +calling the tsconfig netlink socket ``ETHTOOL_MSG_TSCONFIG_SET``. +The ``ETHTOOL_A_TSCONFIG_TX_TYPES``, ``ETHTOOL_A_TSCONFIG_RX_FILTERS`` and +``ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS`` netlink attributes are then used to set +the struct hwtstamp_config accordingly. + +The ``ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER`` netlink nested attribute is used +to select the source of the hardware time stamping. It is composed of an index +for the device source and a qualifier for the type of time stamping. Drivers are free to use a more permissive configuration than the requested configuration. It is expected that drivers should only implement directly the @@ -563,9 +565,16 @@ Only a processes with admin rights may change the configuration. User space is responsible to ensure that multiple processes don't interfere with each other and that the settings are reset. -Any process can read the actual configuration by passing this -structure to ioctl(SIOCGHWTSTAMP) in the same way. However, this has -not been implemented in all drivers. +Any process can read the actual configuration by requesting tsconfig netlink +socket ``ETHTOOL_MSG_TSCONFIG_GET``. + +The legacy configuration is the use of the ioctl(SIOCSHWTSTAMP) with a pointer +to a struct ifreq whose ifr_data points to a struct hwtstamp_config. +The tx_type and rx_filter are hints to the driver what it is expected to do. +If the requested fine-grained filtering for incoming packets is not +supported, the driver may time stamp more than just the requested types +of packets. ioctl(SIOCGHWTSTAMP) is used in the same way as the +ioctl(SIOCSHWTSTAMP). However, this has not been implemented in all drivers. :: @@ -610,9 +619,10 @@ not been implemented in all drivers. -------------------------------------------------------- A driver which supports hardware time stamping must support the -SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with -the actual values as described in the section on SIOCSHWTSTAMP. It -should also support SIOCGHWTSTAMP. +ndo_hwtstamp_set NDO or the legacy SIOCSHWTSTAMP ioctl and update the +supplied struct hwtstamp_config with the actual values as described in +the section on SIOCSHWTSTAMP. It should also support ndo_hwtstamp_get or +the legacy SIOCGHWTSTAMP. Time stamps for received packets must be stored in the skb. To get a pointer to the shared time stamp structure of the skb call skb_hwtstamps(). Then diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index df289dde0f61..43993a2d68e5 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -694,6 +694,18 @@ enum { ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) }; +enum { + ETHTOOL_A_TSCONFIG_UNSPEC, + ETHTOOL_A_TSCONFIG_HEADER, + ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER, + ETHTOOL_A_TSCONFIG_TX_TYPES, + ETHTOOL_A_TSCONFIG_RX_FILTERS, + ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + + __ETHTOOL_A_TSCONFIG_CNT, + ETHTOOL_A_TSCONFIG_MAX = (__ETHTOOL_A_TSCONFIG_CNT - 1) +}; + enum { ETHTOOL_MSG_USER_NONE = 0, ETHTOOL_MSG_STRSET_GET = 1, @@ -741,6 +753,8 @@ enum { ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_MODULE_FW_FLASH_ACT, ETHTOOL_MSG_PHY_GET, + ETHTOOL_MSG_TSCONFIG_GET, + ETHTOOL_MSG_TSCONFIG_SET, __ETHTOOL_MSG_USER_CNT, ETHTOOL_MSG_USER_MAX = (__ETHTOOL_MSG_USER_CNT - 1) @@ -794,6 +808,8 @@ enum { ETHTOOL_MSG_MODULE_FW_FLASH_NTF, ETHTOOL_MSG_PHY_GET_REPLY, ETHTOOL_MSG_PHY_NTF, + ETHTOOL_MSG_TSCONFIG_GET_REPLY, + ETHTOOL_MSG_TSCONFIG_SET_REPLY, __ETHTOOL_MSG_KERNEL_CNT, ETHTOOL_MSG_KERNEL_MAX = (__ETHTOOL_MSG_KERNEL_CNT - 1) diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 9b540644ba31..a1490c4afe6b 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -9,4 +9,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o \ - phy.o + phy.o tsconfig.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 666db40bcfda..02f941f667dd 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -797,7 +797,7 @@ int ethtool_net_get_ts_info_by_phc(struct net_device *dev, return -ENODEV; } -int +struct phy_device * ethtool_phy_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc) @@ -806,7 +806,7 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, /* Only precise qualifier is supported in phydev */ if (hwprov_desc->qualifier != HWTSTAMP_PROVIDER_QUALIFIER_PRECISE) - return -ENODEV; + return ERR_PTR(-ENODEV); /* Look in the phy topology */ if (dev->link_topo) { @@ -820,12 +820,12 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, ethtool_init_tsinfo(info); err = phy_ts_info(pdn->phy, info); if (err) - return err; + return ERR_PTR(err); if (info->phc_index == hwprov_desc->index) - return 0; + return pdn->phy; } - return -ENODEV; + return ERR_PTR(-ENODEV); } /* Look on the dev->phydev */ @@ -833,13 +833,13 @@ ethtool_phy_get_ts_info_by_phc(struct net_device *dev, ethtool_init_tsinfo(info); err = phy_ts_info(dev->phydev, info); if (err) - return err; + return ERR_PTR(err); if (info->phc_index == hwprov_desc->index) - return 0; + return dev->phydev; } - return -ENODEV; + return ERR_PTR(-ENODEV); } int ethtool_get_ts_info_by_phc(struct net_device *dev, @@ -849,8 +849,15 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev, int err; err = ethtool_net_get_ts_info_by_phc(dev, info, hwprov_desc); - if (err == -ENODEV) - err = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); + if (err == -ENODEV) { + struct phy_device *phy; + + phy = ethtool_phy_get_ts_info_by_phc(dev, info, hwprov_desc); + if (IS_ERR(phy)) + err = PTR_ERR(phy); + else + err = 0; + } info->so_timestamping |= SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; diff --git a/net/ethtool/common.h b/net/ethtool/common.h index f5119204c8ff..850eadde4bfc 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -56,7 +56,7 @@ int ethtool_get_ts_info_by_phc(struct net_device *dev, int ethtool_net_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc); -int +struct phy_device * ethtool_phy_get_ts_info_by_phc(struct net_device *dev, struct kernel_ethtool_ts_info *info, struct hwtstamp_provider_desc *hwprov_desc); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 6ae1d91f36e7..849c98e637c6 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -394,6 +394,8 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_PLCA_GET_STATUS] = ðnl_plca_status_request_ops, [ETHTOOL_MSG_MM_GET] = ðnl_mm_request_ops, [ETHTOOL_MSG_MM_SET] = ðnl_mm_request_ops, + [ETHTOOL_MSG_TSCONFIG_GET] = ðnl_tsconfig_request_ops, + [ETHTOOL_MSG_TSCONFIG_SET] = ðnl_tsconfig_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1243,6 +1245,22 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_phy_get_policy, .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_TSCONFIG_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_tsconfig_get_policy, + .maxattr = ARRAY_SIZE(ethnl_tsconfig_get_policy) - 1, + }, + { + .cmd = ETHTOOL_MSG_TSCONFIG_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_default_set_doit, + .policy = ethnl_tsconfig_set_policy, + .maxattr = ARRAY_SIZE(ethnl_tsconfig_set_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 960cda13e4fc..0a09298fff92 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -435,6 +435,7 @@ extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; extern const struct ethnl_request_ops ethnl_phy_request_ops; +extern const struct ethnl_request_ops ethnl_tsconfig_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -485,6 +486,8 @@ extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1]; extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1]; +extern const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1]; +extern const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c new file mode 100644 index 000000000000..9188e088fb2f --- /dev/null +++ b/net/ethtool/tsconfig.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "netlink.h" +#include "common.h" +#include "bitset.h" +#include "../core/dev.h" +#include "ts.h" + +struct tsconfig_req_info { + struct ethnl_req_info base; +}; + +struct tsconfig_reply_data { + struct ethnl_reply_data base; + struct hwtstamp_provider_desc hwprov_desc; + struct { + u32 tx_type; + u32 rx_filter; + u32 flags; + } hwtst_config; +}; + +#define TSCONFIG_REPDATA(__reply_base) \ + container_of(__reply_base, struct tsconfig_reply_data, base) + +const struct nla_policy ethnl_tsconfig_get_policy[ETHTOOL_A_TSCONFIG_HEADER + 1] = { + [ETHTOOL_A_TSCONFIG_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int tsconfig_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + const struct genl_info *info) +{ + struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + struct hwtstamp_provider *hwprov = NULL; + struct net_device *dev = reply_base->dev; + struct kernel_hwtstamp_config cfg = {}; + int ret; + + if (!dev->netdev_ops->ndo_hwtstamp_get) + return -EOPNOTSUPP; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + ret = dev_get_hwtstamp_phylib(dev, &cfg); + if (ret) + goto out; + + data->hwtst_config.tx_type = BIT(cfg.tx_type); + data->hwtst_config.rx_filter = BIT(cfg.rx_filter); + data->hwtst_config.flags = BIT(cfg.flags); + + data->hwprov_desc.index = -1; + hwprov = rtnl_dereference(dev->hwprov); + if (hwprov) { + data->hwprov_desc.index = hwprov->desc.index; + data->hwprov_desc.qualifier = hwprov->desc.qualifier; + } else { + struct kernel_ethtool_ts_info ts_info = {}; + + ts_info.phc_index = -1; + ret = __ethtool_get_ts_info(dev, &ts_info); + if (ret) + goto out; + + if (ts_info.phc_index == -1) + return -ENODEV; + + data->hwprov_desc.index = ts_info.phc_index; + data->hwprov_desc.qualifier = ts_info.phc_qualifier; + } + +out: + ethnl_ops_complete(dev); + return ret; +} + +static int tsconfig_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int len = 0; + int ret; + + BUILD_BUG_ON(__HWTSTAMP_TX_CNT > 32); + BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT > 32); + + if (data->hwtst_config.flags) + /* _TSCONFIG_HWTSTAMP_FLAGS */ + len += nla_total_size(sizeof(u32)); + + if (data->hwtst_config.tx_type) { + ret = ethnl_bitset32_size(&data->hwtst_config.tx_type, + NULL, __HWTSTAMP_TX_CNT, + ts_tx_type_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_TX_TYPES */ + } + if (data->hwtst_config.rx_filter) { + ret = ethnl_bitset32_size(&data->hwtst_config.rx_filter, + NULL, __HWTSTAMP_FILTER_CNT, + ts_rx_filter_names, compact); + if (ret < 0) + return ret; + len += ret; /* _TSCONFIG_RX_FILTERS */ + } + + if (data->hwprov_desc.index >= 0) + /* _TSCONFIG_HWTSTAMP_PROVIDER */ + len += nla_total_size(0) + + 2 * nla_total_size(sizeof(u32)); + + return len; +} + +static int tsconfig_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct tsconfig_reply_data *data = TSCONFIG_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int ret; + + if (data->hwtst_config.flags) { + ret = nla_put_u32(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS, + data->hwtst_config.flags); + if (ret < 0) + return ret; + } + + if (data->hwtst_config.tx_type) { + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_TX_TYPES, + &data->hwtst_config.tx_type, NULL, + __HWTSTAMP_TX_CNT, + ts_tx_type_names, compact); + if (ret < 0) + return ret; + } + + if (data->hwtst_config.rx_filter) { + ret = ethnl_put_bitset32(skb, ETHTOOL_A_TSCONFIG_RX_FILTERS, + &data->hwtst_config.rx_filter, + NULL, __HWTSTAMP_FILTER_CNT, + ts_rx_filter_names, compact); + if (ret < 0) + return ret; + } + + if (data->hwprov_desc.index >= 0) { + struct nlattr *nest; + + nest = nla_nest_start(skb, ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, ETHTOOL_A_TS_HWTSTAMP_PROVIDER_INDEX, + data->hwprov_desc.index) || + nla_put_u32(skb, + ETHTOOL_A_TS_HWTSTAMP_PROVIDER_QUALIFIER, + data->hwprov_desc.qualifier)) { + nla_nest_cancel(skb, nest); + return -EMSGSIZE; + } + + nla_nest_end(skb, nest); + } + return 0; +} + +/* TSCONFIG_SET */ +const struct nla_policy ethnl_tsconfig_set_policy[ETHTOOL_A_TSCONFIG_MAX + 1] = { + [ETHTOOL_A_TSCONFIG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER] = + NLA_POLICY_NESTED(ethnl_ts_hwtst_prov_policy), + [ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS] = { .type = NLA_U32 }, + [ETHTOOL_A_TSCONFIG_RX_FILTERS] = { .type = NLA_NESTED }, + [ETHTOOL_A_TSCONFIG_TX_TYPES] = { .type = NLA_NESTED }, +}; + +static int tsconfig_send_reply(struct net_device *dev, struct genl_info *info) +{ + struct tsconfig_reply_data *reply_data; + struct tsconfig_req_info *req_info; + struct sk_buff *rskb; + void *reply_payload; + int reply_len = 0; + int ret; + + req_info = kzalloc(sizeof(*req_info), GFP_KERNEL); + if (!req_info) + return -ENOMEM; + reply_data = kmalloc(sizeof(*reply_data), GFP_KERNEL); + if (!reply_data) { + kfree(req_info); + return -ENOMEM; + } + + ASSERT_RTNL(); + reply_data->base.dev = dev; + ret = tsconfig_prepare_data(&req_info->base, &reply_data->base, info); + if (ret < 0) + goto err_cleanup; + + ret = tsconfig_reply_size(&req_info->base, &reply_data->base); + if (ret < 0) + goto err_cleanup; + + reply_len = ret + ethnl_reply_header_size(); + rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_TSCONFIG_SET_REPLY, + ETHTOOL_A_TSCONFIG_HEADER, info, &reply_payload); + if (!rskb) + goto err_cleanup; + + ret = tsconfig_fill_reply(rskb, &req_info->base, &reply_data->base); + if (ret < 0) + goto err_cleanup; + + genlmsg_end(rskb, reply_payload); + ret = genlmsg_reply(rskb, info); + +err_cleanup: + kfree(reply_data); + kfree(req_info); + return ret; +} + +static int ethnl_set_tsconfig_validate(struct ethnl_req_info *req_base, + struct genl_info *info) +{ + const struct net_device_ops *ops = req_base->dev->netdev_ops; + + if (!ops->ndo_hwtstamp_set || !ops->ndo_hwtstamp_get) + return -EOPNOTSUPP; + + return 1; +} + +static struct hwtstamp_provider * +tsconfig_set_hwprov_from_desc(struct net_device *dev, + struct genl_info *info, + struct hwtstamp_provider_desc *hwprov_desc) +{ + struct kernel_ethtool_ts_info ts_info; + struct hwtstamp_provider *hwprov; + struct nlattr **tb = info->attrs; + struct phy_device *phy = NULL; + enum hwtstamp_source source; + int ret; + + ret = ethtool_net_get_ts_info_by_phc(dev, &ts_info, hwprov_desc); + if (!ret) { + /* Found */ + source = HWTSTAMP_SOURCE_NETDEV; + } else { + phy = ethtool_phy_get_ts_info_by_phc(dev, &ts_info, hwprov_desc); + if (IS_ERR(phy)) { + if (PTR_ERR(phy) == -ENODEV) + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER], + "phc not in this net device topology"); + return ERR_CAST(phy); + } + + source = HWTSTAMP_SOURCE_PHYLIB; + } + + hwprov = kzalloc(sizeof(*hwprov), GFP_KERNEL); + if (!hwprov) + return ERR_PTR(-ENOMEM); + + hwprov->desc.index = hwprov_desc->index; + hwprov->desc.qualifier = hwprov_desc->qualifier; + hwprov->source = source; + hwprov->phydev = phy; + + return hwprov; +} + +static int ethnl_set_tsconfig(struct ethnl_req_info *req_base, + struct genl_info *info) +{ + struct kernel_hwtstamp_config hwtst_config = {0}; + bool hwprov_mod = false, config_mod = false; + struct hwtstamp_provider *hwprov = NULL; + struct net_device *dev = req_base->dev; + struct nlattr **tb = info->attrs; + int ret; + + BUILD_BUG_ON(__HWTSTAMP_TX_CNT >= 32); + BUILD_BUG_ON(__HWTSTAMP_FILTER_CNT >= 32); + + if (!netif_device_present(dev)) + return -ENODEV; + + if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER]) { + struct hwtstamp_provider_desc __hwprov_desc = {.index = -1}; + struct hwtstamp_provider *__hwprov; + + __hwprov = rtnl_dereference(dev->hwprov); + if (__hwprov) { + __hwprov_desc.index = __hwprov->desc.index; + __hwprov_desc.qualifier = __hwprov->desc.qualifier; + } + + ret = ts_parse_hwtst_provider(tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_PROVIDER], + &__hwprov_desc, info->extack, + &hwprov_mod); + if (ret < 0) + return ret; + + if (hwprov_mod) { + hwprov = tsconfig_set_hwprov_from_desc(dev, info, + &__hwprov_desc); + if (IS_ERR(hwprov)) + return PTR_ERR(hwprov); + } + } + + /* Get current hwtstamp config if we are not changing the + * hwtstamp source. It will be zeroed in the other case. + */ + if (!hwprov_mod) { + ret = dev_get_hwtstamp_phylib(dev, &hwtst_config); + if (ret < 0 && ret != -EOPNOTSUPP) + goto err_free_hwprov; + } + + /* Get the hwtstamp config from netlink */ + if (tb[ETHTOOL_A_TSCONFIG_TX_TYPES]) { + u32 req_tx_type; + + req_tx_type = BIT(hwtst_config.tx_type); + ret = ethnl_update_bitset32(&req_tx_type, + __HWTSTAMP_TX_CNT, + tb[ETHTOOL_A_TSCONFIG_TX_TYPES], + ts_tx_type_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; + + /* Select only one tx type at a time */ + if (ffs(req_tx_type) != fls(req_tx_type)) { + ret = -EINVAL; + goto err_free_hwprov; + } + + hwtst_config.tx_type = ffs(req_tx_type) - 1; + } + + if (tb[ETHTOOL_A_TSCONFIG_RX_FILTERS]) { + u32 req_rx_filter; + + req_rx_filter = BIT(hwtst_config.rx_filter); + ret = ethnl_update_bitset32(&req_rx_filter, + __HWTSTAMP_FILTER_CNT, + tb[ETHTOOL_A_TSCONFIG_RX_FILTERS], + ts_rx_filter_names, info->extack, + &config_mod); + if (ret < 0) + goto err_free_hwprov; + + /* Select only one rx filter at a time */ + if (ffs(req_rx_filter) != fls(req_rx_filter)) { + ret = -EINVAL; + goto err_free_hwprov; + } + + hwtst_config.rx_filter = ffs(req_rx_filter) - 1; + } + + if (tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS]) { + ethnl_update_u32(&hwtst_config.flags, + tb[ETHTOOL_A_TSCONFIG_HWTSTAMP_FLAGS], + &config_mod); + } + + ret = net_hwtstamp_validate(&hwtst_config); + if (ret) + goto err_free_hwprov; + + if (hwprov_mod) { + struct kernel_hwtstamp_config zero_config = {0}; + struct hwtstamp_provider *__hwprov; + + /* Disable current time stamping if we try to enable + * another one + */ + ret = dev_set_hwtstamp_phylib(dev, &zero_config, info->extack); + if (ret < 0) + goto err_free_hwprov; + + /* Change the selected hwtstamp source */ + __hwprov = rcu_replace_pointer_rtnl(dev->hwprov, hwprov); + if (__hwprov) + kfree_rcu(__hwprov, rcu_head); + } + + if (config_mod) { + ret = dev_set_hwtstamp_phylib(dev, &hwtst_config, + info->extack); + if (ret < 0) + return ret; + } + + if (hwprov_mod || config_mod) { + ret = tsconfig_send_reply(dev, info); + if (ret && ret != -EOPNOTSUPP) { + NL_SET_ERR_MSG(info->extack, + "error while reading the new configuration set"); + return ret; + } + } + + /* tsconfig has no notification */ + return 0; + +err_free_hwprov: + kfree(hwprov); + + return ret; +} + +const struct ethnl_request_ops ethnl_tsconfig_request_ops = { + .request_cmd = ETHTOOL_MSG_TSCONFIG_GET, + .reply_cmd = ETHTOOL_MSG_TSCONFIG_GET_REPLY, + .hdr_attr = ETHTOOL_A_TSCONFIG_HEADER, + .req_info_size = sizeof(struct tsconfig_req_info), + .reply_data_size = sizeof(struct tsconfig_reply_data), + + .prepare_data = tsconfig_prepare_data, + .reply_size = tsconfig_reply_size, + .fill_reply = tsconfig_fill_reply, + + .set_validate = ethnl_set_tsconfig_validate, + .set = ethnl_set_tsconfig, +}; -- cgit v1.2.3 From 87e2a15bc00840762b082399493597e8d3c1e42c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:20 +0000 Subject: f2fs: Convert submit tracepoints to take a folio Remove accesses to page->index and page->mapping as well as unnecessary calls to page_file_mapping(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 6 +++--- include/trace/events/f2fs.h | 30 +++++++++++++++--------------- 2 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index a2478c2afb3a..7cb2272c723e 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -697,7 +697,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) META_GENERIC : DATA_GENERIC_ENHANCE))) return -EFSCORRUPTED; - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(page_folio(page), fio); /* Allocate a new bio */ bio = __bio_alloc(fio, 1); @@ -894,7 +894,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio) __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) return -EFSCORRUPTED; - trace_f2fs_submit_page_bio(page, fio); + trace_f2fs_submit_folio_bio(page_folio(page), fio); if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block, fio->new_blkaddr)) @@ -1018,7 +1018,7 @@ alloc_new: io->last_block_in_bio = fio->new_blkaddr; - trace_f2fs_submit_page_write(fio->page, fio); + trace_f2fs_submit_folio_write(page_folio(fio->page), fio); #ifdef CONFIG_BLK_DEV_ZONED if (f2fs_sb_has_blkzoned(sbi) && btype < META && is_end_zone_blkaddr(sbi, fio->new_blkaddr)) { diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 2851c823095b..3253f45508e8 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1119,11 +1119,11 @@ TRACE_EVENT(f2fs_reserve_new_blocks, (unsigned long long)__entry->count) ); -DECLARE_EVENT_CLASS(f2fs__submit_page_bio, +DECLARE_EVENT_CLASS(f2fs__submit_folio_bio, - TP_PROTO(struct page *page, struct f2fs_io_info *fio), + TP_PROTO(struct folio *folio, struct f2fs_io_info *fio), - TP_ARGS(page, fio), + TP_ARGS(folio, fio), TP_STRUCT__entry( __field(dev_t, dev) @@ -1138,9 +1138,9 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, ), TP_fast_assign( - __entry->dev = page_file_mapping(page)->host->i_sb->s_dev; - __entry->ino = page_file_mapping(page)->host->i_ino; - __entry->index = page->index; + __entry->dev = folio->mapping->host->i_sb->s_dev; + __entry->ino = folio->mapping->host->i_ino; + __entry->index = folio->index; __entry->old_blkaddr = fio->old_blkaddr; __entry->new_blkaddr = fio->new_blkaddr; __entry->op = fio->op; @@ -1149,7 +1149,7 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, __entry->type = fio->type; ), - TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " + TP_printk("dev = (%d,%d), ino = %lu, folio_index = 0x%lx, " "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s(%s), type = %s_%s", show_dev_ino(__entry), (unsigned long)__entry->index, @@ -1160,22 +1160,22 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, show_block_type(__entry->type)) ); -DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_bio, +DEFINE_EVENT_CONDITION(f2fs__submit_folio_bio, f2fs_submit_folio_bio, - TP_PROTO(struct page *page, struct f2fs_io_info *fio), + TP_PROTO(struct folio *folio, struct f2fs_io_info *fio), - TP_ARGS(page, fio), + TP_ARGS(folio, fio), - TP_CONDITION(page->mapping) + TP_CONDITION(folio->mapping) ); -DEFINE_EVENT_CONDITION(f2fs__submit_page_bio, f2fs_submit_page_write, +DEFINE_EVENT_CONDITION(f2fs__submit_folio_bio, f2fs_submit_folio_write, - TP_PROTO(struct page *page, struct f2fs_io_info *fio), + TP_PROTO(struct folio *folio, struct f2fs_io_info *fio), - TP_ARGS(page, fio), + TP_ARGS(folio, fio), - TP_CONDITION(page->mapping) + TP_CONDITION(folio->mapping) ); DECLARE_EVENT_CLASS(f2fs__bio, -- cgit v1.2.3 From c910a64bc4e21782959221b6ea2d6c4cce0506c7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 Nov 2024 04:58:26 +0000 Subject: f2fs: Remove calls to folio_file_mapping() All folios that f2fs sees belong to f2fs and not to the swapcache so it can dereference folio->mapping directly like all other filesystems do. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/inline.c | 2 +- include/trace/events/f2fs.h | 9 ++++----- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e68a4dd9bc17..0657f731d4b7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2455,7 +2455,7 @@ next_page: static int f2fs_read_data_folio(struct file *file, struct folio *folio) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; int ret = -EAGAIN; trace_f2fs_readpage(folio, DATA); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 005babf1bed1..cbd2a0d34804 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -81,7 +81,7 @@ bool f2fs_may_inline_dentry(struct inode *inode) void f2fs_do_read_inline_data(struct folio *folio, struct page *ipage) { - struct inode *inode = folio_file_mapping(folio)->host; + struct inode *inode = folio->mapping->host; if (folio_test_uptodate(folio)) return; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 3253f45508e8..eb3b2f1326b1 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -1322,12 +1322,11 @@ DECLARE_EVENT_CLASS(f2fs__folio, ), TP_fast_assign( - __entry->dev = folio_file_mapping(folio)->host->i_sb->s_dev; - __entry->ino = folio_file_mapping(folio)->host->i_ino; + __entry->dev = folio->mapping->host->i_sb->s_dev; + __entry->ino = folio->mapping->host->i_ino; __entry->type = type; - __entry->dir = - S_ISDIR(folio_file_mapping(folio)->host->i_mode); - __entry->index = folio_index(folio); + __entry->dir = S_ISDIR(folio->mapping->host->i_mode); + __entry->index = folio->index; __entry->dirty = folio_test_dirty(folio); __entry->uptodate = folio_test_uptodate(folio); ), -- cgit v1.2.3 From 828fd3f1d611cff5accb9fbff75f6bb011d8c9e2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Mon, 16 Dec 2024 09:32:17 +0100 Subject: Input: davinci-keyscan - remove leftover header The corresponding driver was removed two years ago but the platform data header was left behind. Remove it now. Fixes: 3c9cb34939fb ("input: remove davinci keyboard driver") Signed-off-by: Bartosz Golaszewski Acked-by: Arnd Bergmann Link: https://lore.kernel.org/r/20241216083218.22926-1-brgl@bgdev.pl Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/keyscan-davinci.h | 29 --------------------------- 1 file changed, 29 deletions(-) delete mode 100644 include/linux/platform_data/keyscan-davinci.h (limited to 'include') diff --git a/include/linux/platform_data/keyscan-davinci.h b/include/linux/platform_data/keyscan-davinci.h deleted file mode 100644 index 260d596ba0af..000000000000 --- a/include/linux/platform_data/keyscan-davinci.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2009 Texas Instruments, Inc - * - * Author: Miguel Aguilar - */ - -#ifndef DAVINCI_KEYSCAN_H -#define DAVINCI_KEYSCAN_H - -#include - -enum davinci_matrix_types { - DAVINCI_KEYSCAN_MATRIX_4X4, - DAVINCI_KEYSCAN_MATRIX_5X3, -}; - -struct davinci_ks_platform_data { - int (*device_enable)(struct device *dev); - unsigned short *keymap; - u32 keymapsize; - u8 rep:1; - u8 strobe; - u8 interval; - u8 matrix_type; -}; - -#endif - -- cgit v1.2.3 From 1e7381f3617d14b3c11da80ff5f8a93ab14cfc46 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 9 Oct 2024 08:04:50 -0700 Subject: KVM: Explicitly verify target vCPU is online in kvm_get_vcpu() Explicitly verify the target vCPU is fully online _prior_ to clamping the index in kvm_get_vcpu(). If the index is "bad", the nospec clamping will generate '0', i.e. KVM will return vCPU0 instead of NULL. In practice, the bug is unlikely to cause problems, as it will only come into play if userspace or the guest is buggy or misbehaving, e.g. KVM may send interrupts to vCPU0 instead of dropping them on the floor. However, returning vCPU0 when it shouldn't exist per online_vcpus is problematic now that KVM uses an xarray for the vCPUs array, as KVM needs to insert into the xarray before publishing the vCPU to userspace (see commit c5b077549136 ("KVM: Convert the kvm->vcpus array to a xarray")), i.e. before vCPU creation is guaranteed to succeed. As a result, incorrectly providing access to vCPU0 will trigger a use-after-free if vCPU0 is dereferenced and kvm_vm_ioctl_create_vcpu() bails out of vCPU creation due to an error and frees vCPU0. Commit afb2acb2e3a3 ("KVM: Fix vcpu_array[0] races") papered over that issue, but in doing so introduced an unsolvable teardown conundrum. Preventing accesses to vCPU0 before it's fully online will allow reverting commit afb2acb2e3a3, without re-introducing the vcpu_array[0] UAF race. Fixes: 1d487e9bf8ba ("KVM: fix spectrev1 gadgets") Cc: stable@vger.kernel.org Cc: Will Deacon Cc: Michal Luczaj Reviewed-by: Pankaj Gupta Acked-by: Will Deacon Link: https://lore.kernel.org/r/20241009150455.1057573-2-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 401439bb21e3..b0b38744c4b0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -963,6 +963,15 @@ static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { int num_vcpus = atomic_read(&kvm->online_vcpus); + + /* + * Explicitly verify the target vCPU is online, as the anti-speculation + * logic only limits the CPU's ability to speculate, e.g. given a "bad" + * index, clamping the index to 0 would return vCPU0, not NULL. + */ + if (i >= num_vcpus) + return NULL; + i = array_index_nospec(i, num_vcpus); /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu. */ -- cgit v1.2.3 From 0664dc74e9d004c36b4400081811df795169809a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 9 Oct 2024 08:04:51 -0700 Subject: KVM: Verify there's at least one online vCPU when iterating over all vCPUs Explicitly check that there is at least online vCPU before iterating over all vCPUs. Because the max index is an unsigned long, passing "0 - 1" in the online_vcpus==0 case results in xa_for_each_range() using an unlimited max, i.e. allows it to access vCPU0 when it shouldn't. This will allow KVM to safely _erase_ from vcpu_array if the last stages of vCPU creation fail, i.e. without generating a use-after-free if a different task happens to be concurrently iterating over all vCPUs. Note, because xa_for_each_range() is a macro, kvm_for_each_vcpu() subtly reloads online_vcpus after each iteration, i.e. adding an extra load doesn't meaningfully impact the total cost of iterating over all vCPUs. And because online_vcpus is never decremented, there is no risk of a reload triggering a walk of the entire xarray. Cc: Will Deacon Cc: Michal Luczaj Acked-by: Will Deacon Link: https://lore.kernel.org/r/20241009150455.1057573-3-seanjc@google.com Signed-off-by: Sean Christopherson --- include/linux/kvm_host.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b0b38744c4b0..92f192bec07b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -979,9 +979,10 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) return xa_load(&kvm->vcpu_array, i); } -#define kvm_for_each_vcpu(idx, vcpup, kvm) \ - xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ - (atomic_read(&kvm->online_vcpus) - 1)) +#define kvm_for_each_vcpu(idx, vcpup, kvm) \ + if (atomic_read(&kvm->online_vcpus)) \ + xa_for_each_range(&kvm->vcpu_array, idx, vcpup, 0, \ + (atomic_read(&kvm->online_vcpus) - 1)) static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) { -- cgit v1.2.3 From 3a3f61ce5e0b4bcf730acc09c1af91012d241f85 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 29 Nov 2024 20:06:55 -0800 Subject: exec: Make sure task->comm is always NUL-terminated Using strscpy() meant that the final character in task->comm may be non-NUL for a moment before the "string too long" truncation happens. Instead of adding a new use of the ambiguous strncpy(), we'd want to use memtostr_pad() which enforces being able to check at compile time that sizes are sensible, but this requires being able to see string buffer lengths. Instead of trying to inline __set_task_comm() (which needs to call trace and perf functions), just open-code it. But to make sure we're always safe, add compile-time checking like we already do for get_task_comm(). Suggested-by: Linus Torvalds Suggested-by: "Eric W. Biederman" Signed-off-by: Kees Cook --- fs/exec.c | 12 ++++++------ include/linux/sched.h | 9 ++++----- io_uring/io-wq.c | 2 +- io_uring/sqpoll.c | 2 +- kernel/kthread.c | 3 ++- 5 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index e0435b31a811..5f16500ac325 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1200,16 +1200,16 @@ char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk) EXPORT_SYMBOL_GPL(__get_task_comm); /* - * These functions flushes out all traces of the currently running executable - * so that a new one can be started + * This is unlocked -- the string will always be NUL-terminated, but + * may show overlapping contents if racing concurrent reads. */ - void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { - task_lock(tsk); + size_t len = min(strlen(buf), sizeof(tsk->comm) - 1); + trace_task_rename(tsk, buf); - strscpy_pad(tsk->comm, buf, sizeof(tsk->comm)); - task_unlock(tsk); + memcpy(tsk->comm, buf, len); + memset(&tsk->comm[len], 0, sizeof(tsk->comm) - len); perf_event_comm(tsk, exec); } diff --git a/include/linux/sched.h b/include/linux/sched.h index e6ee4258169a..ac9f429ddc17 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1932,11 +1932,10 @@ static inline void kick_process(struct task_struct *tsk) { } #endif extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec); - -static inline void set_task_comm(struct task_struct *tsk, const char *from) -{ - __set_task_comm(tsk, from, false); -} +#define set_task_comm(tsk, from) ({ \ + BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN); \ + __set_task_comm(tsk, from, false); \ +}) extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); #define get_task_comm(buf, tsk) ({ \ diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index a38f36b68060..5d0928f37471 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -634,7 +634,7 @@ static int io_wq_worker(void *data) struct io_wq_acct *acct = io_wq_get_acct(worker); struct io_wq *wq = worker->wq; bool exit_mask = false, last_timeout = false; - char buf[TASK_COMM_LEN]; + char buf[TASK_COMM_LEN] = {}; set_mask_bits(&worker->flags, 0, BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING)); diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index a26593979887..90011f06c7fb 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -271,7 +271,7 @@ static int io_sq_thread(void *data) struct io_ring_ctx *ctx; struct rusage start; unsigned long timeout = 0; - char buf[TASK_COMM_LEN]; + char buf[TASK_COMM_LEN] = {}; DEFINE_WAIT(wait); /* offload context creation failed, just exit */ diff --git a/kernel/kthread.c b/kernel/kthread.c index db4ceb0f503c..162d55811744 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -736,10 +736,11 @@ EXPORT_SYMBOL(kthread_stop_put); int kthreadd(void *unused) { + static const char comm[TASK_COMM_LEN] = "kthreadd"; struct task_struct *tsk = current; /* Setup a clean context for our children to inherit. */ - set_task_comm(tsk, "kthreadd"); + set_task_comm(tsk, comm); ignore_signals(tsk); set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD)); set_mems_allowed(node_states[N_MEMORY]); -- cgit v1.2.3 From 543841d1806029889c2f69f040e88b247aba8e22 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 21 Nov 2024 07:07:05 -0800 Subject: exec: fix up /proc/pid/comm in the execveat(AT_EMPTY_PATH) case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Zbigniew mentioned at Linux Plumber's that systemd is interested in switching to execveat() for service execution, but can't, because the contents of /proc/pid/comm are the file descriptor which was used, instead of the path to the binary[1]. This makes the output of tools like top and ps useless, especially in a world where most fds are opened CLOEXEC so the number is truly meaningless. When the filename passed in is empty (e.g. with AT_EMPTY_PATH), use the dentry's filename for "comm" instead of using the useless numeral from the synthetic fdpath construction. This way the actual exec machinery is unchanged, but cosmetically the comm looks reasonable to admins investigating things. Instead of adding TASK_COMM_LEN more bytes to bprm, use one of the unused flag bits to indicate that we need to set "comm" from the dentry. Suggested-by: Zbigniew Jędrzejewski-Szmek Suggested-by: Tycho Andersen Suggested-by: Al Viro Suggested-by: Linus Torvalds Link: https://github.com/uapi-group/kernel-features#set-comm-field-before-exec [1] Reviewed-by: Aleksa Sarai Tested-by: Zbigniew Jędrzejewski-Szmek Signed-off-by: Kees Cook --- fs/exec.c | 29 ++++++++++++++++++++++++++--- include/linux/binfmts.h | 4 +++- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 5f16500ac325..1843366be6ff 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1347,7 +1347,28 @@ int begin_new_exec(struct linux_binprm * bprm) set_dumpable(current->mm, SUID_DUMP_USER); perf_event_exec(); - __set_task_comm(me, kbasename(bprm->filename), true); + + /* + * If the original filename was empty, alloc_bprm() made up a path + * that will probably not be useful to admins running ps or similar. + * Let's fix it up to be something reasonable. + */ + if (bprm->comm_from_dentry) { + /* + * Hold RCU lock to keep the name from being freed behind our back. + * Use acquire semantics to make sure the terminating NUL from + * __d_alloc() is seen. + * + * Note, we're deliberately sloppy here. We don't need to care about + * detecting a concurrent rename and just want a terminated name. + */ + rcu_read_lock(); + __set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name), + true); + rcu_read_unlock(); + } else { + __set_task_comm(me, kbasename(bprm->filename), true); + } /* An exec changes our domain. We are no longer part of the thread group */ @@ -1521,11 +1542,13 @@ static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int fl if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = filename->name; } else { - if (filename->name[0] == '\0') + if (filename->name[0] == '\0') { bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); - else + bprm->comm_from_dentry = 1; + } else { bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", fd, filename->name); + } if (!bprm->fdpath) goto out_free; diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index e6c00e860951..3305c849abd6 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -42,7 +42,9 @@ struct linux_binprm { * Set when errors can no longer be returned to the * original userspace. */ - point_of_no_return:1; + point_of_no_return:1, + /* Set when "comm" must come from the dentry. */ + comm_from_dentry:1; struct file *executable; /* Executable to pass to the interpreter */ struct file *interpreter; struct file *file; -- cgit v1.2.3 From a32f3e9d1ed146f81162702605d65447a319eb76 Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:55 +0100 Subject: sock: support SO_PRIORITY cmsg The Linux socket API currently allows setting SO_PRIORITY at the socket level, applying a uniform priority to all packets sent through that socket. The exception to this is IP_TOS, when the priority value is calculated during the handling of ancillary data, as implemented in commit f02db315b8d8 ("ipv4: IP_TOS and IP_TTL can be specified as ancillary data"). However, this is a computed value, and there is currently no mechanism to set a custom priority via control messages prior to this patch. According to this patch, if SO_PRIORITY is specified as ancillary data, the packet is sent with the priority value set through sockc->priority, overriding the socket-level values set via the traditional setsockopt() method. This is analogous to the existing support for SO_MARK, as implemented in commit c6af0c227a22 ("ip: support SO_MARK cmsg"). If both cmsg SO_PRIORITY and IP_TOS are passed, then the one that takes precedence is the last one in the cmsg list. This patch has the side effect that raw_send_hdrinc now interprets cmsg IP_TOS. Reviewed-by: Willem de Bruijn Suggested-by: Ferenc Fejes Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-3-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- include/net/inet_sock.h | 2 +- include/net/ip.h | 2 +- include/net/sock.h | 4 +++- net/can/raw.c | 2 +- net/core/sock.c | 7 +++++++ net/ipv4/ip_output.c | 4 ++-- net/ipv4/ip_sockglue.c | 2 +- net/ipv4/raw.c | 2 +- net/ipv6/ip6_output.c | 3 ++- net/ipv6/ping.c | 1 + net/ipv6/raw.c | 3 ++- net/ipv6/udp.c | 1 + net/packet/af_packet.c | 2 +- 13 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 56d8bc5593d3..3ccbad881d74 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -172,7 +172,7 @@ struct inet_cork { u8 tx_flags; __u8 ttl; __s16 tos; - char priority; + u32 priority; __u16 gso_size; u32 ts_opt_id; u64 transmit_time; diff --git a/include/net/ip.h b/include/net/ip.h index 0e548c1f2a0e..9f5e33e371fc 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -81,7 +81,6 @@ struct ipcm_cookie { __u8 protocol; __u8 ttl; __s16 tos; - char priority; __u16 gso_size; }; @@ -96,6 +95,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm_init(ipcm); ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark); + ipcm->sockc.priority = READ_ONCE(inet->sk.sk_priority); ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags); ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; diff --git a/include/net/sock.h b/include/net/sock.h index 7464e9f9f47c..316a34d6c48b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1814,13 +1814,15 @@ struct sockcm_cookie { u32 mark; u32 tsflags; u32 ts_opt_id; + u32 priority; }; static inline void sockcm_init(struct sockcm_cookie *sockc, const struct sock *sk) { *sockc = (struct sockcm_cookie) { - .tsflags = READ_ONCE(sk->sk_tsflags) + .tsflags = READ_ONCE(sk->sk_tsflags), + .priority = READ_ONCE(sk->sk_priority), }; } diff --git a/net/can/raw.c b/net/can/raw.c index 255c0a8f39d6..46e8ed9d64da 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -962,7 +962,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } skb->dev = dev; - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = sockc.priority; skb->mark = READ_ONCE(sk->sk_mark); skb->tstamp = sockc.transmit_time; diff --git a/net/core/sock.c b/net/core/sock.c index 9016f984d44e..a3d9941c1d32 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2947,6 +2947,13 @@ int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, case SCM_RIGHTS: case SCM_CREDENTIALS: break; + case SO_PRIORITY: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32))) + return -EINVAL; + if (!sk_set_prio_allowed(sk, *(u32 *)CMSG_DATA(cmsg))) + return -EPERM; + sockc->priority = *(u32 *)CMSG_DATA(cmsg); + break; default: return -EINVAL; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a59204a8d850..f45a083f2c13 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1333,7 +1333,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->ttl = ipc->ttl; cork->tos = ipc->tos; cork->mark = ipc->sockc.mark; - cork->priority = ipc->priority; + cork->priority = ipc->sockc.priority; cork->transmit_time = ipc->sockc.transmit_time; cork->tx_flags = 0; sock_tx_timestamp(sk, &ipc->sockc, &cork->tx_flags); @@ -1470,7 +1470,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, ip_options_build(skb, opt, cork->addr, rt); } - skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority); + skb->priority = cork->priority; skb->mark = cork->mark; if (sk_is_tcp(sk)) skb_set_delivery_time(skb, cork->transmit_time, SKB_CLOCK_MONOTONIC); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cf377377b52d..f6a03b418dde 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -315,7 +315,7 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, if (val < 0 || val > 255) return -EINVAL; ipc->tos = val; - ipc->priority = rt_tos2priority(ipc->tos); + ipc->sockc.priority = rt_tos2priority(ipc->tos); break; case IP_PROTOCOL: if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0e9e01967ec9..4304a68d1db0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -358,7 +358,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IP); - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = sockc->priority; skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); skb_dst_set(skb, &rt->dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3d672dea9f56..993106876604 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1401,6 +1401,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; cork->base.mark = ipc6->sockc.mark; + cork->base.priority = ipc6->sockc.priority; sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags); if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) { cork->base.flags |= IPCORK_TS_OPT_ID; @@ -1942,7 +1943,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = cork->base.priority; skb->mark = cork->base.mark; if (sk_is_tcp(sk)) skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 88b3fcacd4f9..46b8adf6e7f8 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -119,6 +119,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) return -EINVAL; ipcm6_init_sk(&ipc6, sk); + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8476a3944a88..a45aba090aa4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -619,7 +619,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb_reserve(skb, hlen); skb->protocol = htons(ETH_P_IPV6); - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = sockc->priority; skb->mark = sockc->mark; skb_set_delivery_type_by_clockid(skb, sockc->transmit_time, sk->sk_clockid); @@ -780,6 +780,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = fl6.flowi6_mark; + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); if (sin6) { if (addr_len < SIN6_LEN_RFC2133) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d766fd798ecf..7c14c449804c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1448,6 +1448,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipc6.gso_size = READ_ONCE(up->gso_size); ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags); ipc6.sockc.mark = READ_ONCE(sk->sk_mark); + ipc6.sockc.priority = READ_ONCE(sk->sk_priority); /* destination address check */ if (sin6) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 886c0dd47b66..f8d87d622699 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3126,7 +3126,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->protocol = proto; skb->dev = dev; - skb->priority = READ_ONCE(sk->sk_priority); + skb->priority = sockc.priority; skb->mark = sockc.mark; skb_set_delivery_type_by_clockid(skb, sockc.transmit_time, sk->sk_clockid); -- cgit v1.2.3 From e45469e594b255ef8d750ed5576698743450d2ac Mon Sep 17 00:00:00 2001 From: Anna Emese Nyiri Date: Fri, 13 Dec 2024 09:44:57 +0100 Subject: sock: Introduce SO_RCVPRIORITY socket option Add new socket option, SO_RCVPRIORITY, to include SO_PRIORITY in the ancillary data returned by recvmsg(). This is analogous to the existing support for SO_RCVMARK, as implemented in commit 6fd1d51cfa253 ("net: SO_RCVMARK socket option for SO_MARK with recvmsg()"). Reviewed-by: Willem de Bruijn Suggested-by: Ferenc Fejes Signed-off-by: Anna Emese Nyiri Link: https://patch.msgid.link/20241213084457.45120-5-annaemesenyiri@gmail.com Signed-off-by: Jakub Kicinski --- arch/alpha/include/uapi/asm/socket.h | 2 ++ arch/mips/include/uapi/asm/socket.h | 2 ++ arch/parisc/include/uapi/asm/socket.h | 2 ++ arch/sparc/include/uapi/asm/socket.h | 2 ++ include/net/sock.h | 4 +++- include/uapi/asm-generic/socket.h | 2 ++ net/core/sock.c | 8 ++++++++ net/socket.c | 11 +++++++++++ tools/include/uapi/asm-generic/socket.h | 2 ++ 9 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 302507bf9b5d..3df5f2dd4c0f 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -148,6 +148,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index d118d4731580..22fa8f19924a 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -159,6 +159,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index d268d69bfcd2..aa9cd4b951fe 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -140,6 +140,8 @@ #define SCM_TS_OPT_ID 0x404C +#define SO_RCVPRIORITY 0x404D + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 113cd9f353e3..5b464a568664 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -141,6 +141,8 @@ #define SCM_TS_OPT_ID 0x005a +#define SO_RCVPRIORITY 0x005b + #if !defined(__KERNEL__) diff --git a/include/net/sock.h b/include/net/sock.h index 316a34d6c48b..d4bdd3286e03 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -953,6 +953,7 @@ enum sock_flags { SOCK_XDP, /* XDP is attached */ SOCK_TSTAMP_NEW, /* Indicates 64 bit timestamps always */ SOCK_RCVMARK, /* Receive SO_MARK ancillary data with packet */ + SOCK_RCVPRIORITY, /* Receive SO_PRIORITY ancillary data with packet */ }; #define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)) @@ -2660,7 +2661,8 @@ static inline void sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, { #define FLAGS_RECV_CMSGS ((1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_RCVTSTAMP) | \ - (1UL << SOCK_RCVMARK)) + (1UL << SOCK_RCVMARK) |\ + (1UL << SOCK_RCVPRIORITY)) #define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \ SOF_TIMESTAMPING_RAW_HARDWARE) diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index deacfd6dd197..aa5016ff3d91 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -143,6 +143,8 @@ #define SCM_TS_OPT_ID 81 +#define SO_RCVPRIORITY 82 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/net/core/sock.c b/net/core/sock.c index a3d9941c1d32..e7bcc8952248 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1519,6 +1519,10 @@ set_sndbuf: sock_valbool_flag(sk, SOCK_RCVMARK, valbool); break; + case SO_RCVPRIORITY: + sock_valbool_flag(sk, SOCK_RCVPRIORITY, valbool); + break; + case SO_RXQ_OVFL: sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; @@ -1947,6 +1951,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname, v.val = sock_flag(sk, SOCK_RCVMARK); break; + case SO_RCVPRIORITY: + v.val = sock_flag(sk, SOCK_RCVPRIORITY); + break; + case SO_RXQ_OVFL: v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; diff --git a/net/socket.c b/net/socket.c index 9a117248f18f..16402b8be5a7 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1008,12 +1008,23 @@ static void sock_recv_mark(struct msghdr *msg, struct sock *sk, } } +static void sock_recv_priority(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + if (sock_flag(sk, SOCK_RCVPRIORITY) && skb) { + __u32 priority = skb->priority; + + put_cmsg(msg, SOL_SOCKET, SO_PRIORITY, sizeof(__u32), &priority); + } +} + void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); sock_recv_mark(msg, sk, skb); + sock_recv_priority(msg, sk, skb); } EXPORT_SYMBOL_GPL(__sock_recv_cmsgs); diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 281df9139d2b..ffff554a5230 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -126,6 +126,8 @@ #define SCM_TS_OPT_ID 78 +#define SO_RCVPRIORITY 79 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -- cgit v1.2.3 From 1c9eb9e684c606982c5dba019c58eaed5474c3c0 Mon Sep 17 00:00:00 2001 From: Dharma Balasubiramani Date: Fri, 6 Dec 2024 12:59:52 -0700 Subject: dt-bindings: clock: Add SAMA7D65 PMC compatible string Add the `microchip,sama7d65-pmc` compatible string to the existing binding, since the SAMA7D65 PMC shares the same properties and clock requirements as the SAMA7G5. Export MCK3 and MCK5 to be accessed and referenced in DT to assign to the clocks property for sama7d65 SoC. Signed-off-by: Dharma Balasubiramani Signed-off-by: Ryan Wanner Reviewed-by: Claudiu Beznea Acked-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/5252a28531deaee67af1edd8e72d45ca57783464.1733505542.git.Ryan.Wanner@microchip.com [claudiu.beznea: use tabs instead of spaces in include/dt-bindings/clock/at91.h] Signed-off-by: Claudiu Beznea --- Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml | 2 ++ include/dt-bindings/clock/at91.h | 4 ++++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml b/Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml index c9eb60776b4d..885d47dd5724 100644 --- a/Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml +++ b/Documentation/devicetree/bindings/clock/atmel,at91rm9200-pmc.yaml @@ -43,6 +43,7 @@ properties: - atmel,sama5d4-pmc - microchip,sam9x60-pmc - microchip,sam9x7-pmc + - microchip,sama7d65-pmc - microchip,sama7g5-pmc - const: syscon @@ -90,6 +91,7 @@ allOf: enum: - microchip,sam9x60-pmc - microchip,sam9x7-pmc + - microchip,sama7d65-pmc - microchip,sama7g5-pmc then: properties: diff --git a/include/dt-bindings/clock/at91.h b/include/dt-bindings/clock/at91.h index 99f4767ff6bb..f2a7b7d39c0d 100644 --- a/include/dt-bindings/clock/at91.h +++ b/include/dt-bindings/clock/at91.h @@ -42,6 +42,10 @@ #define PMC_PLLADIV2 (PMC_MAIN + 11) #define PMC_LVDSPLL (PMC_MAIN + 12) +/* SAMA7D65 */ +#define PMC_MCK3 (PMC_MAIN + 13) +#define PMC_MCK5 (PMC_MAIN + 14) + #ifndef AT91_PMC_MOSCS #define AT91_PMC_MOSCS 0 /* MOSCS Flag */ #define AT91_PMC_LOCKA 1 /* PLLA Lock */ -- cgit v1.2.3 From c220e216d6bcd52cc7333e38edf43dc66ba0dd13 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Fri, 29 Nov 2024 14:38:04 +0100 Subject: exportfs: add permission method This allows filesystems such as pidfs to provide their custom permission checks. Link: https://lore.kernel.org/r/20241129-work-pidfs-file_handle-v1-5-87d803a42495@kernel.org Reviewed-by: Amir Goldstein Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- fs/fhandle.c | 35 ++++++++++++++--------------------- include/linux/exportfs.h | 17 ++++++++++++++++- 2 files changed, 30 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/fhandle.c b/fs/fhandle.c index e17029b1dc44..3e092ae6d142 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -187,17 +187,6 @@ static int get_path_from_fd(int fd, struct path *root) return 0; } -enum handle_to_path_flags { - HANDLE_CHECK_PERMS = (1 << 0), - HANDLE_CHECK_SUBTREE = (1 << 1), -}; - -struct handle_to_path_ctx { - struct path root; - enum handle_to_path_flags flags; - unsigned int fh_flags; -}; - static int vfs_dentry_acceptable(void *context, struct dentry *dentry) { struct handle_to_path_ctx *ctx = context; @@ -279,13 +268,13 @@ static int do_handle_to_path(struct file_handle *handle, struct path *path, return 0; } -static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, - unsigned int o_flags) +static inline int may_decode_fh(struct handle_to_path_ctx *ctx, + unsigned int o_flags) { struct path *root = &ctx->root; if (capable(CAP_DAC_READ_SEARCH)) - return true; + return 0; /* * Allow relaxed permissions of file handles if the caller has @@ -309,7 +298,7 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, * There's only one dentry for each directory inode (VFS rule)... */ if (!(o_flags & O_DIRECTORY)) - return false; + return -EPERM; if (ns_capable(root->mnt->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) ctx->flags = HANDLE_CHECK_PERMS; @@ -319,14 +308,14 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, !has_locked_children(real_mount(root->mnt), root->dentry)) ctx->flags = HANDLE_CHECK_PERMS | HANDLE_CHECK_SUBTREE; else - return false; + return -EPERM; /* Are we able to override DAC permissions? */ if (!ns_capable(current_user_ns(), CAP_DAC_READ_SEARCH)) - return false; + return -EPERM; ctx->fh_flags = EXPORT_FH_DIR_ONLY; - return true; + return 0; } static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, @@ -336,15 +325,19 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, struct file_handle f_handle; struct file_handle *handle = NULL; struct handle_to_path_ctx ctx = {}; + const struct export_operations *eops; retval = get_path_from_fd(mountdirfd, &ctx.root); if (retval) goto out_err; - if (!may_decode_fh(&ctx, o_flags)) { - retval = -EPERM; + eops = ctx.root.mnt->mnt_sb->s_export_op; + if (eops && eops->permission) + retval = eops->permission(&ctx, o_flags); + else + retval = may_decode_fh(&ctx, o_flags); + if (retval) goto out_path; - } if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { retval = -EFAULT; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index c69b79b64466..a087606ace19 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -3,6 +3,7 @@ #define LINUX_EXPORTFS_H 1 #include +#include struct dentry; struct iattr; @@ -10,7 +11,6 @@ struct inode; struct iomap; struct super_block; struct vfsmount; -struct path; /* limit the handle size to NFSv4 handle size now */ #define MAX_HANDLE_SZ 128 @@ -157,6 +157,17 @@ struct fid { }; }; +enum handle_to_path_flags { + HANDLE_CHECK_PERMS = (1 << 0), + HANDLE_CHECK_SUBTREE = (1 << 1), +}; + +struct handle_to_path_ctx { + struct path root; + enum handle_to_path_flags flags; + unsigned int fh_flags; +}; + #define EXPORT_FH_CONNECTABLE 0x1 /* Encode file handle with parent */ #define EXPORT_FH_FID 0x2 /* File handle may be non-decodeable */ #define EXPORT_FH_DIR_ONLY 0x4 /* Only decode file handle for a directory */ @@ -226,6 +237,9 @@ struct fid { * is also a directory. In the event that it cannot be found, or storage * space cannot be allocated, a %ERR_PTR should be returned. * + * permission: + * Allow filesystems to specify a custom permission function. + * * open: * Allow filesystems to specify a custom open function. * @@ -255,6 +269,7 @@ struct export_operations { bool write, u32 *device_generation); int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, int nr_iomaps, struct iattr *iattr); + int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); struct file * (*open)(struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ #define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */ -- cgit v1.2.3 From 16ecd47cb0cd895c7c2f5dd5db50f6c005c51639 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sat, 14 Dec 2024 22:01:28 +0100 Subject: pidfs: lookup pid through rbtree The new pid inode number allocation scheme is neat but I overlooked a possible, even though unlikely, attack that can be used to trigger an overflow on both 32bit and 64bit. An unique 64 bit identifier was constructed for each struct pid by two combining a 32 bit idr with a 32 bit generation number. A 32bit number was allocated using the idr_alloc_cyclic() infrastructure. When the idr wrapped around a 32 bit wraparound counter was incremented. The 32 bit wraparound counter served as the upper 32 bits and the allocated idr number as the lower 32 bits. Since the idr can only allocate up to INT_MAX entries everytime a wraparound happens INT_MAX - 1 entries are lost (Ignoring that numbering always starts at 2 to avoid theoretical collisions with the root inode number.). If userspace fully populates the idr such that and puts itself into control of two entries such that one entry is somewhere in the middle and the other entry is the INT_MAX entry then it is possible to overflow the wraparound counter. That is probably difficult to pull off but the mere possibility is annoying. The problem could be contained to 32 bit by switching to a data structure such as the maple tree that allows allocating 64 bit numbers on 64 bit machines. That would leave 32 bit in a lurch but that probably doesn't matter that much. The other problem is that removing entries form the maple tree is somewhat non-trivial because the removal code can be called under the irq write lock of tasklist_lock and irq{save,restore} code. Instead, allocate unique identifiers for struct pid by simply incrementing a 64 bit counter and insert each struct pid into the rbtree so it can be looked up to decode file handles avoiding to leak actual pids across pid namespaces in file handles. On both 64 bit and 32 bit the same 64 bit identifier is used to lookup struct pid in the rbtree. On 64 bit the unique identifier for struct pid simply becomes the inode number. Comparing two pidfds continues to be as simple as comparing inode numbers. On 32 bit the 64 bit number assigned to struct pid is split into two 32 bit numbers. The lower 32 bits are used as the inode number and the upper 32 bits are used as the inode generation number. Whenever a wraparound happens on 32 bit the 64 bit number will be incremented by 2 so inode numbering starts at 2 again. When a wraparound happens on 32 bit multiple pidfds with the same inode number are likely to exist. This isn't a problem since before pidfs pidfds used the anonymous inode meaning all pidfds had the same inode number. On 32 bit sserspace can thus reconstruct the 64 bit identifier by retrieving both the inode number and the inode generation number to compare, or use file handles. This gives the same guarantees on both 32 bit and 64 bit. Link: https://lore.kernel.org/r/20241214-gekoppelt-erdarbeiten-a1f9a982a5a6@brauner Signed-off-by: Christian Brauner --- fs/pidfs.c | 129 +++++++++++++++++++++++++++++++------------------- include/linux/pid.h | 2 + include/linux/pidfs.h | 2 +- kernel/pid.c | 6 +-- 4 files changed, 86 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/fs/pidfs.c b/fs/pidfs.c index fe10d2a126a2..c5a51c69acc8 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -24,18 +24,9 @@ #include "internal.h" #include "mount.h" -static DEFINE_IDR(pidfs_ino_idr); - -static u32 pidfs_ino_upper_32_bits = 0; +static struct rb_root pidfs_ino_tree = RB_ROOT; #if BITS_PER_LONG == 32 -/* - * On 32 bit systems the lower 32 bits are the inode number and - * the higher 32 bits are the generation number. The starting - * value for the inode number and the generation number is one. - */ -static u32 pidfs_ino_lower_32_bits = 1; - static inline unsigned long pidfs_ino(u64 ino) { return lower_32_bits(ino); @@ -49,52 +40,79 @@ static inline u32 pidfs_gen(u64 ino) #else -static u32 pidfs_ino_lower_32_bits = 0; - /* On 64 bit simply return ino. */ static inline unsigned long pidfs_ino(u64 ino) { return ino; } -/* On 64 bit the generation number is 1. */ +/* On 64 bit the generation number is 0. */ static inline u32 pidfs_gen(u64 ino) { - return 1; + return 0; } #endif -/* - * Construct an inode number for struct pid in a way that we can use the - * lower 32bit to lookup struct pid independent of any pid numbers that - * could be leaked into userspace (e.g., via file handle encoding). - */ -int pidfs_add_pid(struct pid *pid) +static int pidfs_ino_cmp(struct rb_node *a, const struct rb_node *b) { - u32 upper; - int lower; - - /* - * Inode numbering for pidfs start at 2. This avoids collisions - * with the root inode which is 1 for pseudo filesystems. - */ - lower = idr_alloc_cyclic(&pidfs_ino_idr, pid, 2, 0, GFP_ATOMIC); - if (lower >= 0 && lower < pidfs_ino_lower_32_bits) - pidfs_ino_upper_32_bits++; - upper = pidfs_ino_upper_32_bits; - pidfs_ino_lower_32_bits = lower; - if (lower < 0) - return lower; - - pid->ino = ((u64)upper << 32) | lower; - pid->stashed = NULL; + struct pid *pid_a = rb_entry(a, struct pid, pidfs_node); + struct pid *pid_b = rb_entry(b, struct pid, pidfs_node); + u64 pid_ino_a = pid_a->ino; + u64 pid_ino_b = pid_b->ino; + + if (pid_ino_a < pid_ino_b) + return -1; + if (pid_ino_a > pid_ino_b) + return 1; return 0; } -/* The idr number to remove is the lower 32 bits of the inode. */ +void pidfs_add_pid(struct pid *pid) +{ + static u64 pidfs_ino_nr = 2; + + /* + * On 64 bit nothing special happens. The 64bit number assigned + * to struct pid is the inode number. + * + * On 32 bit the 64 bit number assigned to struct pid is split + * into two 32 bit numbers. The lower 32 bits are used as the + * inode number and the upper 32 bits are used as the inode + * generation number. + * + * On 32 bit pidfs_ino() will return the lower 32 bit. When + * pidfs_ino() returns zero a wrap around happened. When a + * wraparound happens the 64 bit number will be incremented by 2 + * so inode numbering starts at 2 again. + * + * On 64 bit comparing two pidfds is as simple as comparing + * inode numbers. + * + * When a wraparound happens on 32 bit multiple pidfds with the + * same inode number are likely to exist (This isn't a problem + * since before pidfs pidfds used the anonymous inode meaning + * all pidfds had the same inode number.). Userspace can + * reconstruct the 64 bit identifier by retrieving both the + * inode number and the inode generation number to compare or + * use file handles. + */ + if (pidfs_ino(pidfs_ino_nr) == 0) + pidfs_ino_nr += 2; + + pid->ino = pidfs_ino_nr; + pid->stashed = NULL; + pidfs_ino_nr++; + + write_seqcount_begin(&pidmap_lock_seq); + rb_find_add_rcu(&pid->pidfs_node, &pidfs_ino_tree, pidfs_ino_cmp); + write_seqcount_end(&pidmap_lock_seq); +} + void pidfs_remove_pid(struct pid *pid) { - idr_remove(&pidfs_ino_idr, lower_32_bits(pid->ino)); + write_seqcount_begin(&pidmap_lock_seq); + rb_erase(&pid->pidfs_node, &pidfs_ino_tree); + write_seqcount_end(&pidmap_lock_seq); } #ifdef CONFIG_PROC_FS @@ -513,24 +531,37 @@ static int pidfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, return FILEID_KERNFS; } +static int pidfs_ino_find(const void *key, const struct rb_node *node) +{ + const u64 pid_ino = *(u64 *)key; + const struct pid *pid = rb_entry(node, struct pid, pidfs_node); + + if (pid_ino < pid->ino) + return -1; + if (pid_ino > pid->ino) + return 1; + return 0; +} + /* Find a struct pid based on the inode number. */ static struct pid *pidfs_ino_get_pid(u64 ino) { - unsigned long pid_ino = pidfs_ino(ino); - u32 gen = pidfs_gen(ino); struct pid *pid; + struct rb_node *node; + unsigned int seq; guard(rcu)(); - - pid = idr_find(&pidfs_ino_idr, lower_32_bits(pid_ino)); - if (!pid) + do { + seq = read_seqcount_begin(&pidmap_lock_seq); + node = rb_find_rcu(&ino, &pidfs_ino_tree, pidfs_ino_find); + if (node) + break; + } while (read_seqcount_retry(&pidmap_lock_seq, seq)); + + if (!node) return NULL; - if (pidfs_ino(pid->ino) != pid_ino) - return NULL; - - if (pidfs_gen(pid->ino) != gen) - return NULL; + pid = rb_entry(node, struct pid, pidfs_node); /* Within our pid namespace hierarchy? */ if (pid_vnr(pid) == 0) diff --git a/include/linux/pid.h b/include/linux/pid.h index a3aad9b4074c..fe575fcdb4af 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -59,6 +59,7 @@ struct pid spinlock_t lock; struct dentry *stashed; u64 ino; + struct rb_node pidfs_node; /* lists of tasks that use this pid */ struct hlist_head tasks[PIDTYPE_MAX]; struct hlist_head inodes; @@ -68,6 +69,7 @@ struct pid struct upid numbers[]; }; +extern seqcount_spinlock_t pidmap_lock_seq; extern struct pid init_struct_pid; struct file; diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 2958652bb108..df574d6708d4 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -4,7 +4,7 @@ struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); -int pidfs_add_pid(struct pid *pid); +void pidfs_add_pid(struct pid *pid); void pidfs_remove_pid(struct pid *pid); #endif /* _LINUX_PID_FS_H */ diff --git a/kernel/pid.c b/kernel/pid.c index 58567d6904b2..aa2a7d4da455 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -103,6 +104,7 @@ EXPORT_SYMBOL_GPL(init_pid_ns); */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock); +seqcount_spinlock_t pidmap_lock_seq = SEQCNT_SPINLOCK_ZERO(pidmap_lock_seq, &pidmap_lock); void put_pid(struct pid *pid) { @@ -273,9 +275,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid, spin_lock_irq(&pidmap_lock); if (!(ns->pid_allocated & PIDNS_ADDING)) goto out_unlock; - retval = pidfs_add_pid(pid); - if (retval) - goto out_unlock; + pidfs_add_pid(pid); for ( ; upid >= pid->numbers; --upid) { /* Make the PID visible to find_pid_ns. */ idr_replace(&upid->ns->idr, pid, upid->nr); -- cgit v1.2.3 From 2102773c60787d97ce93e8aa6890b74f166edd35 Mon Sep 17 00:00:00 2001 From: Raviteja Laggyshetty Date: Wed, 4 Dec 2024 13:26:05 -0800 Subject: dt-bindings: interconnect: add interconnect bindings for SM8750 Add interconnect device bindings. These devices can be used to describe any RPMh and NoC based interconnect devices. Signed-off-by: Raviteja Laggyshetty Signed-off-by: Melody Olvera Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20241204-sm8750_master_interconnects-v3-1-3d9aad4200e9@quicinc.com Signed-off-by: Georgi Djakov --- .../bindings/interconnect/qcom,sm8750-rpmh.yaml | 136 ++++++++++++++++++++ .../dt-bindings/interconnect/qcom,sm8750-rpmh.h | 143 +++++++++++++++++++++ 2 files changed, 279 insertions(+) create mode 100644 Documentation/devicetree/bindings/interconnect/qcom,sm8750-rpmh.yaml create mode 100644 include/dt-bindings/interconnect/qcom,sm8750-rpmh.h (limited to 'include') diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sm8750-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sm8750-rpmh.yaml new file mode 100644 index 000000000000..a816acc301e1 --- /dev/null +++ b/Documentation/devicetree/bindings/interconnect/qcom,sm8750-rpmh.yaml @@ -0,0 +1,136 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interconnect/qcom,sm8750-rpmh.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm RPMh Network-On-Chip Interconnect on SM8750 + +maintainers: + - Abel Vesa + - Neil Armstrong + +description: | + RPMh interconnect providers support system bandwidth requirements through + RPMh hardware accelerators known as Bus Clock Manager (BCM). The provider is + able to communicate with the BCM through the Resource State Coordinator (RSC) + associated with each execution environment. Provider nodes must point to at + least one RPMh device child node pertaining to their RSC and each provider + can map to multiple RPMh resources. + + See also:: include/dt-bindings/interconnect/qcom,sm8750-rpmh.h + +properties: + compatible: + enum: + - qcom,sm8750-aggre1-noc + - qcom,sm8750-aggre2-noc + - qcom,sm8750-clk-virt + - qcom,sm8750-cnoc-main + - qcom,sm8750-config-noc + - qcom,sm8750-gem-noc + - qcom,sm8750-lpass-ag-noc + - qcom,sm8750-lpass-lpiaon-noc + - qcom,sm8750-lpass-lpicx-noc + - qcom,sm8750-mc-virt + - qcom,sm8750-mmss-noc + - qcom,sm8750-nsp-noc + - qcom,sm8750-pcie-anoc + - qcom,sm8750-system-noc + + reg: + maxItems: 1 + + clocks: + minItems: 1 + maxItems: 2 + +required: + - compatible + +allOf: + - $ref: qcom,rpmh-common.yaml# + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8750-clk-virt + - qcom,sm8750-mc-virt + then: + properties: + reg: false + else: + required: + - reg + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8750-pcie-anoc + then: + properties: + clocks: + items: + - description: aggre-NOC PCIe AXI clock + - description: cfg-NOC PCIe a-NOC AHB clock + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8750-aggre1-noc + then: + properties: + clocks: + items: + - description: aggre UFS PHY AXI clock + - description: aggre USB3 PRIM AXI clock + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8750-aggre2-noc + then: + properties: + clocks: + items: + - description: RPMH CC IPA clock + + - if: + properties: + compatible: + contains: + enum: + - qcom,sm8750-aggre1-noc + - qcom,sm8750-aggre2-noc + - qcom,sm8750-pcie-anoc + then: + required: + - clocks + else: + properties: + clocks: false + +unevaluatedProperties: false + +examples: + - | + clk_virt: interconnect-0 { + compatible = "qcom,sm8750-clk-virt"; + #interconnect-cells = <2>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; + + aggre1_noc: interconnect@16e0000 { + compatible = "qcom,sm8750-aggre1-noc"; + reg = <0x016e0000 0x16400>; + #interconnect-cells = <2>; + clocks = <&gcc_phy_axi_clk>, <&gcc_prim_axi_clk>; + qcom,bcm-voters = <&apps_bcm_voter>; + }; diff --git a/include/dt-bindings/interconnect/qcom,sm8750-rpmh.h b/include/dt-bindings/interconnect/qcom,sm8750-rpmh.h new file mode 100644 index 000000000000..30563952a646 --- /dev/null +++ b/include/dt-bindings/interconnect/qcom,sm8750-rpmh.h @@ -0,0 +1,143 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2024, Qualcomm Innovation Center, Inc. All rights reserved. + */ + +#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SM8750_H +#define __DT_BINDINGS_INTERCONNECT_QCOM_SM8750_H + +#define MASTER_QSPI_0 0 +#define MASTER_QUP_1 1 +#define MASTER_QUP_3 2 +#define MASTER_SDCC_4 3 +#define MASTER_UFS_MEM 4 +#define MASTER_USB3_0 5 +#define SLAVE_A1NOC_SNOC 6 + +#define MASTER_QDSS_BAM 0 +#define MASTER_QUP_2 1 +#define MASTER_CRYPTO 2 +#define MASTER_IPA 3 +#define MASTER_SOCCP_AGGR_NOC 4 +#define MASTER_SP 5 +#define MASTER_QDSS_ETR 6 +#define MASTER_QDSS_ETR_1 7 +#define MASTER_SDCC_2 8 +#define SLAVE_A2NOC_SNOC 9 + +#define MASTER_QUP_CORE_0 0 +#define MASTER_QUP_CORE_1 1 +#define MASTER_QUP_CORE_2 2 +#define SLAVE_QUP_CORE_0 3 +#define SLAVE_QUP_CORE_1 4 +#define SLAVE_QUP_CORE_2 5 + +#define MASTER_CNOC_CFG 0 +#define SLAVE_AHB2PHY_SOUTH 1 +#define SLAVE_AHB2PHY_NORTH 2 +#define SLAVE_CAMERA_CFG 3 +#define SLAVE_CLK_CTL 4 +#define SLAVE_CRYPTO_0_CFG 5 +#define SLAVE_DISPLAY_CFG 6 +#define SLAVE_EVA_CFG 7 +#define SLAVE_GFX3D_CFG 8 +#define SLAVE_I2C 9 +#define SLAVE_I3C_IBI0_CFG 10 +#define SLAVE_I3C_IBI1_CFG 11 +#define SLAVE_IMEM_CFG 12 +#define SLAVE_CNOC_MSS 13 +#define SLAVE_PCIE_CFG 14 +#define SLAVE_PRNG 15 +#define SLAVE_QDSS_CFG 16 +#define SLAVE_QSPI_0 17 +#define SLAVE_QUP_3 18 +#define SLAVE_QUP_1 19 +#define SLAVE_QUP_2 20 +#define SLAVE_SDCC_2 21 +#define SLAVE_SDCC_4 22 +#define SLAVE_SPSS_CFG 23 +#define SLAVE_TCSR 24 +#define SLAVE_TLMM 25 +#define SLAVE_UFS_MEM_CFG 26 +#define SLAVE_USB3_0 27 +#define SLAVE_VENUS_CFG 28 +#define SLAVE_VSENSE_CTRL_CFG 29 +#define SLAVE_CNOC_MNOC_CFG 30 +#define SLAVE_PCIE_ANOC_CFG 31 +#define SLAVE_QDSS_STM 32 +#define SLAVE_TCU 33 + +#define MASTER_GEM_NOC_CNOC 0 +#define MASTER_GEM_NOC_PCIE_SNOC 1 +#define SLAVE_AOSS 2 +#define SLAVE_IPA_CFG 3 +#define SLAVE_IPC_ROUTER_CFG 4 +#define SLAVE_SOCCP 5 +#define SLAVE_TME_CFG 6 +#define SLAVE_APPSS 7 +#define SLAVE_CNOC_CFG 8 +#define SLAVE_DDRSS_CFG 9 +#define SLAVE_BOOT_IMEM 10 +#define SLAVE_IMEM 11 +#define SLAVE_BOOT_IMEM_2 12 +#define SLAVE_SERVICE_CNOC 13 +#define SLAVE_PCIE_0 14 + +#define MASTER_GPU_TCU 0 +#define MASTER_SYS_TCU 1 +#define MASTER_APPSS_PROC 2 +#define MASTER_GFX3D 3 +#define MASTER_LPASS_GEM_NOC 4 +#define MASTER_MSS_PROC 5 +#define MASTER_MNOC_HF_MEM_NOC 6 +#define MASTER_MNOC_SF_MEM_NOC 7 +#define MASTER_COMPUTE_NOC 8 +#define MASTER_ANOC_PCIE_GEM_NOC 9 +#define MASTER_SNOC_SF_MEM_NOC 10 +#define MASTER_UBWC_P 11 +#define MASTER_GIC 12 +#define SLAVE_UBWC_P 13 +#define SLAVE_GEM_NOC_CNOC 14 +#define SLAVE_LLCC 15 +#define SLAVE_MEM_NOC_PCIE_SNOC 16 + +#define MASTER_LPIAON_NOC 0 +#define SLAVE_LPASS_GEM_NOC 1 + +#define MASTER_LPASS_LPINOC 0 +#define SLAVE_LPIAON_NOC_LPASS_AG_NOC 1 + +#define MASTER_LPASS_PROC 0 +#define SLAVE_LPICX_NOC_LPIAON_NOC 1 + +#define MASTER_LLCC 0 +#define SLAVE_EBI1 1 + +#define MASTER_CAMNOC_HF 0 +#define MASTER_CAMNOC_NRT_ICP_SF 1 +#define MASTER_CAMNOC_RT_CDM_SF 2 +#define MASTER_CAMNOC_SF 3 +#define MASTER_MDP 4 +#define MASTER_CDSP_HCP 5 +#define MASTER_VIDEO_CV_PROC 6 +#define MASTER_VIDEO_EVA 7 +#define MASTER_VIDEO_MVP 8 +#define MASTER_VIDEO_V_PROC 9 +#define MASTER_CNOC_MNOC_CFG 10 +#define SLAVE_MNOC_HF_MEM_NOC 11 +#define SLAVE_MNOC_SF_MEM_NOC 12 +#define SLAVE_SERVICE_MNOC 13 + +#define MASTER_CDSP_PROC 0 +#define SLAVE_CDSP_MEM_NOC 1 + +#define MASTER_PCIE_ANOC_CFG 0 +#define MASTER_PCIE_0 1 +#define SLAVE_ANOC_PCIE_GEM_NOC 2 +#define SLAVE_SERVICE_PCIE_ANOC 3 + +#define MASTER_A1NOC_SNOC 0 +#define MASTER_A2NOC_SNOC 1 +#define SLAVE_SNOC_GEM_NOC_SF 2 + +#endif -- cgit v1.2.3 From fa52c04daec9ff9820260901a8b1d271bb532d12 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 7 Nov 2024 12:47:05 +0100 Subject: mfd: core: Make platform_data pointer const in struct mfd_cell The content of the platform_data of a struct mfd_cell is simply passed on to the platform_device_add_data() call in mfd_add_device() . platform_device_add_data() already handles the data behind that pointer as const and also uses kmemdup to create a copy of the data before handing that copy over to the newly created platform-device, so there is no reason to not extend this to struct mfd_cell, as the old copy in the mfd_cell will be stale anyway. This allows to pass structs gathered from of_device_get_match_data() as platform-data to sub-devices - which is retrieved as const already. Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241107114712.538976-3-heiko@sntech.de Signed-off-by: Lee Jones --- include/linux/mfd/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index e8bcad641d8c..faeea7abd688 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -72,7 +72,7 @@ struct mfd_cell { int (*resume)(struct platform_device *dev); /* platform data passed to the sub devices drivers */ - void *platform_data; + const void *platform_data; size_t pdata_size; /* Matches ACPI */ -- cgit v1.2.3 From 998f70d1806bb718a7565f350283e4a79c8cbb4b Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Thu, 7 Nov 2024 12:47:07 +0100 Subject: mfd: Add base driver for qnap-mcu devices These microcontroller units are used in network-attached-storage devices made by QNAP and provide additional functionality to the system. This adds the base driver that implements the serial protocol via serdev and additionally hooks into the poweroff handlers to turn off the parts of the system not supplied by the general PMIC. Turning off (at least the TSx33 devices using Rockchip SoCs) consists of two separate actions. Turning off the MCU alone does not turn off the main SoC and turning off only the SoC/PMIC does not turn off the hard-drives. Also if the MCU is not turned off, the system also won't start again until it is unplugged from power. So on shutdown the MCU needs to be turned off separately before the main PMIC. The protocol spoken by the MCU is sadly not documented, but was obtained by listening to the chatter on the serial port, as thankfully the "hal_app" program from QNAPs firmware allows triggering all/most MCU actions from the command line. The implementation of how to talk to the serial device got some inspiration from the rave-sp servdev driver. Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20241107114712.538976-5-heiko@sntech.de Signed-off-by: Lee Jones --- MAINTAINERS | 6 + drivers/mfd/Kconfig | 13 ++ drivers/mfd/Makefile | 2 + drivers/mfd/qnap-mcu.c | 338 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mfd/qnap-mcu.h | 26 ++++ 5 files changed, 385 insertions(+) create mode 100644 drivers/mfd/qnap-mcu.c create mode 100644 include/linux/mfd/qnap-mcu.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 1e930c7a58b1..ccb9e7badb19 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -19105,6 +19105,12 @@ L: linux-media@vger.kernel.org S: Odd Fixes F: drivers/media/tuners/qm1d1c0042* +QNAP MCU DRIVER +M: Heiko Stuebner +S: Maintained +F: drivers/mfd/qnap-mcu.c +F: include/linux/qnap-mcu.h + QNX4 FILESYSTEM M: Anders Larsen S: Maintained diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index ae23b317a64e..74f4de8cd6f1 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -2386,6 +2386,19 @@ config MFD_INTEL_M10_BMC_PMCI additional drivers must be enabled in order to use the functionality of the device. +config MFD_QNAP_MCU + tristate "QNAP microcontroller unit core driver" + depends on SERIAL_DEV_BUS + select MFD_CORE + help + Select this to get support for the QNAP MCU device found in + several devices of QNAP network attached storage products that + implements additional functionality for the device, like fan + and LED control. + + This driver implements the base serial protocol to talk to the + device and provides functions for the other parts to hook into. + config MFD_RSMU_I2C tristate "Renesas Synchronization Management Unit with I2C" depends on I2C && OF diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index e057d6d6faef..b2d540934179 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -288,5 +288,7 @@ obj-$(CONFIG_MFD_INTEL_M10_BMC_PMCI) += intel-m10-bmc-pmci.o obj-$(CONFIG_MFD_ATC260X) += atc260x-core.o obj-$(CONFIG_MFD_ATC260X_I2C) += atc260x-i2c.o +obj-$(CONFIG_MFD_QNAP_MCU) += qnap-mcu.o + obj-$(CONFIG_MFD_RSMU_I2C) += rsmu_i2c.o rsmu_core.o obj-$(CONFIG_MFD_RSMU_SPI) += rsmu_spi.o rsmu_core.o diff --git a/drivers/mfd/qnap-mcu.c b/drivers/mfd/qnap-mcu.c new file mode 100644 index 000000000000..4be39d8b2905 --- /dev/null +++ b/drivers/mfd/qnap-mcu.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Core driver for the microcontroller unit in QNAP NAS devices that is + * connected via a dedicated UART port. + * + * Copyright (C) 2024 Heiko Stuebner + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* The longest command found so far is 5 bytes long */ +#define QNAP_MCU_MAX_CMD_SIZE 5 +#define QNAP_MCU_MAX_DATA_SIZE 36 +#define QNAP_MCU_CHECKSUM_SIZE 1 + +#define QNAP_MCU_RX_BUFFER_SIZE \ + (QNAP_MCU_MAX_DATA_SIZE + QNAP_MCU_CHECKSUM_SIZE) + +#define QNAP_MCU_TX_BUFFER_SIZE \ + (QNAP_MCU_MAX_CMD_SIZE + QNAP_MCU_CHECKSUM_SIZE) + +#define QNAP_MCU_ACK_LEN 2 +#define QNAP_MCU_VERSION_LEN 4 + +#define QNAP_MCU_TIMEOUT_MS 500 + +/** + * struct qnap_mcu_reply - Reply to a command + * + * @data: Buffer to store reply payload in + * @length: Expected reply length, including the checksum + * @received: Received number of bytes, so far + * @done: Triggered when the entire reply has been received + */ +struct qnap_mcu_reply { + u8 *data; + size_t length; + size_t received; + struct completion done; +}; + +/** + * struct qnap_mcu - QNAP NAS embedded controller + * + * @serdev: Pointer to underlying serdev + * @bus_lock: Lock to serialize access to the device + * @reply: Reply data structure + * @variant: Device variant specific information + * @version: MCU firmware version + */ +struct qnap_mcu { + struct serdev_device *serdev; + struct mutex bus_lock; + struct qnap_mcu_reply reply; + const struct qnap_mcu_variant *variant; + u8 version[QNAP_MCU_VERSION_LEN]; +}; + +/* + * The QNAP-MCU uses a basic XOR checksum. + * It is always the last byte and XORs the whole previous message. + */ +static u8 qnap_mcu_csum(const u8 *buf, size_t size) +{ + u8 csum = 0; + + while (size--) + csum ^= *buf++; + + return csum; +} + +static int qnap_mcu_write(struct qnap_mcu *mcu, const u8 *data, u8 data_size) +{ + unsigned char tx[QNAP_MCU_TX_BUFFER_SIZE]; + size_t length = data_size + QNAP_MCU_CHECKSUM_SIZE; + + if (length > sizeof(tx)) { + dev_err(&mcu->serdev->dev, "data too big for transmit buffer"); + return -EINVAL; + } + + memcpy(tx, data, data_size); + tx[data_size] = qnap_mcu_csum(data, data_size); + + serdev_device_write_flush(mcu->serdev); + + return serdev_device_write(mcu->serdev, tx, length, HZ); +} + +static size_t qnap_mcu_receive_buf(struct serdev_device *serdev, const u8 *buf, size_t size) +{ + struct device *dev = &serdev->dev; + struct qnap_mcu *mcu = dev_get_drvdata(dev); + struct qnap_mcu_reply *reply = &mcu->reply; + const u8 *src = buf; + const u8 *end = buf + size; + + if (!reply->length) { + dev_warn(dev, "Received %zu bytes, we were not waiting for\n", size); + return size; + } + + while (src < end) { + reply->data[reply->received] = *src++; + reply->received++; + + if (reply->received == reply->length) { + /* We don't expect any characters from the device now */ + reply->length = 0; + + complete(&reply->done); + + /* + * We report the consumed number of bytes. If there + * are still bytes remaining (though there shouldn't) + * the serdev layer will re-execute this handler with + * the remainder of the Rx bytes. + */ + return src - buf; + } + } + + /* + * The only way to get out of the above loop and end up here + * is through consuming all of the supplied data, so here we + * report that we processed it all. + */ + return size; +} + +static const struct serdev_device_ops qnap_mcu_serdev_device_ops = { + .receive_buf = qnap_mcu_receive_buf, + .write_wakeup = serdev_device_write_wakeup, +}; + +int qnap_mcu_exec(struct qnap_mcu *mcu, + const u8 *cmd_data, size_t cmd_data_size, + u8 *reply_data, size_t reply_data_size) +{ + unsigned char rx[QNAP_MCU_RX_BUFFER_SIZE]; + size_t length = reply_data_size + QNAP_MCU_CHECKSUM_SIZE; + struct qnap_mcu_reply *reply = &mcu->reply; + int ret = 0; + + if (length > sizeof(rx)) { + dev_err(&mcu->serdev->dev, "expected data too big for receive buffer"); + return -EINVAL; + } + + mutex_lock(&mcu->bus_lock); + + reply->data = rx, + reply->length = length, + reply->received = 0, + reinit_completion(&reply->done); + + qnap_mcu_write(mcu, cmd_data, cmd_data_size); + + serdev_device_wait_until_sent(mcu->serdev, msecs_to_jiffies(QNAP_MCU_TIMEOUT_MS)); + + if (!wait_for_completion_timeout(&reply->done, msecs_to_jiffies(QNAP_MCU_TIMEOUT_MS))) { + dev_err(&mcu->serdev->dev, "Command timeout\n"); + ret = -ETIMEDOUT; + } else { + u8 crc = qnap_mcu_csum(rx, reply_data_size); + + if (crc != rx[reply_data_size]) { + dev_err(&mcu->serdev->dev, + "Invalid Checksum received\n"); + ret = -EIO; + } else { + memcpy(reply_data, rx, reply_data_size); + } + } + + mutex_unlock(&mcu->bus_lock); + return ret; +} +EXPORT_SYMBOL_GPL(qnap_mcu_exec); + +int qnap_mcu_exec_with_ack(struct qnap_mcu *mcu, + const u8 *cmd_data, size_t cmd_data_size) +{ + u8 ack[QNAP_MCU_ACK_LEN]; + int ret; + + ret = qnap_mcu_exec(mcu, cmd_data, cmd_data_size, ack, sizeof(ack)); + if (ret) + return ret; + + /* Should return @0 */ + if (ack[0] != '@' || ack[1] != '0') { + dev_err(&mcu->serdev->dev, "Did not receive ack\n"); + return -EIO; + } + + return 0; +} +EXPORT_SYMBOL_GPL(qnap_mcu_exec_with_ack); + +static int qnap_mcu_get_version(struct qnap_mcu *mcu) +{ + const u8 cmd[] = { '%', 'V' }; + u8 rx[14]; + int ret; + + /* Reply is the 2 command-bytes + 4 bytes describing the version */ + ret = qnap_mcu_exec(mcu, cmd, sizeof(cmd), rx, QNAP_MCU_VERSION_LEN + 2); + if (ret) + return ret; + + memcpy(mcu->version, &rx[2], QNAP_MCU_VERSION_LEN); + + return 0; +} + +/* + * The MCU controls power to the peripherals but not the CPU. + * + * So using the PMIC to power off the system keeps the MCU and hard-drives + * running. This also then prevents the system from turning back on until + * the MCU is turned off by unplugging the power cable. + * Turning off the MCU alone on the other hand turns off the hard drives, + * LEDs, etc while the main SoC stays running - including its network ports. + */ +static int qnap_mcu_power_off(struct sys_off_data *data) +{ + const u8 cmd[] = { '@', 'C', '0' }; + struct qnap_mcu *mcu = data->cb_data; + int ret; + + ret = qnap_mcu_exec_with_ack(mcu, cmd, sizeof(cmd)); + if (ret) { + dev_err(&mcu->serdev->dev, "MCU poweroff failed %d\n", ret); + return NOTIFY_STOP; + } + + return NOTIFY_DONE; +} + +static const struct qnap_mcu_variant qnap_ts433_mcu = { + .baud_rate = 115200, + .num_drives = 4, + .fan_pwm_min = 51, /* Specified in original model.conf */ + .fan_pwm_max = 255, + .usb_led = true, +}; + +static struct mfd_cell qnap_mcu_cells[] = { + { .name = "qnap-mcu-input", }, + { .name = "qnap-mcu-leds", }, + { .name = "qnap-mcu-hwmon", } +}; + +static int qnap_mcu_probe(struct serdev_device *serdev) +{ + struct device *dev = &serdev->dev; + struct qnap_mcu *mcu; + int ret; + + mcu = devm_kzalloc(dev, sizeof(*mcu), GFP_KERNEL); + if (!mcu) + return -ENOMEM; + + mcu->serdev = serdev; + dev_set_drvdata(dev, mcu); + + mcu->variant = of_device_get_match_data(dev); + if (!mcu->variant) + return -ENODEV; + + mutex_init(&mcu->bus_lock); + init_completion(&mcu->reply.done); + + serdev_device_set_client_ops(serdev, &qnap_mcu_serdev_device_ops); + ret = devm_serdev_device_open(dev, serdev); + if (ret) + return ret; + + serdev_device_set_baudrate(serdev, mcu->variant->baud_rate); + serdev_device_set_flow_control(serdev, false); + + ret = serdev_device_set_parity(serdev, SERDEV_PARITY_NONE); + if (ret) + return dev_err_probe(dev, ret, "Failed to set parity\n"); + + ret = qnap_mcu_get_version(mcu); + if (ret) + return ret; + + ret = devm_register_sys_off_handler(dev, + SYS_OFF_MODE_POWER_OFF_PREPARE, + SYS_OFF_PRIO_DEFAULT, + &qnap_mcu_power_off, mcu); + if (ret) + return dev_err_probe(dev, ret, + "Failed to register poweroff handler\n"); + + for (int i = 0; i < ARRAY_SIZE(qnap_mcu_cells); i++) { + qnap_mcu_cells[i].platform_data = mcu->variant; + qnap_mcu_cells[i].pdata_size = sizeof(*mcu->variant); + } + + ret = devm_mfd_add_devices(dev, PLATFORM_DEVID_AUTO, qnap_mcu_cells, + ARRAY_SIZE(qnap_mcu_cells), NULL, 0, NULL); + if (ret) + return dev_err_probe(dev, ret, "Failed to add child devices\n"); + + return 0; +} + +static const struct of_device_id qnap_mcu_dt_ids[] = { + { .compatible = "qnap,ts433-mcu", .data = &qnap_ts433_mcu }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, qnap_mcu_dt_ids); + +static struct serdev_device_driver qnap_mcu_drv = { + .probe = qnap_mcu_probe, + .driver = { + .name = "qnap-mcu", + .of_match_table = qnap_mcu_dt_ids, + }, +}; +module_serdev_device_driver(qnap_mcu_drv); + +MODULE_AUTHOR("Heiko Stuebner "); +MODULE_DESCRIPTION("QNAP MCU core driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/qnap-mcu.h b/include/linux/mfd/qnap-mcu.h new file mode 100644 index 000000000000..8d48c212fd44 --- /dev/null +++ b/include/linux/mfd/qnap-mcu.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Core definitions for QNAP MCU MFD driver. + * Copyright (C) 2024 Heiko Stuebner + */ + +#ifndef _LINUX_QNAP_MCU_H_ +#define _LINUX_QNAP_MCU_H_ + +struct qnap_mcu; + +struct qnap_mcu_variant { + u32 baud_rate; + int num_drives; + int fan_pwm_min; + int fan_pwm_max; + bool usb_led; +}; + +int qnap_mcu_exec(struct qnap_mcu *mcu, + const u8 *cmd_data, size_t cmd_data_size, + u8 *reply_data, size_t reply_data_size); +int qnap_mcu_exec_with_ack(struct qnap_mcu *mcu, + const u8 *cmd_data, size_t cmd_data_size); + +#endif /* _LINUX_QNAP_MCU_H_ */ -- cgit v1.2.3 From c925bb8853dae5cb25e7108298e905b55301bbff Mon Sep 17 00:00:00 2001 From: Marcus Folkesson Date: Tue, 10 Dec 2024 16:24:40 +0100 Subject: mfd: da9052: Store result from fault_log Other sub-components (da9052-wdt) could use the result to determine reboot cause. Expose the result by make it part of the da9052 structure. Signed-off-by: Marcus Folkesson Link: https://lore.kernel.org/r/20241210-da9052-wdt-v2-1-95a5756e9ac8@gmail.com Signed-off-by: Lee Jones --- drivers/mfd/da9052-core.c | 1 + include/linux/mfd/da9052/da9052.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/drivers/mfd/da9052-core.c b/drivers/mfd/da9052-core.c index dc85801b9fa0..b06cd518413b 100644 --- a/drivers/mfd/da9052-core.c +++ b/drivers/mfd/da9052-core.c @@ -585,6 +585,7 @@ static int da9052_clear_fault_log(struct da9052 *da9052) "Cannot reset FAULT_LOG values %d\n", ret); } + da9052->fault_log = fault_log; return ret; } diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index 76feb3a7066d..9cb2fc2938ce 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -93,6 +93,8 @@ struct da9052 { int chip_irq; + int fault_log; + /* SOC I/O transfer related fixes for DA9052/53 */ int (*fix_io) (struct da9052 *da9052, unsigned char reg); }; -- cgit v1.2.3 From c2b148f3bc94b61e885dc8529d6b6136576bd865 Mon Sep 17 00:00:00 2001 From: Thomas Richard Date: Wed, 11 Dec 2024 17:27:16 +0100 Subject: mfd: Add support for AAEON UP board FPGA The UP boards implement some features (pin controller, LEDs) through an on-board FPGA. This MFD driver implements the line protocol to communicate with the FPGA through regmap, and registers pin controller and led cells. This commit adds support for UP and UP Squared boards. Based on the work done by Gary Wang . Signed-off-by: Thomas Richard Link: https://lore.kernel.org/r/20241211-aaeon-up-board-pinctrl-support-v1-1-24719be27631@bootlin.com Signed-off-by: Lee Jones --- drivers/mfd/Kconfig | 12 ++ drivers/mfd/Makefile | 2 + drivers/mfd/upboard-fpga.c | 325 +++++++++++++++++++++++++++++++++++++++ include/linux/mfd/upboard-fpga.h | 55 +++++++ 4 files changed, 394 insertions(+) create mode 100644 drivers/mfd/upboard-fpga.c create mode 100644 include/linux/mfd/upboard-fpga.h (limited to 'include') diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index 74f4de8cd6f1..6b0682af6e32 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -2427,5 +2427,17 @@ config MFD_RSMU_SPI Additional drivers must be enabled in order to use the functionality of the device. +config MFD_UPBOARD_FPGA + tristate "Support for the AAeon UP board FPGA" + depends on (X86 && ACPI) + select MFD_CORE + help + Select this option to enable the AAEON UP and UP^2 onboard FPGA. + This is the core driver of this FPGA, which has a pin controller and a + LED controller. + + To compile this driver as a module, choose M here: the module will be + called upboard-fpga. + endmenu endif diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index b2d540934179..9220eaf7cf12 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -292,3 +292,5 @@ obj-$(CONFIG_MFD_QNAP_MCU) += qnap-mcu.o obj-$(CONFIG_MFD_RSMU_I2C) += rsmu_i2c.o rsmu_core.o obj-$(CONFIG_MFD_RSMU_SPI) += rsmu_spi.o rsmu_core.o + +obj-$(CONFIG_MFD_UPBOARD_FPGA) += upboard-fpga.o diff --git a/drivers/mfd/upboard-fpga.c b/drivers/mfd/upboard-fpga.c new file mode 100644 index 000000000000..5a330e2f2229 --- /dev/null +++ b/drivers/mfd/upboard-fpga.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * UP Board FPGA driver. + * + * FPGA provides more GPIO driving power, LEDS and pin mux function. + * + * Copyright (c) AAEON. All rights reserved. + * Copyright (C) 2024 Bootlin + * + * Author: Gary Wang + * Author: Thomas Richard + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UPBOARD_AAEON_MANUFACTURER_ID 0x01 +#define UPBOARD_MANUFACTURER_ID_MASK GENMASK(7, 0) + +#define UPBOARD_ADDRESS_SIZE 7 +#define UPBOARD_REGISTER_SIZE 16 + +#define UPBOARD_READ_FLAG BIT(UPBOARD_ADDRESS_SIZE) + +#define UPBOARD_FW_ID_MAJOR_SUPPORTED 0x0 + +#define UPBOARD_FW_ID_BUILD_MASK GENMASK(15, 12) +#define UPBOARD_FW_ID_MAJOR_MASK GENMASK(11, 8) +#define UPBOARD_FW_ID_MINOR_MASK GENMASK(7, 4) +#define UPBOARD_FW_ID_PATCH_MASK GENMASK(3, 0) + +static int upboard_fpga_read(void *context, unsigned int reg, unsigned int *val) +{ + struct upboard_fpga *fpga = context; + int i; + + /* Clear to start new transaction */ + gpiod_set_value(fpga->clear_gpio, 0); + gpiod_set_value(fpga->clear_gpio, 1); + + reg |= UPBOARD_READ_FLAG; + + /* Send clock and addr from strobe & datain pins */ + for (i = UPBOARD_ADDRESS_SIZE; i >= 0; i--) { + gpiod_set_value(fpga->strobe_gpio, 0); + gpiod_set_value(fpga->datain_gpio, !!(reg & BIT(i))); + gpiod_set_value(fpga->strobe_gpio, 1); + } + + gpiod_set_value(fpga->strobe_gpio, 0); + *val = 0; + + /* Read data from dataout pin */ + for (i = UPBOARD_REGISTER_SIZE - 1; i >= 0; i--) { + gpiod_set_value(fpga->strobe_gpio, 1); + gpiod_set_value(fpga->strobe_gpio, 0); + *val |= gpiod_get_value(fpga->dataout_gpio) << i; + } + + gpiod_set_value(fpga->strobe_gpio, 1); + + return 0; +} + +static int upboard_fpga_write(void *context, unsigned int reg, unsigned int val) +{ + struct upboard_fpga *fpga = context; + int i; + + /* Clear to start new transcation */ + gpiod_set_value(fpga->clear_gpio, 0); + gpiod_set_value(fpga->clear_gpio, 1); + + /* Send clock and addr from strobe & datain pins */ + for (i = UPBOARD_ADDRESS_SIZE; i >= 0; i--) { + gpiod_set_value(fpga->strobe_gpio, 0); + gpiod_set_value(fpga->datain_gpio, !!(reg & BIT(i))); + gpiod_set_value(fpga->strobe_gpio, 1); + } + + gpiod_set_value(fpga->strobe_gpio, 0); + + /* Write data to datain pin */ + for (i = UPBOARD_REGISTER_SIZE - 1; i >= 0; i--) { + gpiod_set_value(fpga->datain_gpio, !!(val & BIT(i))); + gpiod_set_value(fpga->strobe_gpio, 1); + gpiod_set_value(fpga->strobe_gpio, 0); + } + + gpiod_set_value(fpga->strobe_gpio, 1); + + return 0; +} + +static const struct regmap_range upboard_up_readable_ranges[] = { + regmap_reg_range(UPBOARD_REG_PLATFORM_ID, UPBOARD_REG_FIRMWARE_ID), + regmap_reg_range(UPBOARD_REG_FUNC_EN0, UPBOARD_REG_FUNC_EN0), + regmap_reg_range(UPBOARD_REG_GPIO_EN0, UPBOARD_REG_GPIO_EN1), + regmap_reg_range(UPBOARD_REG_GPIO_DIR0, UPBOARD_REG_GPIO_DIR1), +}; + +static const struct regmap_range upboard_up_writable_ranges[] = { + regmap_reg_range(UPBOARD_REG_FUNC_EN0, UPBOARD_REG_FUNC_EN0), + regmap_reg_range(UPBOARD_REG_GPIO_EN0, UPBOARD_REG_GPIO_EN1), + regmap_reg_range(UPBOARD_REG_GPIO_DIR0, UPBOARD_REG_GPIO_DIR1), +}; + +static const struct regmap_access_table upboard_up_readable_table = { + .yes_ranges = upboard_up_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(upboard_up_readable_ranges), +}; + +static const struct regmap_access_table upboard_up_writable_table = { + .yes_ranges = upboard_up_writable_ranges, + .n_yes_ranges = ARRAY_SIZE(upboard_up_writable_ranges), +}; + +static const struct regmap_config upboard_up_regmap_config = { + .reg_bits = UPBOARD_ADDRESS_SIZE, + .val_bits = UPBOARD_REGISTER_SIZE, + .max_register = UPBOARD_REG_MAX, + .reg_read = upboard_fpga_read, + .reg_write = upboard_fpga_write, + .fast_io = false, + .cache_type = REGCACHE_NONE, + .rd_table = &upboard_up_readable_table, + .wr_table = &upboard_up_writable_table, +}; + +static const struct regmap_range upboard_up2_readable_ranges[] = { + regmap_reg_range(UPBOARD_REG_PLATFORM_ID, UPBOARD_REG_FIRMWARE_ID), + regmap_reg_range(UPBOARD_REG_FUNC_EN0, UPBOARD_REG_FUNC_EN1), + regmap_reg_range(UPBOARD_REG_GPIO_EN0, UPBOARD_REG_GPIO_EN2), + regmap_reg_range(UPBOARD_REG_GPIO_DIR0, UPBOARD_REG_GPIO_DIR2), +}; + +static const struct regmap_range upboard_up2_writable_ranges[] = { + regmap_reg_range(UPBOARD_REG_FUNC_EN0, UPBOARD_REG_FUNC_EN1), + regmap_reg_range(UPBOARD_REG_GPIO_EN0, UPBOARD_REG_GPIO_EN2), + regmap_reg_range(UPBOARD_REG_GPIO_DIR0, UPBOARD_REG_GPIO_DIR2), +}; + +static const struct regmap_access_table upboard_up2_readable_table = { + .yes_ranges = upboard_up2_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(upboard_up2_readable_ranges), +}; + +static const struct regmap_access_table upboard_up2_writable_table = { + .yes_ranges = upboard_up2_writable_ranges, + .n_yes_ranges = ARRAY_SIZE(upboard_up2_writable_ranges), +}; + +static const struct regmap_config upboard_up2_regmap_config = { + .reg_bits = UPBOARD_ADDRESS_SIZE, + .val_bits = UPBOARD_REGISTER_SIZE, + .max_register = UPBOARD_REG_MAX, + .reg_read = upboard_fpga_read, + .reg_write = upboard_fpga_write, + .fast_io = false, + .cache_type = REGCACHE_NONE, + .rd_table = &upboard_up2_readable_table, + .wr_table = &upboard_up2_writable_table, +}; + +static const struct mfd_cell upboard_up_mfd_cells[] = { + { .name = "upboard-pinctrl" }, + { .name = "upboard-leds" }, +}; + +static const struct upboard_fpga_data upboard_up_fpga_data = { + .type = UPBOARD_UP_FPGA, + .regmap_config = &upboard_up_regmap_config, +}; + +static const struct upboard_fpga_data upboard_up2_fpga_data = { + .type = UPBOARD_UP2_FPGA, + .regmap_config = &upboard_up2_regmap_config, +}; + +static int upboard_fpga_gpio_init(struct upboard_fpga *fpga) +{ + fpga->enable_gpio = devm_gpiod_get(fpga->dev, "enable", GPIOD_ASIS); + if (IS_ERR(fpga->enable_gpio)) + return PTR_ERR(fpga->enable_gpio); + + fpga->clear_gpio = devm_gpiod_get(fpga->dev, "clear", GPIOD_OUT_LOW); + if (IS_ERR(fpga->clear_gpio)) + return PTR_ERR(fpga->clear_gpio); + + fpga->strobe_gpio = devm_gpiod_get(fpga->dev, "strobe", GPIOD_OUT_LOW); + if (IS_ERR(fpga->strobe_gpio)) + return PTR_ERR(fpga->strobe_gpio); + + fpga->datain_gpio = devm_gpiod_get(fpga->dev, "datain", GPIOD_OUT_LOW); + if (IS_ERR(fpga->datain_gpio)) + return PTR_ERR(fpga->datain_gpio); + + fpga->dataout_gpio = devm_gpiod_get(fpga->dev, "dataout", GPIOD_IN); + if (IS_ERR(fpga->dataout_gpio)) + return PTR_ERR(fpga->dataout_gpio); + + gpiod_set_value(fpga->enable_gpio, 1); + + return 0; +} + +static int upboard_fpga_get_firmware_version(struct upboard_fpga *fpga) +{ + unsigned int platform_id, manufacturer_id; + int ret; + + if (!fpga) + return -ENOMEM; + + ret = regmap_read(fpga->regmap, UPBOARD_REG_PLATFORM_ID, &platform_id); + if (ret) + return ret; + + manufacturer_id = platform_id & UPBOARD_MANUFACTURER_ID_MASK; + if (manufacturer_id != UPBOARD_AAEON_MANUFACTURER_ID) + return dev_err_probe(fpga->dev, -ENODEV, + "driver not compatible with custom FPGA FW from manufacturer id %#02x.", + manufacturer_id); + + ret = regmap_read(fpga->regmap, UPBOARD_REG_FIRMWARE_ID, &fpga->firmware_version); + if (ret) + return ret; + + if (FIELD_GET(UPBOARD_FW_ID_MAJOR_MASK, fpga->firmware_version) != + UPBOARD_FW_ID_MAJOR_SUPPORTED) + return dev_err_probe(fpga->dev, -ENODEV, + "unsupported FPGA FW v%lu.%lu.%lu build %#02lx", + FIELD_GET(UPBOARD_FW_ID_MAJOR_MASK, fpga->firmware_version), + FIELD_GET(UPBOARD_FW_ID_MINOR_MASK, fpga->firmware_version), + FIELD_GET(UPBOARD_FW_ID_PATCH_MASK, fpga->firmware_version), + FIELD_GET(UPBOARD_FW_ID_BUILD_MASK, fpga->firmware_version)); + return 0; +} + +static ssize_t upboard_fpga_version_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct upboard_fpga *fpga = dev_get_drvdata(dev); + + return sysfs_emit(buf, "FPGA FW v%lu.%lu.%lu build %#02lx\n", + FIELD_GET(UPBOARD_FW_ID_MAJOR_MASK, fpga->firmware_version), + FIELD_GET(UPBOARD_FW_ID_MINOR_MASK, fpga->firmware_version), + FIELD_GET(UPBOARD_FW_ID_PATCH_MASK, fpga->firmware_version), + FIELD_GET(UPBOARD_FW_ID_BUILD_MASK, fpga->firmware_version)); +} + +static DEVICE_ATTR_RO(upboard_fpga_version); + +static struct attribute *upboard_fpga_attrs[] = { + &dev_attr_upboard_fpga_version.attr, + NULL +}; + +ATTRIBUTE_GROUPS(upboard_fpga); + +static int upboard_fpga_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct upboard_fpga *fpga; + int ret; + + fpga = devm_kzalloc(dev, sizeof(*fpga), GFP_KERNEL); + if (!fpga) + return -ENOMEM; + + fpga->fpga_data = device_get_match_data(dev); + + fpga->dev = dev; + + platform_set_drvdata(pdev, fpga); + + fpga->regmap = devm_regmap_init(dev, NULL, fpga, fpga->fpga_data->regmap_config); + if (IS_ERR(fpga->regmap)) + return PTR_ERR(fpga->regmap); + + ret = upboard_fpga_gpio_init(fpga); + if (ret) + return dev_err_probe(dev, ret, "Failed to initialize FPGA common GPIOs"); + + ret = upboard_fpga_get_firmware_version(fpga); + if (ret) + return ret; + + return devm_mfd_add_devices(dev, PLATFORM_DEVID_NONE, upboard_up_mfd_cells, + ARRAY_SIZE(upboard_up_mfd_cells), NULL, 0, NULL); +} + +static const struct acpi_device_id upboard_fpga_acpi_match[] = { + { "AANT0F01", (kernel_ulong_t)&upboard_up2_fpga_data }, + { "AANT0F04", (kernel_ulong_t)&upboard_up_fpga_data }, + {} +}; +MODULE_DEVICE_TABLE(acpi, upboard_fpga_acpi_match); + +static struct platform_driver upboard_fpga_driver = { + .driver = { + .name = "upboard-fpga", + .acpi_match_table = ACPI_PTR(upboard_fpga_acpi_match), + .dev_groups = upboard_fpga_groups, + }, + .probe = upboard_fpga_probe, +}; + +module_platform_driver(upboard_fpga_driver); + +MODULE_AUTHOR("Gary Wang "); +MODULE_AUTHOR("Thomas Richard "); +MODULE_DESCRIPTION("UP Board FPGA driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/mfd/upboard-fpga.h b/include/linux/mfd/upboard-fpga.h new file mode 100644 index 000000000000..12231e40f5da --- /dev/null +++ b/include/linux/mfd/upboard-fpga.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * UP Board CPLD/FPGA driver + * + * Copyright (c) AAEON. All rights reserved. + * Copyright (C) 2024 Bootlin + * + * Author: Gary Wang + * Author: Thomas Richard + * + */ + +#ifndef __LINUX_MFD_UPBOARD_FPGA_H +#define __LINUX_MFD_UPBOARD_FPGA_H + +#define UPBOARD_REGISTER_SIZE 16 + +enum upboard_fpgareg { + UPBOARD_REG_PLATFORM_ID = 0x10, + UPBOARD_REG_FIRMWARE_ID = 0x11, + UPBOARD_REG_FUNC_EN0 = 0x20, + UPBOARD_REG_FUNC_EN1 = 0x21, + UPBOARD_REG_GPIO_EN0 = 0x30, + UPBOARD_REG_GPIO_EN1 = 0x31, + UPBOARD_REG_GPIO_EN2 = 0x32, + UPBOARD_REG_GPIO_DIR0 = 0x40, + UPBOARD_REG_GPIO_DIR1 = 0x41, + UPBOARD_REG_GPIO_DIR2 = 0x42, + UPBOARD_REG_MAX, +}; + +enum upboard_fpga_type { + UPBOARD_UP_FPGA, + UPBOARD_UP2_FPGA, +}; + +struct upboard_fpga_data { + enum upboard_fpga_type type; + const struct regmap_config *regmap_config; +}; + +struct upboard_fpga { + struct device *dev; + struct regmap *regmap; + struct gpio_desc *enable_gpio; + struct gpio_desc *reset_gpio; + struct gpio_desc *clear_gpio; + struct gpio_desc *strobe_gpio; + struct gpio_desc *datain_gpio; + struct gpio_desc *dataout_gpio; + unsigned int firmware_version; + const struct upboard_fpga_data *fpga_data; +}; + +#endif /* __LINUX_MFD_UPBOARD_FPGA_H */ -- cgit v1.2.3 From 1c896113f04e34d0036ef506532d2e6cf77dd1e5 Mon Sep 17 00:00:00 2001 From: Marek Behún Date: Sun, 15 Dec 2024 22:13:23 +0100 Subject: turris-omnia-mcu-interface.h: Move macro definitions outside of enums MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the definitions of enumerator related macros outside of the enumerator definitions. Suggested-by: Lee Jones Link: https://lore.kernel.org/linux-leds/20241212183357.GK7139@google.com/ Signed-off-by: Marek Behún Link: https://lore.kernel.org/r/20241215211323.23364-1-kabel@kernel.org Signed-off-by: Lee Jones --- include/linux/turris-omnia-mcu-interface.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/turris-omnia-mcu-interface.h b/include/linux/turris-omnia-mcu-interface.h index 06c94e032c6f..38b45ab00053 100644 --- a/include/linux/turris-omnia-mcu-interface.h +++ b/include/linux/turris-omnia-mcu-interface.h @@ -241,16 +241,18 @@ enum omnia_int_e { enum omnia_cmd_led_mode_e { OMNIA_CMD_LED_MODE_LED_MASK = GENMASK(3, 0), -#define OMNIA_CMD_LED_MODE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_MODE_LED_MASK, _l) OMNIA_CMD_LED_MODE_USER = BIT(4), }; +#define OMNIA_CMD_LED_MODE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_MODE_LED_MASK, _l) + enum omnia_cmd_led_state_e { OMNIA_CMD_LED_STATE_LED_MASK = GENMASK(3, 0), -#define OMNIA_CMD_LED_STATE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_STATE_LED_MASK, _l) OMNIA_CMD_LED_STATE_ON = BIT(4), }; +#define OMNIA_CMD_LED_STATE_LED(_l) FIELD_PREP(OMNIA_CMD_LED_STATE_LED_MASK, _l) + enum omnia_cmd_poweroff_e { OMNIA_CMD_POWER_OFF_POWERON_BUTTON = BIT(0), OMNIA_CMD_POWER_OFF_MAGIC = 0xdead, -- cgit v1.2.3 From 915d2f0718a42ee0b334be34cc53664a865a5928 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 27 Nov 2024 16:55:32 -0800 Subject: KVM: Move KVM_REG_SIZE() definition to common uAPI header Define KVM_REG_SIZE() in the common kvm.h header, and delete the arm64 and RISC-V versions. As evidenced by the surrounding definitions, all aspects of the register size encoding are generic, i.e. RISC-V should have moved arm64's definition to common code instead of copy+pasting. Acked-by: Anup Patel Reviewed-by: Andrew Jones Reviewed-by: Muhammad Usama Anjum Link: https://lore.kernel.org/r/20241128005547.4077116-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/arm64/include/uapi/asm/kvm.h | 3 --- arch/riscv/include/uapi/asm/kvm.h | 3 --- include/uapi/linux/kvm.h | 4 ++++ 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 66736ff04011..568bf858f319 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -43,9 +43,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index 3482c9a73d1b..9f60d6185077 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -211,9 +211,6 @@ struct kvm_riscv_sbi_sta { #define KVM_RISCV_TIMER_STATE_OFF 0 #define KVM_RISCV_TIMER_STATE_ON 1 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - /* If you need to interpret the index values, here is the key: */ #define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000 #define KVM_REG_RISCV_TYPE_SHIFT 24 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 502ea63b5d2e..343de0a51797 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1070,6 +1070,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL -- cgit v1.2.3 From 346947223bacf96155f603528823a60b18b92d9a Mon Sep 17 00:00:00 2001 From: Przemek Kitszel Date: Mon, 16 Dec 2024 15:15:31 +0100 Subject: devlink: add devlink_fmsg_put() macro Add devlink_fmsg_put() that dispatches based on the type of the value to put, example: bool -> devlink_fmsg_bool_pair_put(). Reviewed-by: Wojciech Drewek Reviewed-by: Simon Horman Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- include/net/devlink.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index fbb9a2668e24..b5e1427ea4d7 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1261,6 +1261,17 @@ enum devlink_trap_group_generic_id { .min_burst = _min_burst, \ } +#define devlink_fmsg_put(fmsg, name, value) ( \ + _Generic((value), \ + bool : devlink_fmsg_bool_pair_put, \ + u8 : devlink_fmsg_u8_pair_put, \ + u16 : devlink_fmsg_u32_pair_put, \ + u32 : devlink_fmsg_u32_pair_put, \ + u64 : devlink_fmsg_u64_pair_put, \ + char * : devlink_fmsg_string_pair_put, \ + const char * : devlink_fmsg_string_pair_put) \ + (fmsg, name, (value))) + enum { /* device supports reload operations */ DEVLINK_F_RELOAD = 1UL << 0, -- cgit v1.2.3 From 3dbfde7f6bc7b8efff26e3e98fdd8cba20287da7 Mon Sep 17 00:00:00 2001 From: Mateusz Polchlopek Date: Mon, 16 Dec 2024 15:15:32 +0100 Subject: devlink: add devlink_fmsg_dump_skb() function Add devlink_fmsg_dump_skb() function that adds some diagnostic information about skb (like length, pkt type, MAC, etc) to devlink fmsg mechanism using bunch of devlink_fmsg_put() function calls. Signed-off-by: Mateusz Polchlopek Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Przemek Kitszel Signed-off-by: Tony Nguyen --- include/net/devlink.h | 2 ++ net/devlink/health.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) (limited to 'include') diff --git a/include/net/devlink.h b/include/net/devlink.h index b5e1427ea4d7..58e33959c852 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1268,6 +1268,7 @@ enum devlink_trap_group_generic_id { u16 : devlink_fmsg_u32_pair_put, \ u32 : devlink_fmsg_u32_pair_put, \ u64 : devlink_fmsg_u64_pair_put, \ + int : devlink_fmsg_u32_pair_put, \ char * : devlink_fmsg_string_pair_put, \ const char * : devlink_fmsg_string_pair_put) \ (fmsg, name, (value))) @@ -2005,6 +2006,7 @@ int devlink_compat_switch_id_get(struct net_device *dev, int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port); size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port); +void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb); #else diff --git a/net/devlink/health.c b/net/devlink/health.c index b8d3084e6fe0..57db6799722a 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -1238,3 +1238,70 @@ int devlink_nl_health_reporter_test_doit(struct sk_buff *skb, return reporter->ops->test(reporter, info->extack); } + +/** + * devlink_fmsg_dump_skb - Dump sk_buffer structure + * @fmsg: devlink formatted message pointer + * @skb: pointer to skb + * + * Dump diagnostic information about sk_buff structure, like headroom, length, + * tailroom, MAC, etc. + */ +void devlink_fmsg_dump_skb(struct devlink_fmsg *fmsg, const struct sk_buff *skb) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + struct sock *sk = skb->sk; + bool has_mac, has_trans; + + has_mac = skb_mac_header_was_set(skb); + has_trans = skb_transport_header_was_set(skb); + + devlink_fmsg_pair_nest_start(fmsg, "skb"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "actual len", skb->len); + devlink_fmsg_put(fmsg, "head len", skb_headlen(skb)); + devlink_fmsg_put(fmsg, "data len", skb->data_len); + devlink_fmsg_put(fmsg, "tail len", skb_tailroom(skb)); + devlink_fmsg_put(fmsg, "MAC", has_mac ? skb->mac_header : -1); + devlink_fmsg_put(fmsg, "MAC len", + has_mac ? skb_mac_header_len(skb) : -1); + devlink_fmsg_put(fmsg, "network hdr", skb->network_header); + devlink_fmsg_put(fmsg, "network hdr len", + has_trans ? skb_network_header_len(skb) : -1); + devlink_fmsg_put(fmsg, "transport hdr", + has_trans ? skb->transport_header : -1); + devlink_fmsg_put(fmsg, "csum", (__force u32)skb->csum); + devlink_fmsg_put(fmsg, "csum_ip_summed", (u8)skb->ip_summed); + devlink_fmsg_put(fmsg, "csum_complete_sw", !!skb->csum_complete_sw); + devlink_fmsg_put(fmsg, "csum_valid", !!skb->csum_valid); + devlink_fmsg_put(fmsg, "csum_level", (u8)skb->csum_level); + devlink_fmsg_put(fmsg, "sw_hash", !!skb->sw_hash); + devlink_fmsg_put(fmsg, "l4_hash", !!skb->l4_hash); + devlink_fmsg_put(fmsg, "proto", ntohs(skb->protocol)); + devlink_fmsg_put(fmsg, "pkt_type", (u8)skb->pkt_type); + devlink_fmsg_put(fmsg, "iif", skb->skb_iif); + + if (sk) { + devlink_fmsg_pair_nest_start(fmsg, "sk"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "family", sk->sk_type); + devlink_fmsg_put(fmsg, "type", sk->sk_type); + devlink_fmsg_put(fmsg, "proto", sk->sk_protocol); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + } + + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + + devlink_fmsg_pair_nest_start(fmsg, "shinfo"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_put(fmsg, "tx_flags", sh->tx_flags); + devlink_fmsg_put(fmsg, "nr_frags", sh->nr_frags); + devlink_fmsg_put(fmsg, "gso_size", sh->gso_size); + devlink_fmsg_put(fmsg, "gso_type", sh->gso_type); + devlink_fmsg_put(fmsg, "gso_segs", sh->gso_segs); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); +} +EXPORT_SYMBOL_GPL(devlink_fmsg_dump_skb); -- cgit v1.2.3 From 7c4b497fd4032935676b9024396f187fee005739 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Tue, 17 Dec 2024 18:41:54 +0100 Subject: clk: davinci: remove platform data struct There are no board files using struct davinci_pll_platform_data anymore. The structure itself is currently used to store a single pointer. Let's remove the struct definition, the header and rework the driver to not require the syscon regmap to be stored in probe(). Signed-off-by: Bartosz Golaszewski Link: https://lore.kernel.org/r/20241217174154.84441-1-brgl@bgdev.pl Reviewed-by: David Lechner Signed-off-by: Stephen Boyd --- drivers/clk/davinci/pll.c | 32 +++------------------------ include/linux/platform_data/clk-davinci-pll.h | 21 ------------------ 2 files changed, 3 insertions(+), 50 deletions(-) delete mode 100644 include/linux/platform_data/clk-davinci-pll.h (limited to 'include') diff --git a/drivers/clk/davinci/pll.c b/drivers/clk/davinci/pll.c index 5bbbb3a66477..82727b1fc67a 100644 --- a/drivers/clk/davinci/pll.c +++ b/drivers/clk/davinci/pll.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include @@ -840,27 +839,6 @@ int of_davinci_pll_init(struct device *dev, struct device_node *node, return 0; } -static struct davinci_pll_platform_data *davinci_pll_get_pdata(struct device *dev) -{ - struct davinci_pll_platform_data *pdata = dev_get_platdata(dev); - - /* - * Platform data is optional, so allocate a new struct if one was not - * provided. For device tree, this will always be the case. - */ - if (!pdata) - pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); - if (!pdata) - return NULL; - - /* for device tree, we need to fill in the struct */ - if (dev->of_node) - pdata->cfgchip = - syscon_regmap_lookup_by_compatible("ti,da830-cfgchip"); - - return pdata; -} - /* needed in early boot for clocksource/clockevent */ #ifdef CONFIG_ARCH_DAVINCI_DA850 CLK_OF_DECLARE(da850_pll0, "ti,da850-pll0", of_da850_pll0_init); @@ -890,8 +868,8 @@ typedef int (*davinci_pll_init)(struct device *dev, void __iomem *base, static int davinci_pll_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct davinci_pll_platform_data *pdata; davinci_pll_init pll_init = NULL; + struct regmap *cfgchip; void __iomem *base; pll_init = device_get_match_data(dev); @@ -903,17 +881,13 @@ static int davinci_pll_probe(struct platform_device *pdev) return -EINVAL; } - pdata = davinci_pll_get_pdata(dev); - if (!pdata) { - dev_err(dev, "missing platform data\n"); - return -EINVAL; - } + cfgchip = syscon_regmap_lookup_by_compatible("ti,da830-cfgchip"); base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(base)) return PTR_ERR(base); - return pll_init(dev, base, pdata->cfgchip); + return pll_init(dev, base, cfgchip); } static struct platform_driver davinci_pll_driver = { diff --git a/include/linux/platform_data/clk-davinci-pll.h b/include/linux/platform_data/clk-davinci-pll.h deleted file mode 100644 index e55dab1d578b..000000000000 --- a/include/linux/platform_data/clk-davinci-pll.h +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PLL clock driver for TI Davinci SoCs - * - * Copyright (C) 2018 David Lechner - */ - -#ifndef __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ -#define __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ - -#include - -/** - * davinci_pll_platform_data - * @cfgchip: CFGCHIP syscon regmap - */ -struct davinci_pll_platform_data { - struct regmap *cfgchip; -}; - -#endif /* __LINUX_PLATFORM_DATA_CLK_DAVINCI_PLL_H__ */ -- cgit v1.2.3 From d5af79c05e9382d38b8546dc5362381ce07ba3d1 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 9 Dec 2024 16:00:41 -0800 Subject: Documentation: move dev-tools debugging files to process/debugging/ Move gdb and kgdb debugging documentation to the dedicated debugging directory (Documentation/process/debugging/). Adjust the index.rst files to follow the file movement. Adjust files that refer to these moved files to follow the file movement. Update location of kgdb.rst in MAINTAINERS file. Add a link from dev-tools/index to process/debugging/index. Note: translations are not updated. Signed-off-by: Randy Dunlap Cc: Sebastian Fricke Cc: Jonathan Corbet Cc: workflows@vger.kernel.org Cc: Jason Wessel Cc: Daniel Thompson Cc: Douglas Anderson Cc: linux-debuggers@vger.kernel.org Cc: kgdb-bugreport@lists.sourceforge.net Cc: Doug Anderson Cc: Alex Shi Cc: Hu Haowen <2023002089@link.tyut.edu.cn> Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: linux-serial@vger.kernel.org Acked-by: Greg Kroah-Hartman Acked-by: Daniel Thompson Reviewed-by: Douglas Anderson Signed-off-by: Jonathan Corbet Link: https://lore.kernel.org/r/20241210000041.305477-1-rdunlap@infradead.org --- Documentation/admin-guide/README.rst | 4 +- Documentation/dev-tools/gdb-kernel-debugging.rst | 179 ---- Documentation/dev-tools/index.rst | 5 +- Documentation/dev-tools/kgdb.rst | 937 --------------------- .../process/debugging/gdb-kernel-debugging.rst | 179 ++++ Documentation/process/debugging/index.rst | 2 + Documentation/process/debugging/kgdb.rst | 937 +++++++++++++++++++++ MAINTAINERS | 2 +- include/linux/tty_driver.h | 2 +- lib/Kconfig.debug | 2 +- lib/Kconfig.kgdb | 2 +- 11 files changed, 1127 insertions(+), 1124 deletions(-) delete mode 100644 Documentation/dev-tools/gdb-kernel-debugging.rst delete mode 100644 Documentation/dev-tools/kgdb.rst create mode 100644 Documentation/process/debugging/gdb-kernel-debugging.rst create mode 100644 Documentation/process/debugging/kgdb.rst (limited to 'include') diff --git a/Documentation/admin-guide/README.rst b/Documentation/admin-guide/README.rst index f2bebff6a733..eb9452668909 100644 --- a/Documentation/admin-guide/README.rst +++ b/Documentation/admin-guide/README.rst @@ -356,5 +356,5 @@ instructions at 'Documentation/admin-guide/reporting-issues.rst'. Hints on understanding kernel bug reports are in 'Documentation/admin-guide/bug-hunting.rst'. More on debugging the kernel -with gdb is in 'Documentation/dev-tools/gdb-kernel-debugging.rst' and -'Documentation/dev-tools/kgdb.rst'. +with gdb is in 'Documentation/process/debugging/gdb-kernel-debugging.rst' and +'Documentation/process/debugging/kgdb.rst'. diff --git a/Documentation/dev-tools/gdb-kernel-debugging.rst b/Documentation/dev-tools/gdb-kernel-debugging.rst deleted file mode 100644 index 895285c037c7..000000000000 --- a/Documentation/dev-tools/gdb-kernel-debugging.rst +++ /dev/null @@ -1,179 +0,0 @@ -.. highlight:: none - -Debugging kernel and modules via gdb -==================================== - -The kernel debugger kgdb, hypervisors like QEMU or JTAG-based hardware -interfaces allow to debug the Linux kernel and its modules during runtime -using gdb. Gdb comes with a powerful scripting interface for python. The -kernel provides a collection of helper scripts that can simplify typical -kernel debugging steps. This is a short tutorial about how to enable and use -them. It focuses on QEMU/KVM virtual machines as target, but the examples can -be transferred to the other gdb stubs as well. - - -Requirements ------------- - -- gdb 7.2+ (recommended: 7.4+) with python support enabled (typically true - for distributions) - - -Setup ------ - -- Create a virtual Linux machine for QEMU/KVM (see www.linux-kvm.org and - www.qemu.org for more details). For cross-development, - https://landley.net/aboriginal/bin keeps a pool of machine images and - toolchains that can be helpful to start from. - -- Build the kernel with CONFIG_GDB_SCRIPTS enabled, but leave - CONFIG_DEBUG_INFO_REDUCED off. If your architecture supports - CONFIG_FRAME_POINTER, keep it enabled. - -- Install that kernel on the guest, turn off KASLR if necessary by adding - "nokaslr" to the kernel command line. - Alternatively, QEMU allows to boot the kernel directly using -kernel, - -append, -initrd command line switches. This is generally only useful if - you do not depend on modules. See QEMU documentation for more details on - this mode. In this case, you should build the kernel with - CONFIG_RANDOMIZE_BASE disabled if the architecture supports KASLR. - -- Build the gdb scripts (required on kernels v5.1 and above):: - - make scripts_gdb - -- Enable the gdb stub of QEMU/KVM, either - - - at VM startup time by appending "-s" to the QEMU command line - - or - - - during runtime by issuing "gdbserver" from the QEMU monitor - console - -- cd /path/to/linux-build - -- Start gdb: gdb vmlinux - - Note: Some distros may restrict auto-loading of gdb scripts to known safe - directories. In case gdb reports to refuse loading vmlinux-gdb.py, add:: - - add-auto-load-safe-path /path/to/linux-build - - to ~/.gdbinit. See gdb help for more details. - -- Attach to the booted guest:: - - (gdb) target remote :1234 - - -Examples of using the Linux-provided gdb helpers ------------------------------------------------- - -- Load module (and main kernel) symbols:: - - (gdb) lx-symbols - loading vmlinux - scanning for modules in /home/user/linux/build - loading @0xffffffffa0020000: /home/user/linux/build/net/netfilter/xt_tcpudp.ko - loading @0xffffffffa0016000: /home/user/linux/build/net/netfilter/xt_pkttype.ko - loading @0xffffffffa0002000: /home/user/linux/build/net/netfilter/xt_limit.ko - loading @0xffffffffa00ca000: /home/user/linux/build/net/packet/af_packet.ko - loading @0xffffffffa003c000: /home/user/linux/build/fs/fuse/fuse.ko - ... - loading @0xffffffffa0000000: /home/user/linux/build/drivers/ata/ata_generic.ko - -- Set a breakpoint on some not yet loaded module function, e.g.:: - - (gdb) b btrfs_init_sysfs - Function "btrfs_init_sysfs" not defined. - Make breakpoint pending on future shared library load? (y or [n]) y - Breakpoint 1 (btrfs_init_sysfs) pending. - -- Continue the target:: - - (gdb) c - -- Load the module on the target and watch the symbols being loaded as well as - the breakpoint hit:: - - loading @0xffffffffa0034000: /home/user/linux/build/lib/libcrc32c.ko - loading @0xffffffffa0050000: /home/user/linux/build/lib/lzo/lzo_compress.ko - loading @0xffffffffa006e000: /home/user/linux/build/lib/zlib_deflate/zlib_deflate.ko - loading @0xffffffffa01b1000: /home/user/linux/build/fs/btrfs/btrfs.ko - - Breakpoint 1, btrfs_init_sysfs () at /home/user/linux/fs/btrfs/sysfs.c:36 - 36 btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); - -- Dump the log buffer of the target kernel:: - - (gdb) lx-dmesg - [ 0.000000] Initializing cgroup subsys cpuset - [ 0.000000] Initializing cgroup subsys cpu - [ 0.000000] Linux version 3.8.0-rc4-dbg+ (... - [ 0.000000] Command line: root=/dev/sda2 resume=/dev/sda1 vga=0x314 - [ 0.000000] e820: BIOS-provided physical RAM map: - [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009fbff] usable - [ 0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved - .... - -- Examine fields of the current task struct(supported by x86 and arm64 only):: - - (gdb) p $lx_current().pid - $1 = 4998 - (gdb) p $lx_current().comm - $2 = "modprobe\000\000\000\000\000\000\000" - -- Make use of the per-cpu function for the current or a specified CPU:: - - (gdb) p $lx_per_cpu("runqueues").nr_running - $3 = 1 - (gdb) p $lx_per_cpu("runqueues", 2).nr_running - $4 = 0 - -- Dig into hrtimers using the container_of helper:: - - (gdb) set $next = $lx_per_cpu("hrtimer_bases").clock_base[0].active.next - (gdb) p *$container_of($next, "struct hrtimer", "node") - $5 = { - node = { - node = { - __rb_parent_color = 18446612133355256072, - rb_right = 0x0 , - rb_left = 0x0 - }, - expires = { - tv64 = 1835268000000 - } - }, - _softexpires = { - tv64 = 1835268000000 - }, - function = 0xffffffff81078232 , - base = 0xffff88003fd0d6f0, - state = 1, - start_pid = 0, - start_site = 0xffffffff81055c1f , - start_comm = "swapper/2\000\000\000\000\000\000" - } - - -List of commands and functions ------------------------------- - -The number of commands and convenience functions may evolve over the time, -this is just a snapshot of the initial version:: - - (gdb) apropos lx - function lx_current -- Return current task - function lx_module -- Find module by name and return the module variable - function lx_per_cpu -- Return per-cpu variable - function lx_task_by_pid -- Find Linux task by PID and return the task_struct variable - function lx_thread_info -- Calculate Linux thread_info from task variable - lx-dmesg -- Print Linux kernel log buffer - lx-lsmod -- List currently loaded modules - lx-symbols -- (Re-)load symbols of Linux kernel and currently loaded modules - -Detailed help can be obtained via "help " for commands and "help -function " for convenience functions. diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst index 3c0ac08b2709..65c54b27a60b 100644 --- a/Documentation/dev-tools/index.rst +++ b/Documentation/dev-tools/index.rst @@ -10,6 +10,9 @@ whole; patches welcome! A brief overview of testing-specific tools can be found in Documentation/dev-tools/testing-overview.rst +Tools that are specific to debugging can be found in +Documentation/process/debugging/index.rst + .. toctree:: :caption: Table of contents :maxdepth: 2 @@ -27,8 +30,6 @@ Documentation/dev-tools/testing-overview.rst kmemleak kcsan kfence - gdb-kernel-debugging - kgdb kselftest kunit/index ktap diff --git a/Documentation/dev-tools/kgdb.rst b/Documentation/dev-tools/kgdb.rst deleted file mode 100644 index b29b0aac2717..000000000000 --- a/Documentation/dev-tools/kgdb.rst +++ /dev/null @@ -1,937 +0,0 @@ -================================================= -Using kgdb, kdb and the kernel debugger internals -================================================= - -:Author: Jason Wessel - -Introduction -============ - -The kernel has two different debugger front ends (kdb and kgdb) which -interface to the debug core. It is possible to use either of the -debugger front ends and dynamically transition between them if you -configure the kernel properly at compile and runtime. - -Kdb is simplistic shell-style interface which you can use on a system -console with a keyboard or serial console. You can use it to inspect -memory, registers, process lists, dmesg, and even set breakpoints to -stop in a certain location. Kdb is not a source level debugger, although -you can set breakpoints and execute some basic kernel run control. Kdb -is mainly aimed at doing some analysis to aid in development or -diagnosing kernel problems. You can access some symbols by name in -kernel built-ins or in kernel modules if the code was built with -``CONFIG_KALLSYMS``. - -Kgdb is intended to be used as a source level debugger for the Linux -kernel. It is used along with gdb to debug a Linux kernel. The -expectation is that gdb can be used to "break in" to the kernel to -inspect memory, variables and look through call stack information -similar to the way an application developer would use gdb to debug an -application. It is possible to place breakpoints in kernel code and -perform some limited execution stepping. - -Two machines are required for using kgdb. One of these machines is a -development machine and the other is the target machine. The kernel to -be debugged runs on the target machine. The development machine runs an -instance of gdb against the vmlinux file which contains the symbols (not -a boot image such as bzImage, zImage, uImage...). In gdb the developer -specifies the connection parameters and connects to kgdb. The type of -connection a developer makes with gdb depends on the availability of -kgdb I/O modules compiled as built-ins or loadable kernel modules in the -test machine's kernel. - -Compiling a kernel -================== - -- In order to enable compilation of kdb, you must first enable kgdb. - -- The kgdb test compile options are described in the kgdb test suite - chapter. - -Kernel config options for kgdb ------------------------------- - -To enable ``CONFIG_KGDB`` you should look under -:menuselection:`Kernel hacking --> Kernel debugging` and select -:menuselection:`KGDB: kernel debugger`. - -While it is not a hard requirement that you have symbols in your vmlinux -file, gdb tends not to be very useful without the symbolic data, so you -will want to turn on ``CONFIG_DEBUG_INFO`` which is called -:menuselection:`Compile the kernel with debug info` in the config menu. - -It is advised, but not required, that you turn on the -``CONFIG_FRAME_POINTER`` kernel option which is called :menuselection:`Compile -the kernel with frame pointers` in the config menu. This option inserts code -into the compiled executable which saves the frame information in registers -or on the stack at different points which allows a debugger such as gdb to -more accurately construct stack back traces while debugging the kernel. - -If the architecture that you are using supports the kernel option -``CONFIG_STRICT_KERNEL_RWX``, you should consider turning it off. This -option will prevent the use of software breakpoints because it marks -certain regions of the kernel's memory space as read-only. If kgdb -supports it for the architecture you are using, you can use hardware -breakpoints if you desire to run with the ``CONFIG_STRICT_KERNEL_RWX`` -option turned on, else you need to turn off this option. - -Next you should choose one or more I/O drivers to interconnect the debugging -host and debugged target. Early boot debugging requires a KGDB I/O -driver that supports early debugging and the driver must be built into -the kernel directly. Kgdb I/O driver configuration takes place via -kernel or module parameters which you can learn more about in the -section that describes the parameter kgdboc. - -Here is an example set of ``.config`` symbols to enable or disable for kgdb:: - - # CONFIG_STRICT_KERNEL_RWX is not set - CONFIG_FRAME_POINTER=y - CONFIG_KGDB=y - CONFIG_KGDB_SERIAL_CONSOLE=y - -Kernel config options for kdb ------------------------------ - -Kdb is quite a bit more complex than the simple gdbstub sitting on top -of the kernel's debug core. Kdb must implement a shell, and also adds -some helper functions in other parts of the kernel, responsible for -printing out interesting data such as what you would see if you ran -``lsmod``, or ``ps``. In order to build kdb into the kernel you follow the -same steps as you would for kgdb. - -The main config option for kdb is ``CONFIG_KGDB_KDB`` which is called -:menuselection:`KGDB_KDB: include kdb frontend for kgdb` in the config menu. -In theory you would have already also selected an I/O driver such as the -``CONFIG_KGDB_SERIAL_CONSOLE`` interface if you plan on using kdb on a -serial port, when you were configuring kgdb. - -If you want to use a PS/2-style keyboard with kdb, you would select -``CONFIG_KDB_KEYBOARD`` which is called :menuselection:`KGDB_KDB: keyboard as -input device` in the config menu. The ``CONFIG_KDB_KEYBOARD`` option is not -used for anything in the gdb interface to kgdb. The ``CONFIG_KDB_KEYBOARD`` -option only works with kdb. - -Here is an example set of ``.config`` symbols to enable/disable kdb:: - - # CONFIG_STRICT_KERNEL_RWX is not set - CONFIG_FRAME_POINTER=y - CONFIG_KGDB=y - CONFIG_KGDB_SERIAL_CONSOLE=y - CONFIG_KGDB_KDB=y - CONFIG_KDB_KEYBOARD=y - -Kernel Debugger Boot Arguments -============================== - -This section describes the various runtime kernel parameters that affect -the configuration of the kernel debugger. The following chapter covers -using kdb and kgdb as well as providing some examples of the -configuration parameters. - -Kernel parameter: kgdboc ------------------------- - -The kgdboc driver was originally an abbreviation meant to stand for -"kgdb over console". Today it is the primary mechanism to configure how -to communicate from gdb to kgdb as well as the devices you want to use -to interact with the kdb shell. - -For kgdb/gdb, kgdboc is designed to work with a single serial port. It -is intended to cover the circumstance where you want to use a serial -console as your primary console as well as using it to perform kernel -debugging. It is also possible to use kgdb on a serial port which is not -designated as a system console. Kgdboc may be configured as a kernel -built-in or a kernel loadable module. You can only make use of -``kgdbwait`` and early debugging if you build kgdboc into the kernel as -a built-in. - -Optionally you can elect to activate kms (Kernel Mode Setting) -integration. When you use kms with kgdboc and you have a video driver -that has atomic mode setting hooks, it is possible to enter the debugger -on the graphics console. When the kernel execution is resumed, the -previous graphics mode will be restored. This integration can serve as a -useful tool to aid in diagnosing crashes or doing analysis of memory -with kdb while allowing the full graphics console applications to run. - -kgdboc arguments -~~~~~~~~~~~~~~~~ - -Usage:: - - kgdboc=[kms][[,]kbd][[,]serial_device][,baud] - -The order listed above must be observed if you use any of the optional -configurations together. - -Abbreviations: - -- kms = Kernel Mode Setting - -- kbd = Keyboard - -You can configure kgdboc to use the keyboard, and/or a serial device -depending on if you are using kdb and/or kgdb, in one of the following -scenarios. The order listed above must be observed if you use any of the -optional configurations together. Using kms + only gdb is generally not -a useful combination. - -Using loadable module or built-in -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -1. As a kernel built-in: - - Use the kernel boot argument:: - - kgdboc=,[baud] - -2. As a kernel loadable module: - - Use the command:: - - modprobe kgdboc kgdboc=,[baud] - - Here are two examples of how you might format the kgdboc string. The - first is for an x86 target using the first serial port. The second - example is for the ARM Versatile AB using the second serial port. - - 1. ``kgdboc=ttyS0,115200`` - - 2. ``kgdboc=ttyAMA1,115200`` - -Configure kgdboc at runtime with sysfs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -At run time you can enable or disable kgdboc by writing parameters -into sysfs. Here are two examples: - -1. Enable kgdboc on ttyS0:: - - echo ttyS0 > /sys/module/kgdboc/parameters/kgdboc - -2. Disable kgdboc:: - - echo "" > /sys/module/kgdboc/parameters/kgdboc - -.. note:: - - You do not need to specify the baud if you are configuring the - console on tty which is already configured or open. - -More examples -^^^^^^^^^^^^^ - -You can configure kgdboc to use the keyboard, and/or a serial device -depending on if you are using kdb and/or kgdb, in one of the following -scenarios. - -1. kdb and kgdb over only a serial port:: - - kgdboc=[,baud] - - Example:: - - kgdboc=ttyS0,115200 - -2. kdb and kgdb with keyboard and a serial port:: - - kgdboc=kbd,[,baud] - - Example:: - - kgdboc=kbd,ttyS0,115200 - -3. kdb with a keyboard:: - - kgdboc=kbd - -4. kdb with kernel mode setting:: - - kgdboc=kms,kbd - -5. kdb with kernel mode setting and kgdb over a serial port:: - - kgdboc=kms,kbd,ttyS0,115200 - -.. note:: - - Kgdboc does not support interrupting the target via the gdb remote - protocol. You must manually send a `SysRq-G` unless you have a proxy - that splits console output to a terminal program. A console proxy has a - separate TCP port for the debugger and a separate TCP port for the - "human" console. The proxy can take care of sending the `SysRq-G` - for you. - -When using kgdboc with no debugger proxy, you can end up connecting the -debugger at one of two entry points. If an exception occurs after you -have loaded kgdboc, a message should print on the console stating it is -waiting for the debugger. In this case you disconnect your terminal -program and then connect the debugger in its place. If you want to -interrupt the target system and forcibly enter a debug session you have -to issue a `Sysrq` sequence and then type the letter `g`. Then you -disconnect the terminal session and connect gdb. Your options if you -don't like this are to hack gdb to send the `SysRq-G` for you as well as -on the initial connect, or to use a debugger proxy that allows an -unmodified gdb to do the debugging. - -Kernel parameter: ``kgdboc_earlycon`` -------------------------------------- - -If you specify the kernel parameter ``kgdboc_earlycon`` and your serial -driver registers a boot console that supports polling (doesn't need -interrupts and implements a nonblocking read() function) kgdb will attempt -to work using the boot console until it can transition to the regular -tty driver specified by the ``kgdboc`` parameter. - -Normally there is only one boot console (especially that implements the -read() function) so just adding ``kgdboc_earlycon`` on its own is -sufficient to make this work. If you have more than one boot console you -can add the boot console's name to differentiate. Note that names that -are registered through the boot console layer and the tty layer are not -the same for the same port. - -For instance, on one board to be explicit you might do:: - - kgdboc_earlycon=qcom_geni kgdboc=ttyMSM0 - -If the only boot console on the device was "qcom_geni", you could simplify:: - - kgdboc_earlycon kgdboc=ttyMSM0 - -Kernel parameter: ``kgdbwait`` ------------------------------- - -The Kernel command line option ``kgdbwait`` makes kgdb wait for a -debugger connection during booting of a kernel. You can only use this -option if you compiled a kgdb I/O driver into the kernel and you -specified the I/O driver configuration as a kernel command line option. -The kgdbwait parameter should always follow the configuration parameter -for the kgdb I/O driver in the kernel command line else the I/O driver -will not be configured prior to asking the kernel to use it to wait. - -The kernel will stop and wait as early as the I/O driver and -architecture allows when you use this option. If you build the kgdb I/O -driver as a loadable kernel module kgdbwait will not do anything. - -Kernel parameter: ``kgdbcon`` ------------------------------ - -The ``kgdbcon`` feature allows you to see printk() messages inside gdb -while gdb is connected to the kernel. Kdb does not make use of the kgdbcon -feature. - -Kgdb supports using the gdb serial protocol to send console messages to -the debugger when the debugger is connected and running. There are two -ways to activate this feature. - -1. Activate with the kernel command line option:: - - kgdbcon - -2. Use sysfs before configuring an I/O driver:: - - echo 1 > /sys/module/debug_core/parameters/kgdb_use_con - -.. note:: - - If you do this after you configure the kgdb I/O driver, the - setting will not take effect until the next point the I/O is - reconfigured. - -.. important:: - - You cannot use kgdboc + kgdbcon on a tty that is an - active system console. An example of incorrect usage is:: - - console=ttyS0,115200 kgdboc=ttyS0 kgdbcon - -It is possible to use this option with kgdboc on a tty that is not a -system console. - -Run time parameter: ``kgdbreboot`` ----------------------------------- - -The kgdbreboot feature allows you to change how the debugger deals with -the reboot notification. You have 3 choices for the behavior. The -default behavior is always set to 0. - -.. tabularcolumns:: |p{0.4cm}|p{11.5cm}|p{5.6cm}| - -.. flat-table:: - :widths: 1 10 8 - - * - 1 - - ``echo -1 > /sys/module/debug_core/parameters/kgdbreboot`` - - Ignore the reboot notification entirely. - - * - 2 - - ``echo 0 > /sys/module/debug_core/parameters/kgdbreboot`` - - Send the detach message to any attached debugger client. - - * - 3 - - ``echo 1 > /sys/module/debug_core/parameters/kgdbreboot`` - - Enter the debugger on reboot notify. - -Kernel parameter: ``nokaslr`` ------------------------------ - -If the architecture that you are using enables KASLR by default, -you should consider turning it off. KASLR randomizes the -virtual address where the kernel image is mapped and confuses -gdb which resolves addresses of kernel symbols from the symbol table -of vmlinux. - -Using kdb -========= - -Quick start for kdb on a serial port ------------------------------------- - -This is a quick example of how to use kdb. - -1. Configure kgdboc at boot using kernel parameters:: - - console=ttyS0,115200 kgdboc=ttyS0,115200 nokaslr - - OR - - Configure kgdboc after the kernel has booted; assuming you are using - a serial port console:: - - echo ttyS0 > /sys/module/kgdboc/parameters/kgdboc - -2. Enter the kernel debugger manually or by waiting for an oops or - fault. There are several ways you can enter the kernel debugger - manually; all involve using the `SysRq-G`, which means you must have - enabled ``CONFIG_MAGIC_SYSRQ=y`` in your kernel config. - - - When logged in as root or with a super user session you can run:: - - echo g > /proc/sysrq-trigger - - - Example using minicom 2.2 - - Press: `CTRL-A` `f` `g` - - - When you have telneted to a terminal server that supports sending - a remote break - - Press: `CTRL-]` - - Type in: ``send break`` - - Press: `Enter` `g` - -3. From the kdb prompt you can run the ``help`` command to see a complete - list of the commands that are available. - - Some useful commands in kdb include: - - =========== ================================================================= - ``lsmod`` Shows where kernel modules are loaded - ``ps`` Displays only the active processes - ``ps A`` Shows all the processes - ``summary`` Shows kernel version info and memory usage - ``bt`` Get a backtrace of the current process using dump_stack() - ``dmesg`` View the kernel syslog buffer - ``go`` Continue the system - =========== ================================================================= - -4. When you are done using kdb you need to consider rebooting the system - or using the ``go`` command to resuming normal kernel execution. If you - have paused the kernel for a lengthy period of time, applications - that rely on timely networking or anything to do with real wall clock - time could be adversely affected, so you should take this into - consideration when using the kernel debugger. - -Quick start for kdb using a keyboard connected console ------------------------------------------------------- - -This is a quick example of how to use kdb with a keyboard. - -1. Configure kgdboc at boot using kernel parameters:: - - kgdboc=kbd - - OR - - Configure kgdboc after the kernel has booted:: - - echo kbd > /sys/module/kgdboc/parameters/kgdboc - -2. Enter the kernel debugger manually or by waiting for an oops or - fault. There are several ways you can enter the kernel debugger - manually; all involve using the `SysRq-G`, which means you must have - enabled ``CONFIG_MAGIC_SYSRQ=y`` in your kernel config. - - - When logged in as root or with a super user session you can run:: - - echo g > /proc/sysrq-trigger - - - Example using a laptop keyboard: - - Press and hold down: `Alt` - - Press and hold down: `Fn` - - Press and release the key with the label: `SysRq` - - Release: `Fn` - - Press and release: `g` - - Release: `Alt` - - - Example using a PS/2 101-key keyboard - - Press and hold down: `Alt` - - Press and release the key with the label: `SysRq` - - Press and release: `g` - - Release: `Alt` - -3. Now type in a kdb command such as ``help``, ``dmesg``, ``bt`` or ``go`` to - continue kernel execution. - -Using kgdb / gdb -================ - -In order to use kgdb you must activate it by passing configuration -information to one of the kgdb I/O drivers. If you do not pass any -configuration information kgdb will not do anything at all. Kgdb will -only actively hook up to the kernel trap hooks if a kgdb I/O driver is -loaded and configured. If you unconfigure a kgdb I/O driver, kgdb will -unregister all the kernel hook points. - -All kgdb I/O drivers can be reconfigured at run time, if -``CONFIG_SYSFS`` and ``CONFIG_MODULES`` are enabled, by echo'ing a new -config string to ``/sys/module//parameter/