diff options
| author | Tejun Heo <tj@kernel.org> | 2026-03-06 20:48:08 +0300 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2026-03-06 20:48:21 +0300 |
| commit | a0b0f6c7d7f29f1ade9ec59699d02e3b153ee8e4 (patch) | |
| tree | 2c673c2d689a0a0da7b8a6584ef368ba8f6aa164 /include | |
| parent | 32e940f2bd3b16551f23ea44be47f6f5d1746d64 (diff) | |
| parent | 5b30afc20b3fea29b9beb83c6415c4ff06f774aa (diff) | |
| download | linux-a0b0f6c7d7f29f1ade9ec59699d02e3b153ee8e4.tar.xz | |
Merge branch 'for-7.1' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup into for-7.1
To receive 5b30afc20b3f ("cgroup: Expose some cgroup helpers") which will be
used by sub-sched support.
Signed-off-by: Tejun Heo <tj@kernel.org>
Diffstat (limited to 'include')
| -rw-r--r-- | include/kunit/run-in-irq-context.h | 44 | ||||
| -rw-r--r-- | include/linux/cgroup.h | 65 | ||||
| -rw-r--r-- | include/linux/indirect_call_wrapper.h | 18 | ||||
| -rw-r--r-- | include/linux/kthread.h | 21 | ||||
| -rw-r--r-- | include/linux/netdevice.h | 27 | ||||
| -rw-r--r-- | include/linux/ns_common.h | 2 | ||||
| -rw-r--r-- | include/linux/ring_buffer.h | 1 | ||||
| -rw-r--r-- | include/linux/usb/r8152.h | 1 | ||||
| -rw-r--r-- | include/net/act_api.h | 1 | ||||
| -rw-r--r-- | include/net/bonding.h | 1 | ||||
| -rw-r--r-- | include/net/inet6_hashtables.h | 2 | ||||
| -rw-r--r-- | include/net/inet_hashtables.h | 2 | ||||
| -rw-r--r-- | include/net/ip.h | 2 | ||||
| -rw-r--r-- | include/net/ip_fib.h | 2 | ||||
| -rw-r--r-- | include/net/libeth/xsk.h | 3 | ||||
| -rw-r--r-- | include/net/netfilter/nf_tables.h | 7 | ||||
| -rw-r--r-- | include/net/sch_generic.h | 10 | ||||
| -rw-r--r-- | include/net/secure_seq.h | 45 | ||||
| -rw-r--r-- | include/net/tc_act/tc_gate.h | 33 | ||||
| -rw-r--r-- | include/net/tc_act/tc_ife.h | 4 | ||||
| -rw-r--r-- | include/net/tcp.h | 6 | ||||
| -rw-r--r-- | include/net/xdp_sock_drv.h | 16 | ||||
| -rw-r--r-- | include/trace/events/netfs.h | 4 |
23 files changed, 259 insertions, 58 deletions
diff --git a/include/kunit/run-in-irq-context.h b/include/kunit/run-in-irq-context.h index c89b1b1b12dd..bfe60d6cf28d 100644 --- a/include/kunit/run-in-irq-context.h +++ b/include/kunit/run-in-irq-context.h @@ -12,16 +12,16 @@ #include <linux/hrtimer.h> #include <linux/workqueue.h> -#define KUNIT_IRQ_TEST_HRTIMER_INTERVAL us_to_ktime(5) - struct kunit_irq_test_state { bool (*func)(void *test_specific_state); void *test_specific_state; bool task_func_reported_failure; bool hardirq_func_reported_failure; bool softirq_func_reported_failure; + atomic_t task_func_calls; atomic_t hardirq_func_calls; atomic_t softirq_func_calls; + ktime_t interval; struct hrtimer timer; struct work_struct bh_work; }; @@ -30,14 +30,25 @@ static enum hrtimer_restart kunit_irq_test_timer_func(struct hrtimer *timer) { struct kunit_irq_test_state *state = container_of(timer, typeof(*state), timer); + int task_calls, hardirq_calls, softirq_calls; WARN_ON_ONCE(!in_hardirq()); - atomic_inc(&state->hardirq_func_calls); + task_calls = atomic_read(&state->task_func_calls); + hardirq_calls = atomic_inc_return(&state->hardirq_func_calls); + softirq_calls = atomic_read(&state->softirq_func_calls); + + /* + * If the timer is firing too often for the softirq or task to ever have + * a chance to run, increase the timer interval. This is needed on very + * slow systems. + */ + if (hardirq_calls >= 20 && (softirq_calls == 0 || task_calls == 0)) + state->interval = ktime_add_ns(state->interval, 250); if (!state->func(state->test_specific_state)) state->hardirq_func_reported_failure = true; - hrtimer_forward_now(&state->timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL); + hrtimer_forward_now(&state->timer, state->interval); queue_work(system_bh_wq, &state->bh_work); return HRTIMER_RESTART; } @@ -86,10 +97,14 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), struct kunit_irq_test_state state = { .func = func, .test_specific_state = test_specific_state, + /* + * Start with a 5us timer interval. If the system can't keep + * up, kunit_irq_test_timer_func() will increase it. + */ + .interval = us_to_ktime(5), }; unsigned long end_jiffies; - int hardirq_calls, softirq_calls; - bool allctx = false; + int task_calls, hardirq_calls, softirq_calls; /* * Set up a hrtimer (the way we access hardirq context) and a work @@ -104,21 +119,18 @@ static inline void kunit_run_irq_test(struct kunit *test, bool (*func)(void *), * and hardirq), or 1 second, whichever comes first. */ end_jiffies = jiffies + HZ; - hrtimer_start(&state.timer, KUNIT_IRQ_TEST_HRTIMER_INTERVAL, - HRTIMER_MODE_REL_HARD); - for (int task_calls = 0, calls = 0; - ((calls < max_iterations) || !allctx) && - !time_after(jiffies, end_jiffies); - task_calls++) { + hrtimer_start(&state.timer, state.interval, HRTIMER_MODE_REL_HARD); + do { if (!func(test_specific_state)) state.task_func_reported_failure = true; + task_calls = atomic_inc_return(&state.task_func_calls); hardirq_calls = atomic_read(&state.hardirq_func_calls); softirq_calls = atomic_read(&state.softirq_func_calls); - calls = task_calls + hardirq_calls + softirq_calls; - allctx = (task_calls > 0) && (hardirq_calls > 0) && - (softirq_calls > 0); - } + } while ((task_calls + hardirq_calls + softirq_calls < max_iterations || + (task_calls == 0 || hardirq_calls == 0 || + softirq_calls == 0)) && + !time_after(jiffies, end_jiffies)); /* Cancel the timer and work. */ hrtimer_cancel(&state.timer); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bc892e3b37ee..e52160e85af4 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -42,6 +42,14 @@ struct kernel_clone_args; #ifdef CONFIG_CGROUPS +/* + * To avoid confusing the compiler (and generating warnings) with code + * that attempts to access what would be a 0-element array (i.e. sized + * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this + * constant expression can be added. + */ +#define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0) + enum css_task_iter_flags { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ @@ -76,6 +84,7 @@ enum cgroup_lifetime_events { extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; +extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct blocking_notifier_head cgroup_lifetime_notifier; @@ -103,6 +112,8 @@ extern struct blocking_notifier_head cgroup_lifetime_notifier; #define cgroup_subsys_on_dfl(ss) \ static_branch_likely(&ss ## _on_dfl_key) +bool cgroup_on_dfl(const struct cgroup *cgrp); + bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, @@ -274,6 +285,32 @@ void css_task_iter_end(struct css_task_iter *it); for ((pos) = css_next_descendant_post(NULL, (css)); (pos); \ (pos) = css_next_descendant_post((pos), (css))) +/* iterate over child cgrps, lock should be held throughout iteration */ +#define cgroup_for_each_live_child(child, cgrp) \ + list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + cgroup_is_dead(child); })) \ + ; \ + else + +/* walk live descendants in pre order */ +#define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \ + css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + (dsct) = (d_css)->cgroup; \ + cgroup_is_dead(dsct); })) \ + ; \ + else + +/* walk live descendants in postorder */ +#define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \ + css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + (dsct) = (d_css)->cgroup; \ + cgroup_is_dead(dsct); })) \ + ; \ + else + /** * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor @@ -337,6 +374,27 @@ static inline u64 cgroup_id(const struct cgroup *cgrp) } /** + * cgroup_css - obtain a cgroup's css for the specified subsystem + * @cgrp: the cgroup of interest + * @ss: the subsystem of interest (%NULL returns @cgrp->self) + * + * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This + * function must be called either under cgroup_mutex or rcu_read_lock() and + * the caller is responsible for pinning the returned css if it wants to + * keep accessing it outside the said locks. This function may return + * %NULL if @cgrp doesn't have @subsys_id enabled. + */ +static inline struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, + struct cgroup_subsys *ss) +{ + if (CGROUP_HAS_SUBSYS_CONFIG && ss) + return rcu_dereference_check(cgrp->subsys[ss->id], + lockdep_is_held(&cgroup_mutex)); + else + return &cgrp->self; +} + +/** * css_is_dying - test whether the specified css is dying * @css: target css * @@ -372,6 +430,11 @@ static inline bool css_is_self(struct cgroup_subsys_state *css) return false; } +static inline bool cgroup_is_dead(const struct cgroup *cgrp) +{ + return !(cgrp->self.flags & CSS_ONLINE); +} + static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); @@ -387,8 +450,6 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } -extern struct mutex cgroup_mutex; - static inline void cgroup_lock(void) { mutex_lock(&cgroup_mutex); diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h index 35227d47cfc9..dc272b514a01 100644 --- a/include/linux/indirect_call_wrapper.h +++ b/include/linux/indirect_call_wrapper.h @@ -16,22 +16,26 @@ */ #define INDIRECT_CALL_1(f, f1, ...) \ ({ \ - likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__); \ + typeof(f) __f1 = (f); \ + likely(__f1 == f1) ? f1(__VA_ARGS__) : __f1(__VA_ARGS__); \ }) #define INDIRECT_CALL_2(f, f2, f1, ...) \ ({ \ - likely(f == f2) ? f2(__VA_ARGS__) : \ - INDIRECT_CALL_1(f, f1, __VA_ARGS__); \ + typeof(f) __f2 = (f); \ + likely(__f2 == f2) ? f2(__VA_ARGS__) : \ + INDIRECT_CALL_1(__f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALL_3(f, f3, f2, f1, ...) \ ({ \ - likely(f == f3) ? f3(__VA_ARGS__) : \ - INDIRECT_CALL_2(f, f2, f1, __VA_ARGS__); \ + typeof(f) __f3 = (f); \ + likely(__f3 == f3) ? f3(__VA_ARGS__) : \ + INDIRECT_CALL_2(__f3, f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALL_4(f, f4, f3, f2, f1, ...) \ ({ \ - likely(f == f4) ? f4(__VA_ARGS__) : \ - INDIRECT_CALL_3(f, f3, f2, f1, __VA_ARGS__); \ + typeof(f) __f4 = (f); \ + likely(__f4 == f4) ? f4(__VA_ARGS__) : \ + INDIRECT_CALL_3(__f4, f3, f2, f1, __VA_ARGS__); \ }) #define INDIRECT_CALLABLE_DECLARE(f) f diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c92c1149ee6e..a01a474719a7 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -7,6 +7,24 @@ struct mm_struct; +/* opaque kthread data */ +struct kthread; + +/* + * When "(p->flags & PF_KTHREAD)" is set the task is a kthread and will + * always remain a kthread. For kthreads p->worker_private always + * points to a struct kthread. For tasks that are not kthreads + * p->worker_private is used to point to other things. + * + * Return NULL for any task that is not a kthread. + */ +static inline struct kthread *tsk_is_kthread(struct task_struct *p) +{ + if (p->flags & PF_KTHREAD) + return p->worker_private; + return NULL; +} + __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), void *data, @@ -98,9 +116,10 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); -void kthread_exit(long result) __noreturn; +#define kthread_exit(result) do_exit(result) void kthread_complete_and_exit(struct completion *, long) __noreturn; int kthreads_update_housekeeping(void); +void kthread_do_exit(struct kthread *, long); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d4e6e00bb90a..67e25f6d15a4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4711,7 +4711,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, cpu); } @@ -4729,7 +4729,7 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } @@ -4738,7 +4738,7 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq) bool ok = spin_trylock(&txq->_xmit_lock); if (likely(ok)) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } return ok; @@ -4746,14 +4746,14 @@ static inline bool __netif_tx_trylock(struct netdev_queue *txq) static inline void __netif_tx_unlock(struct netdev_queue *txq) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + /* Pairs with READ_ONCE() in netif_tx_owned() */ WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } @@ -4846,6 +4846,23 @@ static inline void netif_tx_disable(struct net_device *dev) local_bh_enable(); } +#ifndef CONFIG_PREEMPT_RT +static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) +{ + /* Other cpus might concurrently change txq->xmit_lock_owner + * to -1 or to their cpu id, but not to our id. + */ + return READ_ONCE(txq->xmit_lock_owner) == cpu; +} + +#else +static inline bool netif_tx_owned(struct netdev_queue *txq, unsigned int cpu) +{ + return rt_mutex_owner(&txq->_xmit_lock.lock) == current; +} + +#endif + static inline void netif_addr_lock(struct net_device *dev) { unsigned char nest_level = 0; diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 825f5865bfc5..c8e227a3f9e2 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -55,6 +55,8 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns) #define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns))) +bool may_see_all_namespaces(void); + static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns) { return atomic_read(&ns->__ns_ref_active); diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 876358cfe1b1..d862fa610270 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -248,6 +248,7 @@ int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); int ring_buffer_map(struct trace_buffer *buffer, int cpu, struct vm_area_struct *vma); +void ring_buffer_map_dup(struct trace_buffer *buffer, int cpu); int ring_buffer_unmap(struct trace_buffer *buffer, int cpu); int ring_buffer_map_get_reader(struct trace_buffer *buffer, int cpu); #endif /* _LINUX_RING_BUFFER_H */ diff --git a/include/linux/usb/r8152.h b/include/linux/usb/r8152.h index 2ca60828f28b..1502b2a355f9 100644 --- a/include/linux/usb/r8152.h +++ b/include/linux/usb/r8152.h @@ -32,6 +32,7 @@ #define VENDOR_ID_DLINK 0x2001 #define VENDOR_ID_DELL 0x413c #define VENDOR_ID_ASUS 0x0b05 +#define VENDOR_ID_TRENDNET 0x20f4 #if IS_REACHABLE(CONFIG_USB_RTL8152) extern u8 rtl8152_get_version(struct usb_interface *intf); diff --git a/include/net/act_api.h b/include/net/act_api.h index e1e8f0f7dacb..d11b79107930 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -70,6 +70,7 @@ struct tc_action { #define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) #define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) #define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4)) +#define TCA_ACT_FLAGS_AT_INGRESS_OR_CLSACT (1U << (TCA_ACT_FLAGS_USER_BITS + 5)) /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. diff --git a/include/net/bonding.h b/include/net/bonding.h index 4ad5521e7731..395c6e281c5f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -699,6 +699,7 @@ void bond_debug_register(struct bonding *bond); void bond_debug_unregister(struct bonding *bond); void bond_debug_reregister(struct bonding *bond); const char *bond_mode_name(int mode); +bool __bond_xdp_check(int mode, int xmit_policy); bool bond_xdp_check(struct bonding *bond, int mode); void bond_setup(struct net_device *bond_dev); unsigned int bond_get_num_tx_queues(void); diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 282e29237d93..c16de5b7963f 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -175,7 +175,7 @@ static inline bool inet6_match(const struct net *net, const struct sock *sk, { if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index ac05a52d9e13..5a979dcab538 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -345,7 +345,7 @@ static inline bool inet_match(const struct net *net, const struct sock *sk, int dif, int sdif) { if (!net_eq(sock_net(sk), net) || - sk->sk_portpair != ports || + READ_ONCE(sk->sk_portpair) != ports || sk->sk_addrpair != cookie) return false; diff --git a/include/net/ip.h b/include/net/ip.h index 69d5cef46004..7f9abd457e01 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -101,7 +101,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm, ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if); ipcm->addr = inet->inet_saddr; - ipcm->protocol = inet->inet_num; + ipcm->protocol = READ_ONCE(inet->inet_num); } #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index b4495c38e0a0..318593743b6e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -559,7 +559,7 @@ static inline u32 fib_multipath_hash_from_keys(const struct net *net, siphash_aligned_key_t hash_key; u32 mp_seed; - mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed).mp_seed; + mp_seed = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_seed.mp_seed); fib_multipath_hash_construct_key(&hash_key, mp_seed); return flow_hash_from_keys_seed(keys, &hash_key); diff --git a/include/net/libeth/xsk.h b/include/net/libeth/xsk.h index 481a7b28e6f2..82b5d21aae87 100644 --- a/include/net/libeth/xsk.h +++ b/include/net/libeth/xsk.h @@ -597,6 +597,7 @@ __libeth_xsk_run_pass(struct libeth_xdp_buff *xdp, * @pending: current number of XSkFQEs to refill * @thresh: threshold below which the queue is refilled * @buf_len: HW-writeable length per each buffer + * @truesize: step between consecutive buffers, 0 if none exists * @nid: ID of the closest NUMA node with memory */ struct libeth_xskfq { @@ -614,6 +615,8 @@ struct libeth_xskfq { u32 thresh; u32 buf_len; + u32 truesize; + int nid; }; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 426534a711b0..e2d2bfc1f989 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -320,11 +320,13 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv) * @NFT_ITER_UNSPEC: unspecified, to catch errors * @NFT_ITER_READ: read-only iteration over set elements * @NFT_ITER_UPDATE: iteration under mutex to update set element state + * @NFT_ITER_UPDATE_CLONE: clone set before iteration under mutex to update element */ enum nft_iter_type { NFT_ITER_UNSPEC, NFT_ITER_READ, NFT_ITER_UPDATE, + NFT_ITER_UPDATE_CLONE, }; struct nft_set; @@ -1861,6 +1863,11 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline int nft_trans_gc_space(const struct nft_trans_gc *trans) +{ + return NFT_TRANS_GC_BATCHCOUNT - trans->count; +} + static inline void nft_ctx_update(struct nft_ctx *ctx, const struct nft_trans *trans) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c3a7268b567e..d5d55cb21686 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -778,13 +778,23 @@ static inline bool skb_skip_tc_classify(struct sk_buff *skb) static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) { struct Qdisc *qdisc; + bool nolock; for (; i < dev->num_tx_queues; i++) { qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + clear_bit(__QDISC_STATE_DRAINING, &qdisc->state); + spin_unlock_bh(&qdisc->seqlock); + } } } } diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index cddebafb9f77..6f996229167b 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -5,16 +5,47 @@ #include <linux/types.h> struct net; +extern struct net init_net; + +union tcp_seq_and_ts_off { + struct { + u32 seq; + u32 ts_off; + }; + u64 hash64; +}; u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); -u32 secure_tcp_seq(__be32 saddr, __be32 daddr, - __be16 sport, __be16 dport); -u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr); -u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, - __be16 sport, __be16 dport); -u32 secure_tcpv6_ts_off(const struct net *net, - const __be32 *saddr, const __be32 *daddr); +union tcp_seq_and_ts_off +secure_tcp_seq_and_ts_off(const struct net *net, __be32 saddr, __be32 daddr, + __be16 sport, __be16 dport); + +static inline u32 secure_tcp_seq(__be32 saddr, __be32 daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcp_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + + return ts.seq; +} + +union tcp_seq_and_ts_off +secure_tcpv6_seq_and_ts_off(const struct net *net, const __be32 *saddr, + const __be32 *daddr, + __be16 sport, __be16 dport); + +static inline u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, + __be16 sport, __be16 dport) +{ + union tcp_seq_and_ts_off ts; + + ts = secure_tcpv6_seq_and_ts_off(&init_net, saddr, daddr, + sport, dport); + return ts.seq; +} #endif /* _NET_SECURE_SEQ */ diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h index b147a3bb1a46..e0fded18e18c 100644 --- a/include/net/tc_act/tc_gate.h +++ b/include/net/tc_act/tc_gate.h @@ -32,6 +32,7 @@ struct tcf_gate_params { s32 tcfg_clockid; size_t num_entries; struct list_head entries; + struct rcu_head rcu; }; #define GATE_ACT_GATE_OPEN BIT(0) @@ -39,7 +40,7 @@ struct tcf_gate_params { struct tcf_gate { struct tc_action common; - struct tcf_gate_params param; + struct tcf_gate_params __rcu *param; u8 current_gate_status; ktime_t current_close_time; u32 current_entry_octets; @@ -51,47 +52,65 @@ struct tcf_gate { #define to_gate(a) ((struct tcf_gate *)a) +static inline struct tcf_gate_params *tcf_gate_params_locked(const struct tc_action *a) +{ + struct tcf_gate *gact = to_gate(a); + + return rcu_dereference_protected(gact->param, + lockdep_is_held(&gact->tcf_lock)); +} + static inline s32 tcf_gate_prio(const struct tc_action *a) { + struct tcf_gate_params *p; s32 tcfg_prio; - tcfg_prio = to_gate(a)->param.tcfg_priority; + p = tcf_gate_params_locked(a); + tcfg_prio = p->tcfg_priority; return tcfg_prio; } static inline u64 tcf_gate_basetime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_basetime; - tcfg_basetime = to_gate(a)->param.tcfg_basetime; + p = tcf_gate_params_locked(a); + tcfg_basetime = p->tcfg_basetime; return tcfg_basetime; } static inline u64 tcf_gate_cycletime(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletime; - tcfg_cycletime = to_gate(a)->param.tcfg_cycletime; + p = tcf_gate_params_locked(a); + tcfg_cycletime = p->tcfg_cycletime; return tcfg_cycletime; } static inline u64 tcf_gate_cycletimeext(const struct tc_action *a) { + struct tcf_gate_params *p; u64 tcfg_cycletimeext; - tcfg_cycletimeext = to_gate(a)->param.tcfg_cycletime_ext; + p = tcf_gate_params_locked(a); + tcfg_cycletimeext = p->tcfg_cycletime_ext; return tcfg_cycletimeext; } static inline u32 tcf_gate_num_entries(const struct tc_action *a) { + struct tcf_gate_params *p; u32 num_entries; - num_entries = to_gate(a)->param.num_entries; + p = tcf_gate_params_locked(a); + num_entries = p->num_entries; return num_entries; } @@ -105,7 +124,7 @@ static inline struct action_gate_entry u32 num_entries; int i = 0; - p = &to_gate(a)->param; + p = tcf_gate_params_locked(a); num_entries = p->num_entries; list_for_each_entry(entry, &p->entries, list) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index c7f24a2da1ca..24d4d5a62b3c 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -13,15 +13,13 @@ struct tcf_ife_params { u8 eth_src[ETH_ALEN]; u16 eth_type; u16 flags; - + struct list_head metalist; struct rcu_head rcu; }; struct tcf_ife_info { struct tc_action common; struct tcf_ife_params __rcu *params; - /* list of metaids allowed */ - struct list_head metalist; }; #define to_ife(a) ((struct tcf_ife_info *)a) diff --git a/include/net/tcp.h b/include/net/tcp.h index eb8bf63fdafc..978eea2d5df0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -43,6 +43,7 @@ #include <net/dst.h> #include <net/mptcp.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include <linux/seq_file.h> #include <linux/memcontrol.h> @@ -2464,8 +2465,9 @@ struct tcp_request_sock_ops { struct flowi *fl, struct request_sock *req, u32 tw_isn); - u32 (*init_seq)(const struct sk_buff *skb); - u32 (*init_ts_off)(const struct net *net, const struct sk_buff *skb); + union tcp_seq_and_ts_off (*init_seq_and_ts_off)( + const struct net *net, + const struct sk_buff *skb); int (*send_synack)(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 242e34f771cc..6b9ebae2dc95 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -51,6 +51,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return xsk_pool_get_chunk_size(pool) - xsk_pool_get_headroom(pool); } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return pool->unaligned ? 0 : xsk_pool_get_chunk_size(pool); +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { @@ -122,7 +127,7 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) goto out; list_for_each_entry_safe(pos, tmp, xskb_list, list_node) { - list_del(&pos->list_node); + list_del_init(&pos->list_node); xp_free(pos); } @@ -157,7 +162,7 @@ static inline struct xdp_buff *xsk_buff_get_frag(const struct xdp_buff *first) frag = list_first_entry_or_null(&xskb->pool->xskb_list, struct xdp_buff_xsk, list_node); if (frag) { - list_del(&frag->list_node); + list_del_init(&frag->list_node); ret = &frag->xdp; } @@ -168,7 +173,7 @@ static inline void xsk_buff_del_frag(struct xdp_buff *xdp) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); - list_del(&xskb->list_node); + list_del_init(&xskb->list_node); } static inline struct xdp_buff *xsk_buff_get_head(struct xdp_buff *first) @@ -337,6 +342,11 @@ static inline u32 xsk_pool_get_rx_frame_size(struct xsk_buff_pool *pool) return 0; } +static inline u32 xsk_pool_get_rx_frag_step(struct xsk_buff_pool *pool) +{ + return 0; +} + static inline void xsk_pool_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq) { diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index 64a382fbc31a..2d366be46a1c 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -57,6 +57,7 @@ EM(netfs_rreq_trace_done, "DONE ") \ EM(netfs_rreq_trace_end_copy_to_cache, "END-C2C") \ EM(netfs_rreq_trace_free, "FREE ") \ + EM(netfs_rreq_trace_intr, "INTR ") \ EM(netfs_rreq_trace_ki_complete, "KI-CMPL") \ EM(netfs_rreq_trace_recollect, "RECLLCT") \ EM(netfs_rreq_trace_redirty, "REDIRTY") \ @@ -169,7 +170,8 @@ EM(netfs_sreq_trace_put_oom, "PUT OOM ") \ EM(netfs_sreq_trace_put_wip, "PUT WIP ") \ EM(netfs_sreq_trace_put_work, "PUT WORK ") \ - E_(netfs_sreq_trace_put_terminated, "PUT TERM ") + EM(netfs_sreq_trace_put_terminated, "PUT TERM ") \ + E_(netfs_sreq_trace_see_failed, "SEE FAILED ") #define netfs_folio_traces \ EM(netfs_folio_is_uptodate, "mod-uptodate") \ |
