diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 03:23:48 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-25 03:23:48 +0300 |
| commit | bcb044256d3f5d9f5bb61d1eac6492f77883bd60 (patch) | |
| tree | 0c0c48beef0742ac47a5e136fe0ecbcea66b6871 /include/linux | |
| parent | 94dc216ad848ebee06ce7692fcfcbb2e9b3e643c (diff) | |
| parent | e4855fc90e52efef7e3926205c8dc53ce39b6138 (diff) | |
| download | linux-bcb044256d3f5d9f5bb61d1eac6492f77883bd60.tar.xz | |
Merge tag 'sched_ext-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext
Pull sched_ext updates from Tejun Heo:
- Add a mechanism to count and report internal events. This significantly
improves visibility into subtle corner conditions.
- The default idle CPU selection logic is revamped and improved in
multiple ways including being made topology aware.
- sched_ext was disabling ttwu_queue for simplicity, which can be
costly when hardware topology is more complex. Implement
SCX_OPS_ALLOW_QUEUED_WAKEUP so that BPF schedulers can selectively
enable ttwu_queue.
- tools/sched_ext updates to improve compatibility, among other things.
- Other misc updates and fixes.
- sched_ext/for-6.14-fixes was pulled a few times to receive
prerequisite fixes and resolve conflicts.
* tag 'sched_ext-for-6.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext: (42 commits)
sched_ext: idle: Refactor scx_select_cpu_dfl()
sched_ext: idle: Honor idle flags in the built-in idle selection policy
sched_ext: Skip per-CPU tasks in scx_bpf_reenqueue_local()
sched_ext: Add trace point to track sched_ext core events
sched_ext: Change the event type from u64 to s64
sched_ext: Documentation: add task lifecycle summary
tools/sched_ext: Provide a compatible helper for scx_bpf_events()
selftests/sched_ext: Add NUMA-aware scheduler test
tools/sched_ext: Provide consistent access to scx flags
sched_ext: idle: Fix scx_bpf_pick_any_cpu_node() behavior
sched_ext: idle: Introduce scx_bpf_nr_node_ids()
sched_ext: idle: Introduce node-aware idle cpu kfunc helpers
sched_ext: idle: Per-node idle cpumasks
sched_ext: idle: Introduce SCX_OPS_BUILTIN_IDLE_PER_NODE
sched_ext: idle: Make idle static keys private
sched/topology: Introduce for_each_node_numadist() iterator
mm/numa: Introduce nearest_node_nodemask()
nodemask: numa: reorganize inclusion path
nodemask: add nodes_copy()
tools/sched_ext: Sync with scx repo
...
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/nodemask.h | 8 | ||||
| -rw-r--r-- | include/linux/nodemask_types.h | 11 | ||||
| -rw-r--r-- | include/linux/numa.h | 17 | ||||
| -rw-r--r-- | include/linux/sched/ext.h | 1 | ||||
| -rw-r--r-- | include/linux/topology.h | 30 |
5 files changed, 56 insertions, 11 deletions
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 9fd7a0ce9c1a..f0ac0633366b 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -94,7 +94,6 @@ #include <linux/bitmap.h> #include <linux/minmax.h> #include <linux/nodemask_types.h> -#include <linux/numa.h> #include <linux/random.h> extern nodemask_t _unused_nodemask_arg_; @@ -191,6 +190,13 @@ static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *s bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } +#define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES) +static __always_inline void __nodes_copy(nodemask_t *dstp, + const nodemask_t *srcp, unsigned int nbits) +{ + bitmap_copy(dstp->bits, srcp->bits, nbits); +} + #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_complement(nodemask_t *dstp, diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h index 6b28d97ea6ed..f850a48742f1 100644 --- a/include/linux/nodemask_types.h +++ b/include/linux/nodemask_types.h @@ -3,7 +3,16 @@ #define __LINUX_NODEMASK_TYPES_H #include <linux/bitops.h> -#include <linux/numa.h> + +#ifdef CONFIG_NODES_SHIFT +#define NODES_SHIFT CONFIG_NODES_SHIFT +#else +#define NODES_SHIFT 0 +#endif + +#define MAX_NUMNODES (1 << NODES_SHIFT) + +#define NUMA_NO_NODE (-1) typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; diff --git a/include/linux/numa.h b/include/linux/numa.h index 3567e40329eb..e6baaf6051bc 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -3,16 +3,8 @@ #define _LINUX_NUMA_H #include <linux/init.h> #include <linux/types.h> +#include <linux/nodemask.h> -#ifdef CONFIG_NODES_SHIFT -#define NODES_SHIFT CONFIG_NODES_SHIFT -#else -#define NODES_SHIFT 0 -#endif - -#define MAX_NUMNODES (1 << NODES_SHIFT) - -#define NUMA_NO_NODE (-1) #define NUMA_NO_MEMBLK (-1) static inline bool numa_valid_node(int nid) @@ -39,6 +31,8 @@ void __init 
alloc_offline_node_data(int nid); /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); +int nearest_node_nodemask(int node, nodemask_t *mask); + #ifndef memory_add_physaddr_to_nid int memory_add_physaddr_to_nid(u64 start); #endif @@ -55,6 +49,11 @@ static inline int numa_nearest_node(int node, unsigned int state) return NUMA_NO_NODE; } +static inline int nearest_node_nodemask(int node, nodemask_t *mask) +{ + return NUMA_NO_NODE; +} + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1d70a9867fb1..f7545430a548 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -146,6 +146,7 @@ struct sched_ext_entity { u32 weight; s32 sticky_cpu; s32 holding_cpu; + s32 selected_cpu; u32 kf_mask; /* see scx_kf_mask above */ struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */ atomic_long_t ops_state; diff --git a/include/linux/topology.h b/include/linux/topology.h index 52f5850730b3..a1815f4395ab 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -262,6 +262,36 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops) #endif /* CONFIG_NUMA */ /** + * for_each_node_numadist() - iterate over nodes in increasing distance + * order, starting from a given node + * @node: the iteration variable and the starting node. + * @unvisited: a nodemask to keep track of the unvisited nodes. + * + * This macro iterates over NUMA node IDs in increasing distance from the + * starting @node and yields MAX_NUMNODES when all the nodes have been + * visited. + * + * Note that by the time the loop completes, the @unvisited nodemask will + * be fully cleared, unless the loop exits early. + * + * The difference between for_each_node() and for_each_node_numadist() is + * that the former allows to iterate over nodes in numerical order, whereas + * the latter iterates over nodes in increasing order of distance. 
+ * + * The complexity of this iterator is O(N^2), where N represents the + * number of nodes, as each iteration involves scanning all nodes to + * find the one with the shortest distance. + * + * Requires rcu_lock to be held. + */ +#define for_each_node_numadist(node, unvisited) \ + for (int __start = (node), \ + (node) = nearest_node_nodemask((__start), &(unvisited)); \ + (node) < MAX_NUMNODES; \ + node_clear((node), (unvisited)), \ + (node) = nearest_node_nodemask((__start), &(unvisited))) + +/** * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance * from a given node. * @mask: the iteration variable. |
