From f7f6142107f0e0dd4a2b041116461a049ca18cb0 Mon Sep 17 00:00:00 2001 From: Changwoo Min Date: Fri, 31 Jan 2025 16:09:29 +0900 Subject: sched_ext: Add an event, SCX_EV_SELECT_CPU_FALLBACK Add a core event, SCX_EV_SELECT_CPU_FALLBACK, which represents how many times ops.select_cpu() returns a CPU that the task can't use. __scx_add_event() is used since the caller holds an rq lock, so the preemption has already been disabled. Signed-off-by: Changwoo Min Signed-off-by: Tejun Heo --- include/linux/sched/ext.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched/ext.h b/include/linux/sched/ext.h index 1d70a9867fb1..f7545430a548 100644 --- a/include/linux/sched/ext.h +++ b/include/linux/sched/ext.h @@ -146,6 +146,7 @@ struct sched_ext_entity { u32 weight; s32 sticky_cpu; s32 holding_cpu; + s32 selected_cpu; u32 kf_mask; /* see scx_kf_mask above */ struct task_struct *kf_tasks[2]; /* see SCX_CALL_OP_TASK() */ atomic_long_t ops_state; -- cgit v1.2.3 From 7665054ee0dd0caba6f5f7bae48aa5f221218496 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 14 Feb 2025 20:40:00 +0100 Subject: nodemask: add nodes_copy() Nodemasks API misses the plain nodes_copy() which is required in this series. Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- include/linux/nodemask.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 9fd7a0ce9c1a..41cf43c4e70f 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -191,6 +191,13 @@ static __always_inline void __nodes_andnot(nodemask_t *dstp, const nodemask_t *s bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } +#define nodes_copy(dst, src) __nodes_copy(&(dst), &(src), MAX_NUMNODES) +static __always_inline void __nodes_copy(nodemask_t *dstp, + const nodemask_t *srcp, unsigned int nbits) +{ + bitmap_copy(dstp->bits, srcp->bits, nbits); +} + #define nodes_complement(dst, src) \ __nodes_complement(&(dst), &(src), MAX_NUMNODES) static __always_inline void __nodes_complement(nodemask_t *dstp, -- cgit v1.2.3 From 14a8262f505bc4478c50e66309057bc0d0d4b62e Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Fri, 14 Feb 2025 20:40:01 +0100 Subject: nodemask: numa: reorganize inclusion path Nodemasks now pull linux/numa.h for MAX_NUMNODES and NUMA_NO_NODE macros. This series makes numa.h depending on nodemasks, so we hit a circular dependency. Nodemasks library is highly employed by NUMA code, and it would be logical to resolve the circular dependency by making NUMA headers dependent nodemask.h. Signed-off-by: Yury Norov [NVIDIA] Signed-off-by: Andrea Righi Signed-off-by: Tejun Heo --- include/linux/nodemask.h | 1 - include/linux/nodemask_types.h | 11 ++++++++++- include/linux/numa.h | 10 +--------- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 41cf43c4e70f..f0ac0633366b 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -94,7 +94,6 @@ #include #include #include -#include #include extern nodemask_t _unused_nodemask_arg_; diff --git a/include/linux/nodemask_types.h b/include/linux/nodemask_types.h index 6b28d97ea6ed..f850a48742f1 100644 --- a/include/linux/nodemask_types.h +++ b/include/linux/nodemask_types.h @@ -3,7 +3,16 @@ #define __LINUX_NODEMASK_TYPES_H #include -#include + +#ifdef CONFIG_NODES_SHIFT +#define NODES_SHIFT CONFIG_NODES_SHIFT +#else +#define NODES_SHIFT 0 +#endif + +#define MAX_NUMNODES (1 << NODES_SHIFT) + +#define NUMA_NO_NODE (-1) typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; diff --git a/include/linux/numa.h b/include/linux/numa.h index 3567e40329eb..31d8bf8a951a 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -3,16 +3,8 @@ #define _LINUX_NUMA_H #include #include +#include -#ifdef CONFIG_NODES_SHIFT -#define NODES_SHIFT CONFIG_NODES_SHIFT -#else -#define NODES_SHIFT 0 -#endif - -#define MAX_NUMNODES (1 << NODES_SHIFT) - -#define NUMA_NO_NODE (-1) #define NUMA_NO_MEMBLK (-1) static inline bool numa_valid_node(int nid) -- cgit v1.2.3 From 16d79f2a4f155b54e7580563c6c03832506b3b09 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 14 Feb 2025 20:40:02 +0100 Subject: mm/numa: Introduce nearest_node_nodemask() Introduce the new helper nearest_node_nodemask() to find the closest node in a specified nodemask from a given starting node. Returns MAX_NUMNODES if no node is found. Suggested-by: Yury Norov [NVIDIA] Signed-off-by: Andrea Righi Acked-by: Yury Norov [NVIDIA] Signed-off-by: Tejun Heo --- include/linux/numa.h | 7 +++++++ mm/mempolicy.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/numa.h b/include/linux/numa.h index 31d8bf8a951a..e6baaf6051bc 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -31,6 +31,8 @@ void __init alloc_offline_node_data(int nid); /* Generic implementation available */ int numa_nearest_node(int node, unsigned int state); +int nearest_node_nodemask(int node, nodemask_t *mask); + #ifndef memory_add_physaddr_to_nid int memory_add_physaddr_to_nid(u64 start); #endif @@ -47,6 +49,11 @@ static inline int numa_nearest_node(int node, unsigned int state) return NUMA_NO_NODE; } +static inline int nearest_node_nodemask(int node, nodemask_t *mask) +{ + return NUMA_NO_NODE; +} + static inline int memory_add_physaddr_to_nid(u64 start) { return 0; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 162407fbf2bc..488cad280efb 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -196,6 +196,37 @@ int numa_nearest_node(int node, unsigned int state) } EXPORT_SYMBOL_GPL(numa_nearest_node); +/** + * nearest_node_nodemask - Find the node in @mask at the nearest distance + * from @node. + * + * @node: a valid node ID to start the search from. + * @mask: a pointer to a nodemask representing the allowed nodes. + * + * This function iterates over all nodes in @mask and calculates the + * distance from the starting @node, then it returns the node ID that is + * the closest to @node, or MAX_NUMNODES if no node is found. + * + * Note that @node must be a valid node ID usable with node_distance(), + * providing an invalid node ID (e.g., NUMA_NO_NODE) may result in crashes + * or unexpected behavior. + */ +int nearest_node_nodemask(int node, nodemask_t *mask) +{ + int dist, n, min_dist = INT_MAX, min_node = MAX_NUMNODES; + + for_each_node_mask(n, *mask) { + dist = node_distance(node, n); + if (dist < min_dist) { + min_dist = dist; + min_node = n; + } + } + + return min_node; +} +EXPORT_SYMBOL_GPL(nearest_node_nodemask); + struct mempolicy *get_task_policy(struct task_struct *p) { struct mempolicy *pol = p->mempolicy; -- cgit v1.2.3 From f09177ca5f242a32368a2e9414dce4c90dc1d405 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Fri, 14 Feb 2025 20:40:03 +0100 Subject: sched/topology: Introduce for_each_node_numadist() iterator Introduce the new helper for_each_node_numadist() to iterate over node IDs in order of increasing NUMA distance from a given starting node. This iterator is somehow similar to for_each_numa_hop_mask(), but instead of providing a cpumask at each iteration, it provides a node ID. Example usage: nodemask_t unvisited = NODE_MASK_ALL; int node, start = cpu_to_node(smp_processor_id()); node = start; for_each_node_numadist(node, unvisited) pr_info("node (%d, %d) -> %d\n", start, node, node_distance(start, node)); On a system with equidistant nodes: $ numactl -H ... node distances: node 0 1 2 3 0: 10 20 20 20 1: 20 10 20 20 2: 20 20 10 20 3: 20 20 20 10 Output of the example above (on node 0): [ 7.367022] node (0, 0) -> 10 [ 7.367151] node (0, 1) -> 20 [ 7.367186] node (0, 2) -> 20 [ 7.367247] node (0, 3) -> 20 On a system with non-equidistant nodes (simulated using virtme-ng): $ numactl -H ... node distances: node 0 1 2 3 0: 10 51 31 41 1: 51 10 21 61 2: 31 21 10 11 3: 41 61 11 10 Output of the example above (on node 0): [ 8.953644] node (0, 0) -> 10 [ 8.953712] node (0, 2) -> 31 [ 8.953764] node (0, 3) -> 41 [ 8.953817] node (0, 1) -> 51 Suggested-by: Yury Norov [NVIDIA] Signed-off-by: Andrea Righi Acked-by: Yury Norov [NVIDIA] Signed-off-by: Tejun Heo --- include/linux/topology.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 52f5850730b3..a1815f4395ab 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -261,6 +261,36 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops) } #endif /* CONFIG_NUMA */ +/** + * for_each_node_numadist() - iterate over nodes in increasing distance + * order, starting from a given node + * @node: the iteration variable and the starting node. + * @unvisited: a nodemask to keep track of the unvisited nodes. + * + * This macro iterates over NUMA node IDs in increasing distance from the + * starting @node and yields MAX_NUMNODES when all the nodes have been + * visited. + * + * Note that by the time the loop completes, the @unvisited nodemask will + * be fully cleared, unless the loop exits early. + * + * The difference between for_each_node() and for_each_node_numadist() is + * that the former allows to iterate over nodes in numerical order, whereas + * the latter iterates over nodes in increasing order of distance. + * + * This complexity of this iterator is O(N^2), where N represents the + * number of nodes, as each iteration involves scanning all nodes to + * find the one with the shortest distance. + * + * Requires rcu_lock to be held. + */ +#define for_each_node_numadist(node, unvisited) \ + for (int __start = (node), \ + (node) = nearest_node_nodemask((__start), &(unvisited)); \ + (node) < MAX_NUMNODES; \ + node_clear((node), (unvisited)), \ + (node) = nearest_node_nodemask((__start), &(unvisited))) + /** * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance * from a given node. -- cgit v1.2.3