Diffstat (limited to 'tools/sched_ext')
-rw-r--r--   tools/sched_ext/include/scx/common.bpf.h        | 276
-rw-r--r--   tools/sched_ext/include/scx/common.h            |   7
-rw-r--r--   tools/sched_ext/include/scx/compat.bpf.h        | 100
-rw-r--r--   tools/sched_ext/include/scx/compat.h            |  17
-rw-r--r--   tools/sched_ext/include/scx/enum_defs.autogen.h | 123
-rw-r--r--   tools/sched_ext/include/scx/enums.autogen.bpf.h | 129
-rw-r--r--   tools/sched_ext/include/scx/enums.autogen.h     |  49
-rw-r--r--   tools/sched_ext/include/scx/enums.bpf.h         |  12
-rw-r--r--   tools/sched_ext/include/scx/enums.h             |  28
-rw-r--r--   tools/sched_ext/include/scx/user_exit_info.h    |   9
-rw-r--r--   tools/sched_ext/scx_central.bpf.c               |  13
-rw-r--r--   tools/sched_ext/scx_central.c                   |  16
-rw-r--r--   tools/sched_ext/scx_flatcg.bpf.c                |  27
-rw-r--r--   tools/sched_ext/scx_flatcg.c                    |   1
-rw-r--r--   tools/sched_ext/scx_qmap.bpf.c                  |  25
-rw-r--r--   tools/sched_ext/scx_qmap.c                      |   2
-rw-r--r--   tools/sched_ext/scx_simple.bpf.c                |   9
17 files changed, 787 insertions(+), 56 deletions(-)
diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h
index 9fa64b053ba9..8787048c6762 100644
--- a/tools/sched_ext/include/scx/common.bpf.h
+++ b/tools/sched_ext/include/scx/common.bpf.h
@@ -7,9 +7,16 @@
 #ifndef __SCX_COMMON_BPF_H
 #define __SCX_COMMON_BPF_H
 
+/*
+ * The generated kfunc prototypes in vmlinux.h are missing address space
+ * attributes which cause build failures. For now, suppress the generated
+ * prototypes. See https://github.com/sched-ext/scx/issues/1111.
+ */
+#define BPF_NO_KFUNC_PROTOTYPES
+
 #ifdef LSP
 #define __bpf__
-#include "../vmlinux/vmlinux.h"
+#include "../vmlinux.h"
 #else
 #include "vmlinux.h"
 #endif
@@ -18,12 +25,17 @@
 #include <bpf/bpf_tracing.h>
 #include <asm-generic/errno.h>
 #include "user_exit_info.h"
+#include "enum_defs.autogen.h"
 
 #define PF_WQ_WORKER			0x00000020	/* I'm a workqueue worker */
 #define PF_KTHREAD			0x00200000	/* I am a kernel thread */
 #define PF_EXITING			0x00000004
 #define CLOCK_MONOTONIC			1
 
+extern int LINUX_KERNEL_VERSION __kconfig;
+extern const char CONFIG_CC_VERSION_TEXT[64] __kconfig __weak;
+extern const char CONFIG_LOCALVERSION[64] __kconfig __weak;
+
 /*
  * Earlier versions of clang/pahole lost upper 32bits in 64bit enums which can
  * lead to really confusing misbehaviors. Let's trigger a build failure.
@@ -58,20 +70,28 @@ void scx_bpf_dump_bstr(char *fmt, unsigned long long *data, u32 data_len) __ksym
 u32 scx_bpf_cpuperf_cap(s32 cpu) __ksym __weak;
 u32 scx_bpf_cpuperf_cur(s32 cpu) __ksym __weak;
 void scx_bpf_cpuperf_set(s32 cpu, u32 perf) __ksym __weak;
+u32 scx_bpf_nr_node_ids(void) __ksym __weak;
 u32 scx_bpf_nr_cpu_ids(void) __ksym __weak;
+int scx_bpf_cpu_node(s32 cpu) __ksym __weak;
 const struct cpumask *scx_bpf_get_possible_cpumask(void) __ksym __weak;
 const struct cpumask *scx_bpf_get_online_cpumask(void) __ksym __weak;
 void scx_bpf_put_cpumask(const struct cpumask *cpumask) __ksym __weak;
+const struct cpumask *scx_bpf_get_idle_cpumask_node(int node) __ksym __weak;
 const struct cpumask *scx_bpf_get_idle_cpumask(void) __ksym;
+const struct cpumask *scx_bpf_get_idle_smtmask_node(int node) __ksym __weak;
 const struct cpumask *scx_bpf_get_idle_smtmask(void) __ksym;
 void scx_bpf_put_idle_cpumask(const struct cpumask *cpumask) __ksym;
 bool scx_bpf_test_and_clear_cpu_idle(s32 cpu) __ksym;
+s32 scx_bpf_pick_idle_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak;
 s32 scx_bpf_pick_idle_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
+s32 scx_bpf_pick_any_cpu_node(const cpumask_t *cpus_allowed, int node, u64 flags) __ksym __weak;
 s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym;
 bool scx_bpf_task_running(const struct task_struct *p) __ksym;
 s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym;
 struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym;
 struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
+u64 scx_bpf_now(void) __ksym __weak;
+void scx_bpf_events(struct scx_event_stats *events, size_t events__sz) __ksym __weak;
 
 /*
  * Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from
@@ -79,6 +99,9 @@ struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak;
  */
 #define BPF_FOR_EACH_ITER	(&___it)
 
+#define scx_read_event(e, name)						\
+	(bpf_core_field_exists((e)->name) ? (e)->name : 0)
+
 static inline __attribute__((format(printf, 1, 2)))
 void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
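The new scx_bpf_events() kfunc snapshots the core's event counters into a struct scx_event_stats, and scx_read_event() reads a field only if the running kernel's struct actually has it (via bpf_core_field_exists()), so binaries keep loading as new counters are added. An illustrative sketch, not part of the diff (the function name is made up; __COMPAT_scx_bpf_events() is the fallback wrapper added to compat.bpf.h later in this series):

	/* Sketch: dump one event counter in a CO-RE-safe way. */
	static void report_select_cpu_fallbacks(void)
	{
		struct scx_event_stats events;

		__COMPAT_scx_bpf_events(&events, sizeof(events));
		bpf_printk("SCX_EV_SELECT_CPU_FALLBACK: %lld",
			   scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
	}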
@@ -98,7 +121,7 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
 	_Pragma("GCC diagnostic push")					\
 	_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")		\
 	___bpf_fill(___param, args);					\
-	_Pragma("GCC diagnostic pop")					\
+	_Pragma("GCC diagnostic pop")
 
 /*
  * scx_bpf_exit() wraps the scx_bpf_exit_bstr() kfunc with variadic arguments
@@ -136,6 +159,20 @@ void ___scx_bpf_bstr_format_checker(const char *fmt, ...) {}
 	___scx_bpf_bstr_format_checker(fmt, ##args);			\
 })
 
+/*
+ * scx_bpf_dump_header() is a wrapper around scx_bpf_dump that adds a header
+ * of system information for debugging.
+ */
+#define scx_bpf_dump_header()						\
+({									\
+	scx_bpf_dump("kernel: %d.%d.%d %s\ncc: %s\n",			\
+		     LINUX_KERNEL_VERSION >> 16,			\
+		     LINUX_KERNEL_VERSION >> 8 & 0xFF,			\
+		     LINUX_KERNEL_VERSION & 0xFF,			\
+		     CONFIG_LOCALVERSION,				\
+		     CONFIG_CC_VERSION_TEXT);				\
+})
+
 #define BPF_STRUCT_OPS(name, args...)					\
 SEC("struct_ops/"#name)							\
 BPF_PROG(name, ##args)
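scx_bpf_dump_header() is intended for ops.dump() callbacks, so every debug dump records which kernel and compiler produced it. An illustrative sketch (the scheduler and callback name are hypothetical, not from this diff):

	void BPF_STRUCT_OPS(example_dump, struct scx_dump_ctx *dctx)
	{
		/* prints "kernel: <maj>.<min>.<patch> <localversion>" plus the cc string */
		scx_bpf_dump_header();
	}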
@@ -325,6 +362,66 @@ u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
 				   const struct cpumask *src2) __ksym;
 u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym;
 
+int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words) __ksym;
+int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym;
+void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym;
+
+#define def_iter_struct(name)						\
+struct bpf_iter_##name {						\
+	struct bpf_iter_bits it;					\
+	const struct cpumask *bitmap;					\
+};
+
+#define def_iter_new(name)						\
+static inline int bpf_iter_##name##_new(				\
+	struct bpf_iter_##name *it, const u64 *unsafe_ptr__ign, u32 nr_words) \
+{									\
+	it->bitmap = scx_bpf_get_##name##_cpumask();			\
+	return bpf_iter_bits_new(&it->it, (const u64 *)it->bitmap,	\
+				 sizeof(struct cpumask) / 8);		\
+}
+
+#define def_iter_next(name)						\
+static inline int *bpf_iter_##name##_next(struct bpf_iter_##name *it) { \
+	return bpf_iter_bits_next(&it->it);				\
+}
+
+#define def_iter_destroy(name)						\
+static inline void bpf_iter_##name##_destroy(struct bpf_iter_##name *it) { \
+	scx_bpf_put_cpumask(it->bitmap);				\
+	bpf_iter_bits_destroy(&it->it);					\
+}
+#define def_for_each_cpu(cpu, name)	for_each_##name##_cpu(cpu)
+
+/// Provides iterators for possible and online CPUs.
+///
+/// # Example
+///
+/// ```
+/// static inline void example_use() {
+///	int *cpu;
+///
+///	for_each_possible_cpu(cpu) {
+///		bpf_printk("CPU %d is possible", *cpu);
+///	}
+///
+///	for_each_online_cpu(cpu) {
+///		bpf_printk("CPU %d is online", *cpu);
+///	}
+/// }
+/// ```
+def_iter_struct(possible);
+def_iter_new(possible);
+def_iter_next(possible);
+def_iter_destroy(possible);
+#define for_each_possible_cpu(cpu) bpf_for_each(possible, cpu, NULL, 0)
+
+def_iter_struct(online);
+def_iter_new(online);
+def_iter_next(online);
+def_iter_destroy(online);
+#define for_each_online_cpu(cpu) bpf_for_each(online, cpu, NULL, 0)
+
 /*
  * Access a cpumask in read-only mode (typically to check bits).
  */
@@ -333,10 +430,115 @@ static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask)
 	return (const struct cpumask *)mask;
 }
 
+/*
+ * Return true if task @p cannot migrate to a different CPU, false
+ * otherwise.
+ */
+static inline bool is_migration_disabled(const struct task_struct *p)
+{
+	if (bpf_core_field_exists(p->migration_disabled))
+		return p->migration_disabled;
+	return false;
+}
+
 /* rcu */
 void bpf_rcu_read_lock(void) __ksym;
 void bpf_rcu_read_unlock(void) __ksym;
 
+/*
+ * Time helpers, most of which are from jiffies.h.
+ */
+
+/**
+ * time_delta - Calculate the delta between new and old time stamps
+ * @after: first comparable as u64
+ * @before: second comparable as u64
+ *
+ * Return: the time difference, which is >= 0
+ */
+static inline s64 time_delta(u64 after, u64 before)
+{
+	return (s64)(after - before) > 0 ? (s64)(after - before) : 0;
+}
+
+/**
+ * time_after - returns true if the time a is after time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Do this with "<0" and ">=0" to only test the sign of the result. A
+ * good compiler would generate better code (and a really good compiler
+ * wouldn't care). Gcc is currently neither.
+ *
+ * Return: %true if time a is after time b, otherwise %false.
+ */
+static inline bool time_after(u64 a, u64 b)
+{
+	return (s64)(b - a) < 0;
+}
+
+/**
+ * time_before - returns true if the time a is before time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true if time a is before time b, otherwise %false.
+ */
+static inline bool time_before(u64 a, u64 b)
+{
+	return time_after(b, a);
+}
+
+/**
+ * time_after_eq - returns true if the time a is after or the same as time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true if time a is after or the same as time b, otherwise %false.
+ */
+static inline bool time_after_eq(u64 a, u64 b)
+{
+	return (s64)(a - b) >= 0;
+}
+
+/**
+ * time_before_eq - returns true if the time a is before or the same as time b.
+ * @a: first comparable as u64
+ * @b: second comparable as u64
+ *
+ * Return: %true if time a is before or the same as time b, otherwise %false.
+ */
+static inline bool time_before_eq(u64 a, u64 b)
+{
+	return time_after_eq(b, a);
+}
+
+/**
+ * time_in_range - Calculate whether a is in the range of [b, c].
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true if time a is in the range [b, c], otherwise %false.
+ */
+static inline bool time_in_range(u64 a, u64 b, u64 c)
+{
+	return time_after_eq(a, b) && time_before_eq(a, c);
+}
+
+/**
+ * time_in_range_open - Calculate whether a is in the range of [b, c).
+ * @a: time to test
+ * @b: beginning of the range
+ * @c: end of the range
+ *
+ * Return: %true if time a is in the range [b, c), otherwise %false.
+ */
+static inline bool time_in_range_open(u64 a, u64 b, u64 c)
+{
+	return time_after_eq(a, b) && time_before(a, c);
+}
+
 /*
  * Other helpers
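These helpers use the classic jiffies trick: comparing via the sign of a signed difference stays correct even if the u64 clock wraps, where a plain "a > b" would give the wrong answer near the wrap point. An illustrative sketch of how a scheduler builds on them (not taken from this diff; the function name is made up):

	/* Sketch: wraparound-safe slice-expiry check on top of time_after_eq(). */
	static inline bool slice_expired(u64 now, u64 started_at, u64 slice_ns)
	{
		/* holds even when started_at + slice_ns wrapped past zero */
		return time_after_eq(now, started_at + slice_ns);
	}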
@@ -384,20 +586,48 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int size)
 	}
 }
 
-#define READ_ONCE(x)							\
-({									\
-	union { typeof(x) __val; char __c[1]; } __u =			\
-		{ .__c = { 0 } };					\
-	__read_once_size(&(x), __u.__c, sizeof(x));			\
-	__u.__val;							\
+/*
+ * __unqual_typeof(x) - Declare an unqualified scalar type, leaving
+ * non-scalar types unchanged.
+ *
+ * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char'
+ * is not type-compatible with 'signed char', and we define a separate case.
+ *
+ * This is copied verbatim from the kernel's include/linux/compiler_types.h,
+ * but with the default expression (for pointers) changed from (x) to
+ * (typeof(x))0.
+ *
+ * This is because LLVM has a bug where for lvalue (x), it does not get rid of
+ * an extra address_space qualifier, but does in case of rvalue (typeof(x)0).
+ * Hence, for pointers, we need to create an rvalue expression to get the
+ * desired type. See https://github.com/llvm/llvm-project/issues/53400.
+ */
+#define __scalar_type_to_expr_cases(type)				\
+	unsigned type: (unsigned type)0, signed type: (signed type)0
+
+#define __unqual_typeof(x)						\
+	typeof(_Generic((x),						\
+		char: (char)0,						\
+		__scalar_type_to_expr_cases(char),			\
+		__scalar_type_to_expr_cases(short),			\
+		__scalar_type_to_expr_cases(int),			\
+		__scalar_type_to_expr_cases(long),			\
+		__scalar_type_to_expr_cases(long long),			\
+		default: (typeof(x))0))
+
+#define READ_ONCE(x)							\
+({									\
+	union { __unqual_typeof(x) __val; char __c[1]; } __u =		\
+		{ .__c = { 0 } };					\
+	__read_once_size((__unqual_typeof(x) *)&(x), __u.__c, sizeof(x)); \
+	__u.__val;							\
 })
 
-#define WRITE_ONCE(x, val)						\
-({									\
-	union { typeof(x) __val; char __c[1]; } __u =			\
-		{ .__val = (val) };					\
-	__write_once_size(&(x), __u.__c, sizeof(x));			\
-	__u.__val;							\
+#define WRITE_ONCE(x, val)						\
+({									\
+	union { __unqual_typeof(x) __val; char __c[1]; } __u =		\
+		{ .__val = (val) };					\
+	__write_once_size((__unqual_typeof(x) *)&(x), __u.__c, sizeof(x)); \
+	__u.__val;							\
 })
 
 /*
@@ -430,6 +660,24 @@ static inline u32 log2_u64(u64 v)
 	return log2_u32(v) + 1;
 }
 
+/*
+ * Return a value proportionally scaled to the task's weight.
+ */
+static inline u64 scale_by_task_weight(const struct task_struct *p, u64 value)
+{
+	return (value * p->scx.weight) / 100;
+}
+
+/*
+ * Return a value inversely proportional to the task's weight.
+ */
+static inline u64 scale_by_task_weight_inverse(const struct task_struct *p, u64 value)
+{
+	return value * 100 / p->scx.weight;
+}
+
 #include "compat.bpf.h"
+#include "enums.bpf.h"
 
 #endif	/* __SCX_COMMON_BPF_H */
diff --git a/tools/sched_ext/include/scx/common.h b/tools/sched_ext/include/scx/common.h
index 5b0f90152152..1dc76bd84296 100644
--- a/tools/sched_ext/include/scx/common.h
+++ b/tools/sched_ext/include/scx/common.h
@@ -16,6 +16,7 @@
 #include <stdlib.h>
 #include <stdint.h>
 #include <errno.h>
+#include "enum_defs.autogen.h"
 
 typedef uint8_t u8;
 typedef uint16_t u16;
@@ -71,5 +72,11 @@ typedef int64_t s64;
 
 #include "user_exit_info.h"
 #include "compat.h"
+#include "enums.h"
+
+/* not available when building kernel tools/sched_ext */
+#if __has_include(<lib/sdt_task.h>)
+#include <lib/sdt_task.h>
+#endif
 
 #endif	/* __SCHED_EXT_COMMON_H */
diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h
index d56520100a26..9252e1a00556 100644
--- a/tools/sched_ext/include/scx/compat.bpf.h
+++ b/tools/sched_ext/include/scx/compat.bpf.h
@@ -125,6 +125,106 @@ bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter,
 	false;								\
 })
 
+/**
+ * __COMPAT_is_enq_cpu_selected - Test if SCX_ENQ_CPU_SELECTED is on
+ * in a compatible way. We will preserve this __COMPAT helper until v6.16.
+ *
+ * @enq_flags: enqueue flags from ops.enqueue()
+ *
+ * Return: True if SCX_ENQ_CPU_SELECTED is turned on in @enq_flags
+ */
+static inline bool __COMPAT_is_enq_cpu_selected(u64 enq_flags)
+{
+#ifdef HAVE_SCX_ENQ_CPU_SELECTED
+	/*
+	 * This is the case where the BPF code was compiled against a
+	 * vmlinux.h in which the enum SCX_ENQ_CPU_SELECTED exists.
+	 */
+
+	/*
+	 * We should temporarily suspend the macro expansion of
+	 * 'SCX_ENQ_CPU_SELECTED'. This avoids 'SCX_ENQ_CPU_SELECTED' being
+	 * rewritten to '__SCX_ENQ_CPU_SELECTED' when 'SCX_ENQ_CPU_SELECTED'
+	 * is defined in 'scripts/gen_enums.py'.
+	 */
+#pragma push_macro("SCX_ENQ_CPU_SELECTED")
+#undef SCX_ENQ_CPU_SELECTED
+	u64 flag;
+
+	/*
+	 * When the kernel did not have SCX_ENQ_CPU_SELECTED,
+	 * select_task_rq_scx() has never been skipped. Thus, this case
+	 * should be considered that the CPU has already been selected.
+	 */
+	if (!bpf_core_enum_value_exists(enum scx_enq_flags,
+					SCX_ENQ_CPU_SELECTED))
+		return true;
+
+	flag = bpf_core_enum_value(enum scx_enq_flags, SCX_ENQ_CPU_SELECTED);
+	return enq_flags & flag;
+
+	/*
+	 * Once done, resume the macro expansion of 'SCX_ENQ_CPU_SELECTED'.
+	 */
+#pragma pop_macro("SCX_ENQ_CPU_SELECTED")
+#else
+	/*
+	 * This is the case where the BPF code was compiled against a
+	 * vmlinux.h in which the enum SCX_ENQ_CPU_SELECTED does NOT exist.
+	 */
+	return true;
+#endif /* HAVE_SCX_ENQ_CPU_SELECTED */
+}
+
+#define scx_bpf_now()							\
+	(bpf_ksym_exists(scx_bpf_now) ?					\
+	 scx_bpf_now() :						\
+	 bpf_ktime_get_ns())
+
+/*
+ * v6.15: Introduce event counters.
+ *
+ * Preserve the following macro until v6.17.
+ */
+#define __COMPAT_scx_bpf_events(events, size)				\
+	(bpf_ksym_exists(scx_bpf_events) ?				\
+	 scx_bpf_events(events, size) : ({}))
+
+/*
+ * v6.15: Introduce NUMA-aware kfuncs to operate with per-node idle
+ * cpumasks.
+ *
+ * Preserve the following __COMPAT_scx_*_node macros until v6.17.
+ */
+#define __COMPAT_scx_bpf_nr_node_ids()					\
+	(bpf_ksym_exists(scx_bpf_nr_node_ids) ?				\
+	 scx_bpf_nr_node_ids() : 1U)
+
+#define __COMPAT_scx_bpf_cpu_node(cpu)					\
+	(bpf_ksym_exists(scx_bpf_cpu_node) ?				\
+	 scx_bpf_cpu_node(cpu) : 0)
+
+#define __COMPAT_scx_bpf_get_idle_cpumask_node(node)			\
+	(bpf_ksym_exists(scx_bpf_get_idle_cpumask_node) ?		\
+	 scx_bpf_get_idle_cpumask_node(node) :				\
+	 scx_bpf_get_idle_cpumask())
+
+#define __COMPAT_scx_bpf_get_idle_smtmask_node(node)			\
+	(bpf_ksym_exists(scx_bpf_get_idle_smtmask_node) ?		\
+	 scx_bpf_get_idle_smtmask_node(node) :				\
+	 scx_bpf_get_idle_smtmask())
+
+#define __COMPAT_scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags)	\
+	(bpf_ksym_exists(scx_bpf_pick_idle_cpu_node) ?			\
+	 scx_bpf_pick_idle_cpu_node(cpus_allowed, node, flags) :	\
+	 scx_bpf_pick_idle_cpu(cpus_allowed, flags))
+
+#define __COMPAT_scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags)	\
+	(bpf_ksym_exists(scx_bpf_pick_any_cpu_node) ?			\
+	 scx_bpf_pick_any_cpu_node(cpus_allowed, node, flags) :		\
+	 scx_bpf_pick_any_cpu(cpus_allowed, flags))
+
 /*
  * Define sched_ext_ops. This may be expanded to define multiple variants for
  * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH().
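These __COMPAT wrappers let a scheduler be written against the NUMA-aware API unconditionally; on kernels that lack the *_node kfuncs they transparently fall back to the global variants. An illustrative sketch (the callback and naming are hypothetical, not part of this diff):

	s32 BPF_STRUCT_OPS(example_select_cpu, struct task_struct *p,
			   s32 prev_cpu, u64 wake_flags)
	{
		int node = __COMPAT_scx_bpf_cpu_node(prev_cpu);
		s32 cpu;

		/* prefer an idle CPU near prev_cpu's node; stay put otherwise */
		cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node, 0);
		return cpu >= 0 ? cpu : prev_cpu;
	}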
diff --git a/tools/sched_ext/include/scx/compat.h b/tools/sched_ext/include/scx/compat.h
index cc56ff9aa252..35c67c5174ac 100644
--- a/tools/sched_ext/include/scx/compat.h
+++ b/tools/sched_ext/include/scx/compat.h
@@ -106,8 +106,20 @@ static inline bool __COMPAT_struct_has_field(const char *type, const char *field
 	return false;
 }
 
-#define SCX_OPS_SWITCH_PARTIAL						\
-	__COMPAT_ENUM_OR_ZERO("scx_ops_flags", "SCX_OPS_SWITCH_PARTIAL")
+#define SCX_OPS_FLAG(name) __COMPAT_ENUM_OR_ZERO("scx_ops_flags", #name)
+
+#define SCX_OPS_KEEP_BUILTIN_IDLE SCX_OPS_FLAG(SCX_OPS_KEEP_BUILTIN_IDLE)
+#define SCX_OPS_ENQ_LAST SCX_OPS_FLAG(SCX_OPS_ENQ_LAST)
+#define SCX_OPS_ENQ_EXITING SCX_OPS_FLAG(SCX_OPS_ENQ_EXITING)
+#define SCX_OPS_SWITCH_PARTIAL SCX_OPS_FLAG(SCX_OPS_SWITCH_PARTIAL)
+#define SCX_OPS_ENQ_MIGRATION_DISABLED SCX_OPS_FLAG(SCX_OPS_ENQ_MIGRATION_DISABLED)
+#define SCX_OPS_ALLOW_QUEUED_WAKEUP SCX_OPS_FLAG(SCX_OPS_ALLOW_QUEUED_WAKEUP)
+#define SCX_OPS_BUILTIN_IDLE_PER_NODE SCX_OPS_FLAG(SCX_OPS_BUILTIN_IDLE_PER_NODE)
+
+#define SCX_PICK_IDLE_FLAG(name) __COMPAT_ENUM_OR_ZERO("scx_pick_idle_cpu_flags", #name)
+
+#define SCX_PICK_IDLE_CORE SCX_PICK_IDLE_FLAG(SCX_PICK_IDLE_CORE)
+#define SCX_PICK_IDLE_IN_NODE SCX_PICK_IDLE_FLAG(SCX_PICK_IDLE_IN_NODE)
 
 static inline long scx_hotplug_seq(void)
 {
@@ -149,6 +161,7 @@ static inline long scx_hotplug_seq(void)
 	__skel = __scx_name##__open();					\
 	SCX_BUG_ON(!__skel, "Could not open " #__scx_name);		\
 	__skel->struct_ops.__ops_name->hotplug_seq = scx_hotplug_seq();	\
+	SCX_ENUM_INIT(__skel);						\
 	__skel;								\
 })
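Because each of these userspace macros resolves through __COMPAT_ENUM_OR_ZERO(), a loader can OR flags together without probing kernel support first; a flag the running kernel lacks simply contributes 0. An illustrative fragment (skeleton type and ops name are hypothetical):

	static void set_example_flags(struct scx_example *skel)
	{
		/* flags unsupported by the running kernel evaluate to 0 */
		skel->struct_ops.example_ops->flags |= SCX_OPS_SWITCH_PARTIAL |
						       SCX_OPS_ENQ_MIGRATION_DISABLED;
	}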
diff --git a/tools/sched_ext/include/scx/enum_defs.autogen.h b/tools/sched_ext/include/scx/enum_defs.autogen.h
new file mode 100644
index 000000000000..c2c33df9292c
--- /dev/null
+++ b/tools/sched_ext/include/scx/enum_defs.autogen.h
@@ -0,0 +1,123 @@
+/*
+ * WARNING: This file is autogenerated from gen_enum_defs.py [1].
+ *
+ * [1] https://github.com/sched-ext/scx/blob/main/scripts/gen_enum_defs.py
+ */
+
+#ifndef __ENUM_DEFS_AUTOGEN_H__
+#define __ENUM_DEFS_AUTOGEN_H__
+
+#define HAVE_SCX_DSP_DFL_MAX_BATCH
+#define HAVE_SCX_DSP_MAX_LOOPS
+#define HAVE_SCX_WATCHDOG_MAX_TIMEOUT
+#define HAVE_SCX_EXIT_BT_LEN
+#define HAVE_SCX_EXIT_MSG_LEN
+#define HAVE_SCX_EXIT_DUMP_DFL_LEN
+#define HAVE_SCX_CPUPERF_ONE
+#define HAVE_SCX_OPS_TASK_ITER_BATCH
+#define HAVE_SCX_CPU_PREEMPT_RT
+#define HAVE_SCX_CPU_PREEMPT_DL
+#define HAVE_SCX_CPU_PREEMPT_STOP
+#define HAVE_SCX_CPU_PREEMPT_UNKNOWN
+#define HAVE_SCX_DEQ_SLEEP
+#define HAVE_SCX_DEQ_CORE_SCHED_EXEC
+#define HAVE_SCX_DSQ_FLAG_BUILTIN
+#define HAVE_SCX_DSQ_FLAG_LOCAL_ON
+#define HAVE_SCX_DSQ_INVALID
+#define HAVE_SCX_DSQ_GLOBAL
+#define HAVE_SCX_DSQ_LOCAL
+#define HAVE_SCX_DSQ_LOCAL_ON
+#define HAVE_SCX_DSQ_LOCAL_CPU_MASK
+#define HAVE_SCX_DSQ_ITER_REV
+#define HAVE___SCX_DSQ_ITER_HAS_SLICE
+#define HAVE___SCX_DSQ_ITER_HAS_VTIME
+#define HAVE___SCX_DSQ_ITER_USER_FLAGS
+#define HAVE___SCX_DSQ_ITER_ALL_FLAGS
+#define HAVE_SCX_DSQ_LNODE_ITER_CURSOR
+#define HAVE___SCX_DSQ_LNODE_PRIV_SHIFT
+#define HAVE_SCX_ENQ_WAKEUP
+#define HAVE_SCX_ENQ_HEAD
+#define HAVE_SCX_ENQ_CPU_SELECTED
+#define HAVE_SCX_ENQ_PREEMPT
+#define HAVE_SCX_ENQ_REENQ
+#define HAVE_SCX_ENQ_LAST
+#define HAVE___SCX_ENQ_INTERNAL_MASK
+#define HAVE_SCX_ENQ_CLEAR_OPSS
+#define HAVE_SCX_ENQ_DSQ_PRIQ
+#define HAVE_SCX_TASK_DSQ_ON_PRIQ
+#define HAVE_SCX_TASK_QUEUED
+#define HAVE_SCX_TASK_RESET_RUNNABLE_AT
+#define HAVE_SCX_TASK_DEQD_FOR_SLEEP
+#define HAVE_SCX_TASK_STATE_SHIFT
+#define HAVE_SCX_TASK_STATE_BITS
+#define HAVE_SCX_TASK_STATE_MASK
+#define HAVE_SCX_TASK_CURSOR
+#define HAVE_SCX_ECODE_RSN_HOTPLUG
+#define HAVE_SCX_ECODE_ACT_RESTART
+#define HAVE_SCX_EXIT_NONE
+#define HAVE_SCX_EXIT_DONE
+#define HAVE_SCX_EXIT_UNREG
+#define HAVE_SCX_EXIT_UNREG_BPF
+#define HAVE_SCX_EXIT_UNREG_KERN
+#define HAVE_SCX_EXIT_SYSRQ
+#define HAVE_SCX_EXIT_ERROR
+#define HAVE_SCX_EXIT_ERROR_BPF
+#define HAVE_SCX_EXIT_ERROR_STALL
+#define HAVE_SCX_KF_UNLOCKED
+#define HAVE_SCX_KF_CPU_RELEASE
+#define HAVE_SCX_KF_DISPATCH
+#define HAVE_SCX_KF_ENQUEUE
+#define HAVE_SCX_KF_SELECT_CPU
+#define HAVE_SCX_KF_REST
+#define HAVE___SCX_KF_RQ_LOCKED
+#define HAVE___SCX_KF_TERMINAL
+#define HAVE_SCX_KICK_IDLE
+#define HAVE_SCX_KICK_PREEMPT
+#define HAVE_SCX_KICK_WAIT
+#define HAVE_SCX_OPI_BEGIN
+#define HAVE_SCX_OPI_NORMAL_BEGIN
+#define HAVE_SCX_OPI_NORMAL_END
+#define HAVE_SCX_OPI_CPU_HOTPLUG_BEGIN
+#define HAVE_SCX_OPI_CPU_HOTPLUG_END
+#define HAVE_SCX_OPI_END
+#define HAVE_SCX_OPS_ENABLING
+#define HAVE_SCX_OPS_ENABLED
+#define HAVE_SCX_OPS_DISABLING
+#define HAVE_SCX_OPS_DISABLED
+#define HAVE_SCX_OPS_KEEP_BUILTIN_IDLE
+#define HAVE_SCX_OPS_ENQ_LAST
+#define HAVE_SCX_OPS_ENQ_EXITING
+#define HAVE_SCX_OPS_SWITCH_PARTIAL
+#define HAVE_SCX_OPS_ENQ_MIGRATION_DISABLED
+#define HAVE_SCX_OPS_ALLOW_QUEUED_WAKEUP
+#define HAVE_SCX_OPS_HAS_CGROUP_WEIGHT
+#define HAVE_SCX_OPS_ALL_FLAGS
+#define HAVE_SCX_OPSS_NONE
+#define HAVE_SCX_OPSS_QUEUEING
+#define HAVE_SCX_OPSS_QUEUED
+#define HAVE_SCX_OPSS_DISPATCHING
+#define HAVE_SCX_OPSS_QSEQ_SHIFT
+#define HAVE_SCX_PICK_IDLE_CORE
+#define HAVE_SCX_OPS_NAME_LEN
+#define HAVE_SCX_SLICE_DFL
+#define HAVE_SCX_SLICE_INF
+#define HAVE_SCX_RQ_ONLINE
+#define HAVE_SCX_RQ_CAN_STOP_TICK
+#define HAVE_SCX_RQ_BAL_PENDING
+#define HAVE_SCX_RQ_BAL_KEEP
+#define HAVE_SCX_RQ_BYPASSING
+#define HAVE_SCX_RQ_CLK_VALID
+#define HAVE_SCX_RQ_IN_WAKEUP
+#define HAVE_SCX_RQ_IN_BALANCE
+#define HAVE_SCX_TASK_NONE
+#define HAVE_SCX_TASK_INIT
+#define HAVE_SCX_TASK_READY
+#define HAVE_SCX_TASK_ENABLED
+#define HAVE_SCX_TASK_NR_STATES
+#define HAVE_SCX_TG_ONLINE
+#define HAVE_SCX_TG_INITED
+#define HAVE_SCX_WAKE_FORK
+#define HAVE_SCX_WAKE_TTWU
+#define HAVE_SCX_WAKE_SYNC
+
+#endif /* __ENUM_DEFS_AUTOGEN_H__ */
diff --git a/tools/sched_ext/include/scx/enums.autogen.bpf.h b/tools/sched_ext/include/scx/enums.autogen.bpf.h
new file mode 100644
index 000000000000..2f8002bcc19a
--- /dev/null
+++ b/tools/sched_ext/include/scx/enums.autogen.bpf.h
@@ -0,0 +1,129 @@
+/*
+ * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would
+ * like to access an enum that is currently missing, add it to the script
+ * and run it from the root directory to update this file.
+ */
+
+const volatile u64 __SCX_OPS_NAME_LEN __weak;
+#define SCX_OPS_NAME_LEN __SCX_OPS_NAME_LEN
+
+const volatile u64 __SCX_SLICE_DFL __weak;
+#define SCX_SLICE_DFL __SCX_SLICE_DFL
+
+const volatile u64 __SCX_SLICE_INF __weak;
+#define SCX_SLICE_INF __SCX_SLICE_INF
+
+const volatile u64 __SCX_RQ_ONLINE __weak;
+#define SCX_RQ_ONLINE __SCX_RQ_ONLINE
+
+const volatile u64 __SCX_RQ_CAN_STOP_TICK __weak;
+#define SCX_RQ_CAN_STOP_TICK __SCX_RQ_CAN_STOP_TICK
+
+const volatile u64 __SCX_RQ_BAL_PENDING __weak;
+#define SCX_RQ_BAL_PENDING __SCX_RQ_BAL_PENDING
+
+const volatile u64 __SCX_RQ_BAL_KEEP __weak;
+#define SCX_RQ_BAL_KEEP __SCX_RQ_BAL_KEEP
+
+const volatile u64 __SCX_RQ_BYPASSING __weak;
+#define SCX_RQ_BYPASSING __SCX_RQ_BYPASSING
+
+const volatile u64 __SCX_RQ_CLK_VALID __weak;
+#define SCX_RQ_CLK_VALID __SCX_RQ_CLK_VALID
+
+const volatile u64 __SCX_RQ_IN_WAKEUP __weak;
+#define SCX_RQ_IN_WAKEUP __SCX_RQ_IN_WAKEUP
+
+const volatile u64 __SCX_RQ_IN_BALANCE __weak;
+#define SCX_RQ_IN_BALANCE __SCX_RQ_IN_BALANCE
+
+const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak;
+#define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN
+
+const volatile u64 __SCX_DSQ_FLAG_LOCAL_ON __weak;
+#define SCX_DSQ_FLAG_LOCAL_ON __SCX_DSQ_FLAG_LOCAL_ON
+
+const volatile u64 __SCX_DSQ_INVALID __weak;
+#define SCX_DSQ_INVALID __SCX_DSQ_INVALID
+
+const volatile u64 __SCX_DSQ_GLOBAL __weak;
+#define SCX_DSQ_GLOBAL __SCX_DSQ_GLOBAL
+
+const volatile u64 __SCX_DSQ_LOCAL __weak;
+#define SCX_DSQ_LOCAL __SCX_DSQ_LOCAL
+
+const volatile u64 __SCX_DSQ_LOCAL_ON __weak;
+#define SCX_DSQ_LOCAL_ON __SCX_DSQ_LOCAL_ON
+
+const volatile u64 __SCX_DSQ_LOCAL_CPU_MASK __weak;
+#define SCX_DSQ_LOCAL_CPU_MASK __SCX_DSQ_LOCAL_CPU_MASK
+
+const volatile u64 __SCX_TASK_QUEUED __weak;
+#define SCX_TASK_QUEUED __SCX_TASK_QUEUED
+
+const volatile u64 __SCX_TASK_RESET_RUNNABLE_AT __weak;
+#define SCX_TASK_RESET_RUNNABLE_AT __SCX_TASK_RESET_RUNNABLE_AT
+
+const volatile u64 __SCX_TASK_DEQD_FOR_SLEEP __weak;
+#define SCX_TASK_DEQD_FOR_SLEEP __SCX_TASK_DEQD_FOR_SLEEP
+
+const volatile u64 __SCX_TASK_STATE_SHIFT __weak;
+#define SCX_TASK_STATE_SHIFT __SCX_TASK_STATE_SHIFT
+
+const volatile u64 __SCX_TASK_STATE_BITS __weak;
+#define SCX_TASK_STATE_BITS __SCX_TASK_STATE_BITS
+
+const volatile u64 __SCX_TASK_STATE_MASK __weak;
+#define SCX_TASK_STATE_MASK __SCX_TASK_STATE_MASK
+
+const volatile u64 __SCX_TASK_CURSOR __weak;
+#define SCX_TASK_CURSOR __SCX_TASK_CURSOR
+
+const volatile u64 __SCX_TASK_NONE __weak;
+#define SCX_TASK_NONE __SCX_TASK_NONE
+
+const volatile u64 __SCX_TASK_INIT __weak;
+#define SCX_TASK_INIT __SCX_TASK_INIT
+
+const volatile u64 __SCX_TASK_READY __weak;
+#define SCX_TASK_READY __SCX_TASK_READY
+
+const volatile u64 __SCX_TASK_ENABLED __weak;
+#define SCX_TASK_ENABLED __SCX_TASK_ENABLED
+
+const volatile u64 __SCX_TASK_NR_STATES __weak;
+#define SCX_TASK_NR_STATES __SCX_TASK_NR_STATES
+
+const volatile u64 __SCX_TASK_DSQ_ON_PRIQ __weak;
+#define SCX_TASK_DSQ_ON_PRIQ __SCX_TASK_DSQ_ON_PRIQ
+
+const volatile u64 __SCX_KICK_IDLE __weak;
+#define SCX_KICK_IDLE __SCX_KICK_IDLE
+
+const volatile u64 __SCX_KICK_PREEMPT __weak;
+#define SCX_KICK_PREEMPT __SCX_KICK_PREEMPT
+
+const volatile u64 __SCX_KICK_WAIT __weak;
+#define SCX_KICK_WAIT __SCX_KICK_WAIT
+
+const volatile u64 __SCX_ENQ_WAKEUP __weak;
+#define SCX_ENQ_WAKEUP __SCX_ENQ_WAKEUP
+
+const volatile u64 __SCX_ENQ_HEAD __weak;
+#define SCX_ENQ_HEAD __SCX_ENQ_HEAD
+
+const volatile u64 __SCX_ENQ_PREEMPT __weak;
+#define SCX_ENQ_PREEMPT __SCX_ENQ_PREEMPT
+
+const volatile u64 __SCX_ENQ_REENQ __weak;
+#define SCX_ENQ_REENQ __SCX_ENQ_REENQ
+
+const volatile u64 __SCX_ENQ_LAST __weak;
+#define SCX_ENQ_LAST __SCX_ENQ_LAST
+
+const volatile u64 __SCX_ENQ_CLEAR_OPSS __weak;
+#define SCX_ENQ_CLEAR_OPSS __SCX_ENQ_CLEAR_OPSS
+
+const volatile u64 __SCX_ENQ_DSQ_PRIQ __weak;
+#define SCX_ENQ_DSQ_PRIQ __SCX_ENQ_DSQ_PRIQ
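These weak, load-time-constant variables pair with the userspace side that follows: each SCX_* name now expands to a __SCX_* rodata slot that SCX_ENUM_INIT() fills from the running kernel's BTF before the skeleton is loaded, so BPF code keeps using the plain enum names. A hedged sketch of the resulting flow (skeleton name hypothetical):

	/* userspace, at open time -- SCX_OPS_OPEN() now does this automatically */
	struct scx_example *skel = scx_example__open();
	SCX_ENUM_INIT(skel);	/* e.g. __SCX_SLICE_DFL = BTF value of SCX_SLICE_DFL, or 0 */

	/* BPF side: SCX_SLICE_DFL then reads the value patched in at load time */
	scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);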
diff --git a/tools/sched_ext/include/scx/enums.autogen.h b/tools/sched_ext/include/scx/enums.autogen.h
new file mode 100644
index 000000000000..fedec938584b
--- /dev/null
+++ b/tools/sched_ext/include/scx/enums.autogen.h
@@ -0,0 +1,49 @@
+/*
+ * WARNING: This file is autogenerated from scripts/gen_enums.py. If you would
+ * like to access an enum that is currently missing, add it to the script
+ * and run it from the root directory to update this file.
+ */
+
+#define SCX_ENUM_INIT(skel) do {					\
+	SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN);	\
+	SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL);		\
+	SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_ONLINE);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_CAN_STOP_TICK);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BAL_PENDING);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BAL_KEEP);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BYPASSING);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_CLK_VALID);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_IN_WAKEUP);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_IN_BALANCE);		\
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN);	\
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON);	\
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID);		\
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_GLOBAL);		\
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL);		\
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_ON);		\
+	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_LOCAL_CPU_MASK);	\
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_QUEUED);		\
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_RESET_RUNNABLE_AT);	\
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_DEQD_FOR_SLEEP);	\
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_SHIFT);	\
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_BITS);		\
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_STATE_MASK);		\
+	SCX_ENUM_SET(skel, scx_ent_flags, SCX_TASK_CURSOR);		\
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NONE);		\
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_INIT);		\
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_READY);		\
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_ENABLED);		\
+	SCX_ENUM_SET(skel, scx_task_state, SCX_TASK_NR_STATES);		\
+	SCX_ENUM_SET(skel, scx_ent_dsq_flags, SCX_TASK_DSQ_ON_PRIQ);	\
+	SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_IDLE);		\
+	SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_PREEMPT);		\
+	SCX_ENUM_SET(skel, scx_kick_flags, SCX_KICK_WAIT);		\
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_WAKEUP);		\
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_HEAD);		\
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_PREEMPT);		\
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_REENQ);		\
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_LAST);		\
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_CLEAR_OPSS);		\
+	SCX_ENUM_SET(skel, scx_enq_flags, SCX_ENQ_DSQ_PRIQ);		\
+} while (0)
diff --git a/tools/sched_ext/include/scx/enums.bpf.h b/tools/sched_ext/include/scx/enums.bpf.h
new file mode 100644
index 000000000000..af704c5d6334
--- /dev/null
+++ b/tools/sched_ext/include/scx/enums.bpf.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Convenience macros for getting/setting struct scx_enums instances.
+ *
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ */
+#ifndef __SCX_ENUMS_BPF_H
+#define __SCX_ENUMS_BPF_H
+
+#include "enums.autogen.bpf.h"
+
+#endif /* __SCX_ENUMS_BPF_H */
diff --git a/tools/sched_ext/include/scx/enums.h b/tools/sched_ext/include/scx/enums.h
new file mode 100644
index 000000000000..8e7c91575f0b
--- /dev/null
+++ b/tools/sched_ext/include/scx/enums.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Define struct scx_enums that stores the load-time values of enums
+ * used by the BPF program.
+ *
+ * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
+ */
+
+#ifndef __SCX_ENUMS_H
+#define __SCX_ENUMS_H
+
+static inline void __ENUM_set(u64 *val, char *type, char *name)
+{
+	bool res;
+
+	res = __COMPAT_read_enum(type, name, val);
+	if (!res)
+		*val = 0;
+}
+
+#define SCX_ENUM_SET(skel, type, name) do {			\
+	__ENUM_set(&skel->rodata->__##name, #type, #name);	\
+} while (0)
+
+#include "enums.autogen.h"
+
+#endif /* __SCX_ENUMS_H */
diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h
index 8ce2734402e1..66f856640ee7 100644
--- a/tools/sched_ext/include/scx/user_exit_info.h
+++ b/tools/sched_ext/include/scx/user_exit_info.h
@@ -10,6 +10,11 @@
 #ifndef __USER_EXIT_INFO_H
 #define __USER_EXIT_INFO_H
 
+#ifdef LSP
+#define __bpf__
+#include "../vmlinux.h"
+#endif
+
 enum uei_sizes {
 	UEI_REASON_LEN	= 128,
 	UEI_MSG_LEN	= 1024,
@@ -25,9 +30,7 @@ struct user_exit_info {
 
 #ifdef __bpf__
 
-#ifdef LSP
-#include "../vmlinux/vmlinux.h"
-#else
+#ifndef LSP
 #include "vmlinux.h"
 #endif
 #include <bpf/bpf_core_read.h>
diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c
index e6fad6211f6c..50bc1737c167 100644
--- a/tools/sched_ext/scx_central.bpf.c
+++ b/tools/sched_ext/scx_central.bpf.c
@@ -57,7 +57,7 @@ enum {
 
 const volatile s32 central_cpu;
 const volatile u32 nr_cpu_ids = 1;	/* !0 for veristat, set during init */
-const volatile u64 slice_ns = SCX_SLICE_DFL;
+const volatile u64 slice_ns;
 
 bool timer_pinned = true;
 u64 nr_total, nr_locals, nr_queued, nr_lost_pids;
@@ -87,11 +87,6 @@ struct {
 	__type(value, struct central_timer);
 } central_timer SEC(".maps");
 
-static bool vtime_before(u64 a, u64 b)
-{
-	return (s64)(a - b) < 0;
-}
-
 s32 BPF_STRUCT_OPS(central_select_cpu, struct task_struct *p,
 		   s32 prev_cpu, u64 wake_flags)
 {
@@ -245,7 +240,7 @@ void BPF_STRUCT_OPS(central_running, struct task_struct *p)
 	s32 cpu = scx_bpf_task_cpu(p);
 	u64 *started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
 	if (started_at)
-		*started_at = bpf_ktime_get_ns() ?: 1;	/* 0 indicates idle */
+		*started_at = scx_bpf_now() ?: 1;	/* 0 indicates idle */
 }
 
 void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
@@ -258,7 +253,7 @@ void BPF_STRUCT_OPS(central_stopping, struct task_struct *p, bool runnable)
 
 static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
 {
-	u64 now = bpf_ktime_get_ns();
+	u64 now = scx_bpf_now();
 	u64 nr_to_kick = nr_queued;
 	s32 i, curr_cpu;
 
@@ -279,7 +274,7 @@ static int central_timerfn(void *map, int *key, struct bpf_timer *timer)
 		/* kick iff the current one exhausted its slice */
 		started_at = ARRAY_ELEM_PTR(cpu_started_at, cpu, nr_cpu_ids);
 		if (started_at && *started_at &&
-		    vtime_before(now, *started_at + slice_ns))
+		    time_before(now, *started_at + slice_ns))
 			continue;
 
 		/* and there's something pending */
diff --git a/tools/sched_ext/scx_central.c b/tools/sched_ext/scx_central.c
index e938156ed0a0..6ba6e610eeaa 100644
--- a/tools/sched_ext/scx_central.c
+++ b/tools/sched_ext/scx_central.c
@@ -10,6 +10,7 @@
 #include <unistd.h>
 #include <inttypes.h>
 #include <signal.h>
+#include <assert.h>
 #include <libgen.h>
 #include <bpf/bpf.h>
 #include <scx/common.h>
@@ -58,15 +59,24 @@ restart:
 	skel->rodata->central_cpu = 0;
 	skel->rodata->nr_cpu_ids = libbpf_num_possible_cpus();
+	skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+
+	assert(skel->rodata->nr_cpu_ids <= INT32_MAX);
 
 	while ((opt = getopt(argc, argv, "s:c:pvh")) != -1) {
 		switch (opt) {
 		case 's':
 			skel->rodata->slice_ns = strtoull(optarg, NULL, 0) * 1000;
 			break;
-		case 'c':
-			skel->rodata->central_cpu = strtoul(optarg, NULL, 0);
+		case 'c': {
+			u32 central_cpu = strtoul(optarg, NULL, 0);
+			if (central_cpu >= skel->rodata->nr_cpu_ids) {
+				fprintf(stderr, "invalid central CPU id value, %u given (%u max)\n", central_cpu, skel->rodata->nr_cpu_ids);
+				return -1;
+			}
+			skel->rodata->central_cpu = (s32)central_cpu;
 			break;
+		}
 		case 'v':
 			verbose = true;
 			break;
@@ -95,7 +105,7 @@ restart:
 	 */
 	cpuset = CPU_ALLOC(skel->rodata->nr_cpu_ids);
 	SCX_BUG_ON(!cpuset, "Failed to allocate cpuset");
-	CPU_ZERO(cpuset);
+	CPU_ZERO_S(CPU_ALLOC_SIZE(skel->rodata->nr_cpu_ids), cpuset);
 	CPU_SET(skel->rodata->central_cpu, cpuset);
 	SCX_BUG_ON(sched_setaffinity(0, sizeof(*cpuset), cpuset),
 		   "Failed to affinitize to central CPU %d (max %d)",
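The CPU_ZERO_S() change matters because the set comes from CPU_ALLOC(): the fixed-size CPU_ZERO() assumes a 1024-CPU cpu_set_t, while dynamically allocated sets must be sized with CPU_ALLOC_SIZE(). A standalone illustration of the correct glibc pattern (the helper name is made up):

	#define _GNU_SOURCE
	#include <sched.h>

	static cpu_set_t *make_cpuset(unsigned int nr_cpu_ids, int cpu)
	{
		cpu_set_t *set = CPU_ALLOC(nr_cpu_ids);
		size_t size = CPU_ALLOC_SIZE(nr_cpu_ids);

		if (!set)
			return NULL;
		CPU_ZERO_S(size, set);		/* CPU_ZERO() would assume 1024 CPUs */
		CPU_SET_S(cpu, size, set);
		return set;
	}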
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index 4e3afcd260bf..fdc7170639e6 100644
--- a/tools/sched_ext/scx_flatcg.bpf.c
+++ b/tools/sched_ext/scx_flatcg.bpf.c
@@ -57,7 +57,7 @@ enum {
 char _license[] SEC("license") = "GPL";
 
 const volatile u32 nr_cpus = 32;	/* !0 for veristat, set during init */
-const volatile u64 cgrp_slice_ns = SCX_SLICE_DFL;
+const volatile u64 cgrp_slice_ns;
 const volatile bool fifo_sched;
 
 u64 cvtime_now;
@@ -137,11 +137,6 @@ static u64 div_round_up(u64 dividend, u64 divisor)
 	return (dividend + divisor - 1) / divisor;
 }
 
-static bool vtime_before(u64 a, u64 b)
-{
-	return (s64)(a - b) < 0;
-}
-
 static bool cgv_node_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 {
 	struct cgv_node *cgc_a, *cgc_b;
@@ -271,7 +266,7 @@ static void cgrp_cap_budget(struct cgv_node *cgv_node, struct fcg_cgrp_ctx *cgc)
 	 */
 	max_budget = (cgrp_slice_ns * nr_cpus * cgc->hweight) /
 		     (2 * FCG_HWEIGHT_ONE);
-	if (vtime_before(cvtime, cvtime_now - max_budget))
+	if (time_before(cvtime, cvtime_now - max_budget))
 		cvtime = cvtime_now - max_budget;
 
 	cgv_node->cvtime = cvtime;
@@ -401,7 +396,7 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags)
 		 * Limit the amount of budget that an idling task can accumulate
 		 * to one slice.
 		 */
-		if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
+		if (time_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL))
 			tvtime = cgc->tvtime_now - SCX_SLICE_DFL;
 
 		scx_bpf_dsq_insert_vtime(p, cgrp->kn->id, SCX_SLICE_DFL,
@@ -535,7 +530,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p)
 			 * from multiple CPUs and thus racy. Any error should be
 			 * contained and temporary. Let's just live with it.
 			 */
-			if (vtime_before(cgc->tvtime_now, p->scx.dsq_vtime))
+			if (time_before(cgc->tvtime_now, p->scx.dsq_vtime))
 				cgc->tvtime_now = p->scx.dsq_vtime;
 		}
 		bpf_cgroup_release(cgrp);
@@ -645,7 +640,7 @@ static bool try_pick_next_cgroup(u64 *cgidp)
 	cgv_node = container_of(rb_node, struct cgv_node, rb_node);
 	cgid = cgv_node->cgid;
 
-	if (vtime_before(cvtime_now, cgv_node->cvtime))
+	if (time_before(cvtime_now, cgv_node->cvtime))
 		cvtime_now = cgv_node->cvtime;
 
 	/*
@@ -734,7 +729,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
 	struct fcg_cpu_ctx *cpuc;
 	struct fcg_cgrp_ctx *cgc;
 	struct cgroup *cgrp;
-	u64 now = bpf_ktime_get_ns();
+	u64 now = scx_bpf_now();
 	bool picked_next = false;
 
 	cpuc = find_cpu_ctx();
@@ -744,7 +739,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev)
 	if (!cpuc->cur_cgid)
 		goto pick_next_cgroup;
 
-	if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) {
+	if (time_before(now, cpuc->cur_at + cgrp_slice_ns)) {
 		if (scx_bpf_dsq_move_to_local(cpuc->cur_cgid)) {
 			stat_inc(FCG_STAT_CNS_KEEP);
 			return;
@@ -920,14 +915,14 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p,
 		    struct cgroup *from, struct cgroup *to)
 {
 	struct fcg_cgrp_ctx *from_cgc, *to_cgc;
-	s64 vtime_delta;
+	s64 delta;
 
 	/* find_cgrp_ctx() triggers scx_ops_error() on lookup failures */
 	if (!(from_cgc = find_cgrp_ctx(from)) || !(to_cgc = find_cgrp_ctx(to)))
 		return;
 
-	vtime_delta = p->scx.dsq_vtime - from_cgc->tvtime_now;
-	p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta;
+	delta = time_delta(p->scx.dsq_vtime, from_cgc->tvtime_now);
+	p->scx.dsq_vtime = to_cgc->tvtime_now + delta;
 }
 
 s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init)
@@ -955,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops,
 	       .cgroup_move		= (void *)fcg_cgroup_move,
 	       .init			= (void *)fcg_init,
 	       .exit			= (void *)fcg_exit,
-	       .flags			= SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
+	       .flags			= SCX_OPS_ENQ_EXITING,
 	       .name			= "flatcg");
diff --git a/tools/sched_ext/scx_flatcg.c b/tools/sched_ext/scx_flatcg.c
index 5d24ca9c29d9..6dd423eeb4ff 100644
--- a/tools/sched_ext/scx_flatcg.c
+++ b/tools/sched_ext/scx_flatcg.c
@@ -137,6 +137,7 @@ restart:
 	skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
 
 	skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+	skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
 
 	while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
 		double v;
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index ee264947e0c3..26c40ca4f36c 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -33,7 +33,7 @@ enum consts {
 
 char _license[] SEC("license") = "GPL";
 
-const volatile u64 slice_ns = SCX_SLICE_DFL;
+const volatile u64 slice_ns;
 const volatile u32 stall_user_nth;
 const volatile u32 stall_kernel_nth;
 const volatile u32 dsp_inf_loop_after;
@@ -231,7 +231,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags)
 	}
 
 	/* if select_cpu() wasn't called, try direct dispatch */
-	if (!(enq_flags & SCX_ENQ_CPU_SELECTED) &&
+	if (!__COMPAT_is_enq_cpu_selected(enq_flags) &&
 	    (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) {
 		__sync_fetch_and_add(&nr_ddsp_from_enq, 1);
 		scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags);
@@ -763,6 +763,8 @@ static void dump_shared_dsq(void)
 
 static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
 {
+	struct scx_event_stats events;
+
 	bpf_rcu_read_lock();
 	dispatch_highpri(true);
 	bpf_rcu_read_unlock();
@@ -772,6 +774,25 @@ static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
 	if (print_shared_dsq)
 		dump_shared_dsq();
 
+	__COMPAT_scx_bpf_events(&events, sizeof(events));
+
+	bpf_printk("%35s: %lld", "SCX_EV_SELECT_CPU_FALLBACK",
+		   scx_read_event(&events, SCX_EV_SELECT_CPU_FALLBACK));
+	bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE",
+		   scx_read_event(&events, SCX_EV_DISPATCH_LOCAL_DSQ_OFFLINE));
+	bpf_printk("%35s: %lld", "SCX_EV_DISPATCH_KEEP_LAST",
+		   scx_read_event(&events, SCX_EV_DISPATCH_KEEP_LAST));
+	bpf_printk("%35s: %lld", "SCX_EV_ENQ_SKIP_EXITING",
+		   scx_read_event(&events, SCX_EV_ENQ_SKIP_EXITING));
+	bpf_printk("%35s: %lld", "SCX_EV_ENQ_SLICE_DFL",
+		   scx_read_event(&events, SCX_EV_ENQ_SLICE_DFL));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DURATION",
+		   scx_read_event(&events, SCX_EV_BYPASS_DURATION));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_DISPATCH",
+		   scx_read_event(&events, SCX_EV_BYPASS_DISPATCH));
+	bpf_printk("%35s: %lld", "SCX_EV_BYPASS_ACTIVATE",
+		   scx_read_event(&events, SCX_EV_BYPASS_ACTIVATE));
+
 	bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
 	return 0;
 }
diff --git a/tools/sched_ext/scx_qmap.c b/tools/sched_ext/scx_qmap.c
index ac45a02b4055..c4912ab2e76f 100644
--- a/tools/sched_ext/scx_qmap.c
+++ b/tools/sched_ext/scx_qmap.c
@@ -64,6 +64,8 @@ int main(int argc, char **argv)
 
 	skel = SCX_OPS_OPEN(qmap_ops, scx_qmap);
 
+	skel->rodata->slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
+
 	while ((opt = getopt(argc, argv, "s:e:t:T:l:b:PHd:D:Spvh")) != -1) {
 		switch (opt) {
 		case 's':
diff --git a/tools/sched_ext/scx_simple.bpf.c b/tools/sched_ext/scx_simple.bpf.c
index 31f915b286c6..e6de99dba7db 100644
--- a/tools/sched_ext/scx_simple.bpf.c
+++ b/tools/sched_ext/scx_simple.bpf.c
@@ -52,11 +52,6 @@ static void stat_inc(u32 idx)
 		(*cnt_p)++;
 }
 
-static inline bool vtime_before(u64 a, u64 b)
-{
-	return (s64)(a - b) < 0;
-}
-
 s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
 {
 	bool is_idle = false;
@@ -84,7 +79,7 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
 		 * Limit the amount of budget that an idling task can accumulate
 		 * to one slice.
 		 */
-		if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL))
+		if (time_before(vtime, vtime_now - SCX_SLICE_DFL))
 			vtime = vtime_now - SCX_SLICE_DFL;
 
 		scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
@@ -108,7 +103,7 @@ void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
 	 * thus racy. Any error should be contained and temporary. Let's just
 	 * live with it.
 	 */
-	if (vtime_before(vtime_now, p->scx.dsq_vtime))
+	if (time_before(vtime_now, p->scx.dsq_vtime))
 		vtime_now = p->scx.dsq_vtime;
 }
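A quick numeric illustration (not from the diff) of the clamp that simple_enqueue keeps: with vtime_now = 100,000,000 and a 20,000,000 ns default slice, a task whose vtime lagged to 50,000,000 during a long sleep is pulled up to 80,000,000, so sleeping earns at most one slice of scheduling head start:

	u64 vtime = 50000000, vtime_now = 100000000;
	u64 slice = 20000000;	/* stand-in for SCX_SLICE_DFL */

	if (time_before(vtime, vtime_now - slice))
		vtime = vtime_now - slice;	/* vtime becomes 80000000 */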